-
Notifications
You must be signed in to change notification settings - Fork 0
Install and Run on DeltaAI
Cameron Smith edited this page Oct 28, 2025
·
1 revision
# Create the working directory and move into it.
mkdir mlReconnection
cd mlReconnection
# Request an interactive Slurm allocation: 1 node, 30 tasks, 120 minutes,
# 1 GPU per node, on the ghx4 partition.
salloc --nodes=1 --ntasks=30 --time=120 \
  --account=bfim-dtai-gh --partition=ghx4 --gpus-per-node=1
# Generate installPgkyl.sh, which sets up the "pgkyl" virtual environment and
# installs postgkyl into it in editable mode.
# The heredoc delimiter is quoted so the script text is written verbatim.
cat << 'EOF' > installPgkyl.sh
#!/usr/bin/env bash
# Stop at the first failing step instead of continuing in a broken state.
set -e
module use /sw/user/modules/python
module load python/miniforge3_pytorch
# Create the virtual environment on first use; it is activated just below and
# again by envPython.sh.
# NOTE(review): add --system-site-packages if the module's packages must be
# visible inside the environment - confirm.
[ -d pgkyl ] || python -m venv pgkyl
source pgkyl/bin/activate
# Skip the clone when re-running so the script stays usable with set -e.
[ -d postgkyl ] || git clone https://github.com/ammarhakim/postgkyl.git
cd postgkyl/
# Quote the extras so the shell never treats [...] as a glob pattern.
pip install -e '.[adios,test]'
EOF
# Make the generated installer executable and run it.
chmod +x installPgkyl.sh
./installPgkyl.sh
# Fetch the cws/scorec branch of pgkylFrontEnd over SSH
# (requires an SSH key registered with GitHub).
git clone -b cws/scorec git@github.com:scorec/pgkylFrontEnd.git
# Generate envPython.sh, the per-session environment setup script.
# The heredoc is unquoted on purpose: $PWD is expanded now, so the absolute
# path of the current directory is recorded as "root", while the \$-escaped
# variables are written literally and expanded each time the file is sourced.
cat << EOF > envPython.sh
root="$PWD"
module use /sw/user/modules/python
module load python/miniforge3_pytorch
export PYTHONPATH="\${PYTHONPATH:+\$PYTHONPATH:}\$root/pgkylFrontEnd"
source "\$root/pgkyl/bin/activate"
EOF
Do this every time you begin work:
# Load the modules and activate the pgkyl virtual environment in this shell.
source envPython.sh
# Upgrade pip and install the nightly CUDA 12.6 PyTorch wheels; "python -m pip"
# ensures the packages go to the interpreter that is currently active.
python -m pip install --upgrade pip
python -m pip install --pre torch torchvision torchaudio \
  --index-url https://download.pytorch.org/whl/nightly/cu126
# Fetch the classifier sources over SSH (requires an SSH key registered with GitHub).
git clone git@github.com:SCOREC/reconClassifier
# Short training run (2 epochs) reading inputs from the dataset directory
# under /work/nvme.
data=/work/nvme/bfim/cwsmith/mlReconnection2025/1024Res_v0
python reconClassifier/XPointMLTest.py \
  --paramFile="$data/pkpm_2d_turb_p2-params.txt" \
  --xptCacheDir="$data/cache" \
  --trainFrameFirst 1 \
  --trainFrameLast 2 \
  --validationFrameFirst 2 \
  --validationFrameLast 3 \
  --epochs 2 \
  --minTrainingLoss 0
# Copy the data to a per-user directory on the ramdisk - requires ~10GB.
nvme=/work/nvme/bfim/cwsmith/mlReconnection2025/1024Res_v0
ramdisk="/dev/shm/$(whoami)"
mkdir -p "$ramdisk"
# "time" reports how long each copy takes.
time cp -r "$nvme/cache04082025" "$ramdisk/"
time cp "$nvme/pkpm_2d_turb_p2-params.txt" "$ramdisk/"
# Point subsequent runs at the ramdisk copy.
data=$ramdisk
# OpenMP settings for the run: 10 threads, static schedule, close binding.
export OMP_NUM_THREADS=10 OMP_SCHEDULE=STATIC OMP_PROC_BIND=CLOSE
# Benchmark run; "python -u" keeps stdout/stderr unbuffered.
# Reads its inputs from the directory held in $data.
python -u reconClassifier/XPointMLTest.py \
  --paramFile="$data/pkpm_2d_turb_p2-params.txt" \
  --xptCacheDir="$data/cache04082025" \
  --use-amp --benchmark \
  --epochs 5 --batchSize 16 --learningRate 1e-5