-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsubmit_local
executable file
·29 lines (22 loc) · 975 Bytes
/
submit_local
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/bin/bash
#
# submit_local - submit a fixed torchrun training command to a batch queue
# through qsub.
#
# Usage:
#   submit_local PYTHON_FILE_NAME_WITH_DOTPY LOGFILE_NAME MODEL_NAME MACHINENAME
#
# Arguments:
#   $1  Python file name. A trailing ".py" is stripped to form the job name;
#       "<job name>.py" must exist in the current directory (or at the given
#       path).
#   $2  Log file name. Accepted for interface compatibility; not used.
#   $3  Model name. Accepted for interface compatibility; not used.
#   $4  Queue name handed to "qsub -q".
#
# Exit status:
#   2    usage error (missing $1 or $4)
#   255  the Python input file does not exist
#   else the exit status of qsub
#
# NOTE(review): the submitted job body is a hard-coded torchrun command line;
# the Python file named by $1 is only checked for existence and is not
# referenced by the job itself. Confirm that this is intended.

# Print the usage line on stderr.
usage() {
  echo "Usage: submit_local PYTHON_FILE_NAME_WITH_DOTPY LOGFILE_NAME MODEL_NAME MACHINENAME" >&2
}

# Validate the arguments, make sure "<job name>.log" exists, then submit.
# Uses "return" rather than "exit" so the status simply becomes the exit
# status of the script through the trailing "main" call.
main() {
  # Both the input file and the queue are required: with an empty queue the
  # option following -q would be swallowed as the queue name.
  if [[ -z "${1:-}" || -z "${4:-}" ]]; then
    usage
    return 2
  fi

  # Function-local, lower-case names. The positional values $2 and $3 are
  # deliberately not stored: assigning $2 to LOGNAME would overwrite the
  # login-name environment variable that child processes inherit.
  local job_name="${1%.py}"
  local queue="$4"

  printf '%s\n' "$job_name"

  if [[ ! -f "$job_name.py" ]]; then
    echo "Distributed Python input file $job_name.py does not exist!" >&2
    return 255
  fi

  # Create an empty log file next to the input file when none is present.
  if [[ ! -f "$job_name.log" ]]; then
    echo "Distributed Python output file $job_name.log does not exist!"
    touch -- "$job_name.log"
  fi

  # The here-document delimiter is unquoted on purpose: $PWD is expanded now,
  # at submission time, so the job changes into the directory the script was
  # launched from. The job aborts if that directory is unreachable instead of
  # running torchrun somewhere else.
  qsub -q "$queue" -N "$job_name" -j y -o ~/Jobs << EOF
cd "$PWD" || exit 1
/Scr/hyunpark/anaconda3/envs/ai/bin/torchrun --nnodes=1 --nproc_per_node=gpu --max_restarts 0 --module main --which_mode train --name convnext_model_indiv_2 --backbone convnext --filename dppc.pickle --multiprocessing --optimizer torch_adam --gpu --epoches 1000 --ce_re_ratio 1 0.1 --load_ckpt_path /Scr/hyunpark-new/Protein-TDA/saved --save_dir /Scr/hyunpark-new/Protein-TDA/pickled_indiv --truncated --batch_size 512 --resume
EOF
}

main "$@"