-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathabaqus_job_submission
executable file
·164 lines (135 loc) · 6 KB
/
abaqus_job_submission
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/usr/bin/python3 -u
import sys
import os
import re
import shutil
import getpass
import socket
import helpers
from constants import AbaqusConstants, LicenseConstants
def first_run():
    """Launch the initial Abaqus analysis for the current job.

    The command line is assembled from ``AbaqusConstants.BIN_LOCATION`` and
    the module-level names ``abaqus_version``, ``job_name``,
    ``input_file_name`` and ``slurm_ntasks``, then executed through
    ``os.system``. The exit status of the command is not inspected.
    """
    abaqus_exe = AbaqusConstants.BIN_LOCATION + 'abq' + abaqus_version
    command_parts = (
        abaqus_exe,
        ' job=', job_name,
        ' input=', input_file_name,
        ' cpus=', slurm_ntasks,
        ' -verbose 3 standard_parallel=all mp_mode=mpi interactive',
    )
    os.system(''.join(command_parts))
def restart_join_run():
    """Run the Abaqus ``restartjoin`` command for the current job.

    The command passes ``job_name`` as ``originalodb``, ``Res_<job_name>`` as
    ``restartodb`` and the ``history`` option. ``job_name`` and
    ``abaqus_version`` are read from module scope. The command is executed
    through ``os.system`` and its exit status is not inspected.
    """
    abaqus_exe = AbaqusConstants.BIN_LOCATION + 'abq' + abaqus_version
    join_command = ''.join((
        abaqus_exe,
        ' restartjoin originalodb=', job_name,
        ' restartodb=Res_', job_name,
        ' history',
    ))
    os.system(join_command)
def restart_run(new_job, new_input_file, old_job):
    """Start an Abaqus run that continues from a previous job.

    Args:
        new_job: Value placed after ``job=`` on the command line.
        new_input_file: Value placed after ``input=``.
        old_job: Value placed after ``oldjob=``.

    ``abaqus_version`` and ``slurm_ntasks`` are read from module scope. The
    finished command line is printed before it is executed through
    ``os.system``; the exit status is not inspected.
    """
    abaqus_exe = AbaqusConstants.BIN_LOCATION + 'abq' + abaqus_version
    restart_command = ''.join((
        abaqus_exe,
        ' job=', new_job,
        ' input=', new_input_file,
        ' oldjob=', old_job,
        ' cpus=', slurm_ntasks,
        ' -verbose 3 standard_parallel=all mp_mode=mpi interactive',
    ))
    print(restart_command)
    os.system(restart_command)
def end_run():
# Post-run step: look at the message (.msg) file of the run and call the
# matching helper. Reads the module-level names job_name and submit_dir.
#
# NOTE(review): this copy of the file has lost its indentation, so the
# nesting below cannot be read off the text. Three spots need confirming
# against the original file:
#   * whether `file_name = job_name` belongs to the `if` directly above it
#     or runs unconditionally;
#   * whether the final `else:` pairs with the `os.path.isfile(msg_file_name)`
#     check (its message suggests so) or with the inner if/elif chain;
#   * whether `sys.exit()` is part of that `else` or always runs at the end.
# Two candidate message files: Res_<job_name>.msg and <job_name>.msg.
file_name = u'Res_' + job_name
res_msg_file_name = file_name + u'.msg'
msg_file_name = job_name + u'.msg'
# When the Res_ message file exists it is the one inspected below.
if os.path.isfile(res_msg_file_name):
msg_file_name = res_msg_file_name
file_name = job_name
if os.path.isfile(msg_file_name):
# Only the first item produced by helpers.find_the_line is examined.
# Presumably it yields a line of the file matching both search strings
# (or a falsy value when there is none) -- TODO confirm in helpers.
if next(helpers.find_the_line('the analysis','has been completed',
msg_file_name)):
# if 'SBATCH_GRES' in os.environ:
# helpers.release_tokens(slurm_job_id)
helpers.finalize_job(job_name,submit_dir)
# Otherwise look for a 'fatal' entry and, if found, hand file_name and
# submit_dir to helpers.copy_dat_file.
elif next(helpers.find_the_line('fatal','',msg_file_name)):
helpers.copy_dat_file(file_name, submit_dir)
else:
print("No message file has been found. The analysis has probably " +
"never been run!")
# sys.exit() without an argument ends the script with exit status 0.
sys.exit()
### Script Starts here
if __name__ == "__main__":
    # NOTE: the names assigned below are intentionally module-level globals.
    # first_run(), restart_run(), restart_join_run() and end_run() read
    # job_name, input_file_name, slurm_ntasks, abaqus_version and submit_dir
    # from module scope, so this logic must not be moved into a function
    # without also passing those values along.

    # Get slurm env variables, input filename and abq version
    submit_dir = os.environ['SLURM_SUBMIT_DIR']
    job_name = os.environ['SLURM_JOB_NAME']
    input_file_name = job_name + '.inp'
    slurm_job_id = os.environ['SLURM_JOB_ID']
    slurm_ntasks = os.environ['SLURM_NTASKS']
    abaqus_version = os.environ['ABQVER']
    partition_name = os.environ['SLURM_JOB_PARTITION']

    # Necessary for abaqus jobs
    if 'SLURM_GTIDS' in os.environ:
        del os.environ['SLURM_GTIDS']

    # contact license manager and reserve/release licenses if possible
    # if 'SBATCH_GRES' in os.environ:
    #     helpers.reserve_tokens(slurm_job_id)

    # OLDJOB is set when the job is submitted as a restart of an older job.
    if 'OLDJOB' in os.environ:
        old_job_name = os.environ['OLDJOB']
        # Fixed: the extension separator was missing ('inp' -> '.inp'),
        # which produced names like "myjobinp".
        old_input_file_name = old_job_name + '.inp'
    else:
        old_job_name = None
        old_input_file_name = None

    user_name = getpass.getuser()
    job_folder_name = slurm_job_id + '_' + job_name + '_' + user_name

    # Set the sbatch job name
    # os.environ['SLURM_JOB_NAME'] = job_name

    # Determine if checkpointing is necessary
    partition_dict = helpers.read_partition(partition_name)
    if not isinstance(partition_dict, dict):
        print("The partition preemption method cannot be determined")
        sys.exit()
    preempt_method = partition_dict['PreemptMode']

    if re.search('requeue', preempt_method, re.IGNORECASE):
        print("REQUEUE preemption detected. "
              "Abaqus checkpoint/restart will be used")
    else:
        # Abaqus checkpoint/restart is not going to be used: run the job
        # once in the scratch folder and finish.
        helpers.create_scratch_and_move(input_file_name, job_folder_name)
        # cd in the scratch folder
        os.chdir(AbaqusConstants.SCRATCH_FOLDER + job_folder_name)
        if old_job_name:
            # The job was submitted with a restart option from the
            # beginning.
            restart_run(job_name, input_file_name, old_job_name)
        else:
            first_run()
        end_run()
        # Exit explicitly so this branch can never fall through into the
        # checkpoint/restart logic below, regardless of how end_run()
        # returns.
        sys.exit()

    # Search and find the checkpointing line in the input file.
    # If found save the frequency value.
    # A raw string keeps the regex backslash without relying on an invalid
    # string escape; the default makes an empty iterator count as
    # "line not found" instead of raising StopIteration.
    restart_line = next(
        helpers.find_the_line(r'\*restart', 'frequency', input_file_name),
        None)
    frequency = None
    if restart_line:
        # The value is taken from the text following the first '='.
        pieces = restart_line.split("=", 2)
        if len(pieces) > 1:
            search_object = re.search(r'\d+', pieces[1])
            if search_object:
                frequency = search_object.group()
    if frequency is None:
        # Covers both a missing *Restart line and a line without a numeric
        # value; previously the latter left `frequency` unbound and failed
        # later with a NameError.
        raise Exception(('LowPrio queue requires checkpointing enabled. '
                         'Check {} file and make sure the line '
                         'starting with "*Restart," is there and '
                         'settings are correct').format(input_file_name))

    helpers.create_scratch_and_move(input_file_name, job_folder_name)
    # cd in the scratch folder
    os.chdir(AbaqusConstants.SCRATCH_FOLDER + job_folder_name)

    # If the job has already been restarted at least once, move the Res_
    # files to the original files and join the odbs.
    if os.path.isfile('Res_' + job_name + '.sta'):
        print("Job is restarted where it has left of")
        # wildcard_operations(os.curdir,'*.lck', operation='remove')
        try:
            os.remove(os.path.join(os.curdir, job_name + '.lck'))
        except FileNotFoundError:
            # No lock file left behind: nothing to clean up.
            pass
        restart_join_run()
        helpers.merge_res_files(job_name)

    if os.path.isfile(job_name + '.sta'):
        print("Job is restarted where it has left of.(First Restart)")
        helpers.create_new_input_file(job_name, input_file_name,
                                      submit_dir, frequency)
        restart_run('Res_' + job_name, 'Res_' + input_file_name, job_name)
    else:
        print("First run")
        first_run()
    end_run()