-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathoptimizer.bash
executable file
·348 lines (307 loc) · 7.9 KB
/
optimizer.bash
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
#!/bin/bash
# Load the shared helper libraries. Both paths are relative, so the script
# has to be started from the directory that contains utils/.
. ./utils/functions.bash
. ./utils/job_pool.sh
# Forward the command-line arguments as separate, intact words: an unquoted $@
# would re-split any argument containing whitespace (e.g. a path with a space).
# NOTE(review): cdIntoFirstArg is not defined in this file; presumably it comes
# from one of the sourced helpers and changes into the directory given as the
# first argument. Confirm.
cdIntoFirstArg "$@"
#######################################
# USR2 handler of the parent process (installed further down, only when more
# than one thread is used).
# The parent is blocked waiting for a free resource while the children receive
# the signal directly, so the parent only announces the stop, sleeps for a
# while (presumably to leave the children time to finish their own handler)
# and then exits.
# Outputs: one informational line on stdout
# Returns: never returns; exits the process with status 0
#######################################
gonna_be_killed_parrent() {
  printf '%s\n' "all stop received, sleep a little"
  # Forwarding the signal to the children is disabled:
  # get_childs_pid
  # get_childs_pid | xargs -I % kill -s USR2 %
  sleep 50s
  exit 0
}
#######################################
# USR2 handler of a running setup (installed by thread_run).
# It is expected to run with the temporary work directory as current
# directory.
#  - continue mode (CONTINUE != 0): drop the "running" marker, pack the
#    continue.*.data checkpoint files into continue.data and publish that
#    archive in $here, keeping the previous archive as continue.data.old.
#  - otherwise: remove the result directory of the setup altogether.
# Afterwards every process named like the command is killed, the parent is
# unlocked when several threads are used, and the process exits.
# Globals read: CONTINUE, here, setup, COMMAND, CPU
# Returns: never returns; exits the process with status 0
#######################################
function gonna_be_killed(){
  if [ "$CONTINUE" -ne 0 ] ; then
    rm -- "$here/running"
    #cp -r * $here
    if [ -e continue.data ] ; then
      rm continue.data
    fi
    # ls is used only for its exit status: it fails when the glob matches nothing
    if ls continue.*.data >& /dev/null ; then
      tar cf - continue.*.data | gzip - > continue.data
      rm continue.*.data
      # copy under a temporary name first so that a partially written
      # archive never replaces the published one
      cp continue.data "$here/continue.data.tmp" >& /dev/null
      if [ -e "$here/continue.data" ] ; then
        mv "$here/continue.data" "$here/continue.data.old"
      fi
      mv "$here/continue.data.tmp" "$here/continue.data"
    fi
  elif [ -n "$here" ] && [ -e "$here" ] ; then
    # Only delete once we really are in the parent of the result directory,
    # and never call rm -rf with an empty name.
    if [ -n "$setup" ] && cd "$here" && cd .. ; then
      rm -rf -- "$setup"
    fi
  fi
  echo "I know I must stop $setup"
  killall "$(basename "$COMMAND")"
  if [ "$CPU" -ne 1 ] ; then
    #unlock father
    unlock_wait
  fi
  exit 0
}
#######################################
# Rewrite an argument string for execution inside another directory.
# The string is split on whitespace; every word that names an existing file
# is copied into the destination and replaced by its base name, every other
# word is passed through unchanged.
# Arguments: $1 - destination directory
#            $2 - argument string (only its first line is read)
# Outputs:   the rewritten words on stdout, each followed by one space,
#            without a trailing newline
#######################################
function cpFileFromArgs(){
  local destination=$1
  local args=$2
  local -a array
  local element
  # -r keeps backslashes intact so non-file arguments are passed on verbatim
  read -r -a array <<< "$args"
  for element in "${array[@]}"
  do
    if [ -e "$element" ] ; then
      cp -- "$element" "$destination"
      printf '%s ' "$(basename -- "$element")"
    else
      printf '%s ' "$element"
    fi
  done
}
# rules.out must exist in the current directory before anything can run.
if [ ! -e rules.out ] ; then
  echo "Please run parsing_rules first"
  exit 1
fi

# Thread count. Default: the value reported by nbcpu minus one. A second
# command-line argument may lower it, and so may /xml/max_cpu in rules.xml.
# NOTE(review): nbcpu and $XML are not defined in this file; presumably they
# come from the sourced helpers. Confirm.
max_cpu=$(nbcpu)
max_cpu=$(( max_cpu - 1 ))
if [[ $# -eq 2 && $2 -le $max_cpu && $2 -gt 0 ]] ; then
  CPU=$2
else
  CPU=$max_cpu
fi
# $XML is left unquoted on purpose: it may hold a command followed by options.
export MAX_CPU=$($XML sel -t -m "/xml/max_cpu" -v @value rules.xml)
if [[ -n $MAX_CPU && $MAX_CPU -lt $CPU ]] ; then
  CPU=$MAX_CPU
fi
# Always keep at least one thread (single-CPU machine, max_cpu of 0 or below).
if [[ $CPU -le 0 ]] ; then
  CPU=1
fi
echo "Number of thread set to $CPU."

# Settings read from rules.xml, exported for the functions of this script.
export COMMAND=$($XML sel -t -m "/xml/command" -v @value rules.xml)
export DATA=$($XML sel -t -m "/xml/data" -v @value rules.xml)
export RM_DATA=$($XML sel -t -m "/xml/rm_data" -v @value rules.xml)
export ARGS=$($XML sel -t -m "/xml/args" -v @value rules.xml)
export CONFIG_FILES=$($XML sel -t -m "/xml/ini_file" -v @value rules.xml)
export COMPRESSED_DATA=$($XML sel -t -m "/xml/compressed_data" -v @value rules.xml)
export END_FILE=$($XML sel -t -m "/xml/end_file" -v @value rules.xml)
# CONTINUE is the number of /xml/continue elements: non-zero enables resuming.
export CONTINUE=$($XML sel -t -v "count(/xml/continue)" rules.xml)

# Quoted so that an empty $COMMAND is reported too: unquoted, the test
# degenerates to `[ ! -e ]`, which is false, and the check is skipped.
if [ ! -e "$COMMAND" ] ; then
  echo "$COMMAND doesn't exists"
  exit 1
fi
#######################################
# Pack the continue.*.data checkpoint files of the current directory into
# continue.data and publish that archive in a result directory, keeping the
# previously published archive as continue.data.old.
# Does nothing when no continue.*.data file exists.
# Arguments: $1 - result directory that receives continue.data
#######################################
function _thread_run_save_continue(){
  local target_dir=$1
  # ls is used only for its exit status: it fails when the glob matches nothing
  if ls continue.*.data >& /dev/null ; then
    tar cf - continue.*.data | gzip -9 - > continue.data
    rm continue.*.data
    # copy under a temporary name first so that a partially written archive
    # never replaces the published one
    cp continue.data "$target_dir/continue.data.tmp"
    if [ -e "$target_dir/continue.data" ] ; then
      mv "$target_dir/continue.data" "$target_dir/continue.data.old"
    fi
    mv "$target_dir/continue.data.tmp" "$target_dir/continue.data"
  fi
}

#######################################
# Run one setup: write the setup's values into the configuration files,
# execute $COMMAND inside a private temporary directory and move everything
# it produced back into $dir/$setup.
# Globals read: CONFIG_FILES, COMMAND, DATA, RM_DATA, COMPRESSED_DATA, ARGS,
#               CONTINUE, OAR_JOB_ID
# Globals written: dir, setup, parameters, main_dir, tmp_dir, here, args,
#               executable, last_pid, result and the loop variables. They are
#               deliberately not local: the USR2 handler gonna_be_killed
#               reads $here and $setup.
# Arguments: $1     - work directory, relative to the current directory
#            $2     - setup name: the parameter values joined by '_'
#            $3...  - parameter names, in the same order as the values; a
#                     name written "file:name" is only changed in that file
# Returns:   with the current directory restored to where it was on entry.
# NOTE(review): when the command fails this function calls `exit 0`, which
# ends the calling process itself if thread_run was invoked directly (single
# thread mode). Left unchanged; confirm that this is intended.
#######################################
function thread_run(){
  dir=$1
  setup=$2
  shift 2
  parameters="$*"
  main_dir=$(pwd)
  #configuration
  # $CONFIG_FILES is a whitespace separated list: it must stay unquoted
  cp $CONFIG_FILES "$dir/$setup/"
  hostname >> "$dir/$setup/host"
  tmp_dir=$(mktemp -d)
  echo -n "$(hostname):$tmp_dir:$OAR_JOB_ID" >> "$dir/$setup/host_tmp"
  i=1
  for parameter in $parameters ; do
    # the i-th '_' separated field of the setup name is the i-th value
    value=$(printf '%s\n' "$setup" | cut -d'_' -f"$i")
    #if parameter is file specific
    if [[ $parameter == *:* ]] ; then
      configf=${parameter%%:*}
      # keep only the second ':' separated field as the parameter name
      parameter=${parameter#*:}
      parameter=${parameter%%:*}
      sed -i "s/^\($parameter=\)[0-9,_:.a-zA-Z-]*$/\1$value/g" "$dir/$setup/$configf"
    else
      for configf in $CONFIG_FILES ; do
        sed -i "s/^\($parameter=\)[0-9,_:.a-zA-Z-]*$/\1$value/g" "$dir/$setup/$configf"
      done
    fi
    i=$(( i + 1 ))
  done
  #run
  if ! cd "$dir/$setup" ; then
    #directory might not exist anymore because of sshfs synchronization
    # the temporary directory is still empty here: do not leave it behind
    rmdir "$tmp_dir"
    cd "$main_dir"
    return
  fi
  here=$(pwd)
  trap gonna_be_killed USR2
  # Resume: unpack the checkpoint archive of a previous run into the temporary
  # directory, falling back on the older archive when the newest is corrupted.
  if [ "$CONTINUE" -ne 0 ] && [ -e continue.data ] ; then
    #cp -r * $tmp_dir/
    cp continue.data "$tmp_dir/"
    cd "$tmp_dir/"
    # "-S .data" makes gzip treat continue.data as compressed "continue"
    if gzip -d -S .data continue.data ; then
      tar -xf continue
      rm continue
    elif [ -e "$here/continue.data.old" ] ; then
      #archive corrupted
      mv "$here/continue.data.old" "$here/continue.data"
      cp "$here/continue.data" .
      if gzip -d -S .data continue.data ; then
        #saved with old data
        tar -xf continue
        rm continue
      else
        #old archive also corrupted
        rm "$here/continue.data"
        # also drop the unreadable copy, otherwise it would be moved back
        # into the result directory at the end of the run
        rm -f continue.data
      fi
    else
      #cannot recover acts as no data before
      rm "$here/continue.data"
      rm continue.data
    fi
    cd "$here"
  fi
  cp $CONFIG_FILES "$tmp_dir"
  cp "$COMMAND" "$tmp_dir"
  if [[ -n $DATA ]] ; then
    # $DATA is a whitespace separated list: it must stay unquoted
    cp $DATA "$tmp_dir"
    cd "$tmp_dir"
    for data_file in $DATA ; do
      # inspect the copy: the original path may not resolve from here
      data_name=$(basename "$data_file")
      if [[ $(file -b "$data_name" | cut -d ' ' -f1) == 'XZ' ]] ; then
        tar -xJf "$data_name"
        rm "$data_name"
      fi
    done
    cd "$here"
  fi
  args=$(cpFileFromArgs "$tmp_dir" "$ARGS")
  if [ "$CONTINUE" -ne 0 ] ; then
    args="$args --continue"
  fi
  cd "$tmp_dir/"
  executable="./$(basename "$COMMAND")"
  chmod +x "$executable"
  echo "$executable $args" > executable.trace
  echo "$executable $args >& full.trace"
  # $args is split into words on purpose
  FOLDNAME=$dir "$executable" $args >& full.trace &
  last_pid=$!
  echo ":$last_pid" >> "$here/host_tmp"
  if [ "$CONTINUE" -ne 0 ] ; then
    counter=0
    while true ; do
      # sleep in the background and wait for it: unlike a foreground sleep,
      # wait is interrupted as soon as a trapped signal arrives
      sleep 1m &
      wait $!
      #test that exec is not finished
      if ! kill -0 "$last_pid" >& /dev/null ; then
        break
      fi
      #upload each 50 min
      if [ "$counter" -ge 50 ] ; then
        counter=0
        #cp -r * $here/
        if [ -e continue.data ] ; then
          rm continue.data
        fi
        _thread_run_save_continue "$here"
      fi
      counter=$(( counter + 1 ))
    done
  fi
  wait "$last_pid"
  result=$?
  echo $result >> full.trace
  if [ "$result" -ne 0 ] ; then
    # the temporary directory is kept and its path printed
    echo "FAILED : ($tmp_dir)"
    cat full.trace
    rm "$here/host"
    rm "$here/host_tmp"
    rm "$here/running"
    for configf in $CONFIG_FILES ; do
      rm "$here/$configf"
    done
    if [ "$CONTINUE" -ne 0 ] ; then
      rm "$here/continue.data"
      if [ -e "$here/continue.data.old" ] ; then
        #data can be saved
        mv "$here/continue.data.old" "$here/continue.data"
        #don't rmdir
        exit 0
      fi
    fi
    rmdir "$here"
    exit 0
  elif [ "$CONTINUE" -ne 0 ] ; then
    _thread_run_save_continue "$here"
  fi
  rm "$executable"
  if [[ -n $RM_DATA ]] ; then
    # whitespace separated list, relative to the temporary directory
    rm -rf $RM_DATA
  fi
  if [[ -n $COMPRESSED_DATA ]] ; then
    for cfile in $COMPRESSED_DATA ; do
      # compress in place: the file keeps its original name
      gzip --best "$cfile"
      mv "$cfile.gz" "$cfile"
    done
  fi
  if [ -e "$here" ] ; then
    cd "$here"
    if [[ -e $here && $CONTINUE -ne 0 ]] ; then
      cp $CONFIG_FILES "$tmp_dir/"
      cp host "$tmp_dir/"
      cp host_tmp "$tmp_dir/"
      #rm -rf * #might be too dangerous if $here don't exist anymore
    fi
    mv "$tmp_dir"/* .
    if [ -e running ] ; then
      rm running
    fi
  fi
  rmdir "$tmp_dir"
  # Go back to where we started: "cd ../.." was only right when $dir is a
  # single path component and the result directory still exists.
  cd "$main_dir"
}
# With more than one thread the setups are dispatched through the job pool and
# the parent gets its own USR2 handler.
if [ "$CPU" -ne 1 ]; then
  job_pool_init "$CPU" 0
  trap gonna_be_killed_parrent USR2
fi
#full passage of %100
# rules.out lists the work directories; they are visited in random order.
directories=$(shuf rules.out)
for dir in $directories ; do
  # First line of $dir/rules.out: the parameter names. Remaining lines: one
  # setup each, shuffled. Kept in a variable rather than in a temporary file
  # so nothing is left behind when the script is interrupted.
  parameters=$(head -n 1 "$dir/rules.out")
  setups=$(sed -e '1d' "$dir/rules.out" | shuf)
  for setup in $setups ; do
    if [ "$CPU" -ne 1 ] ; then
      # NOTE(review): defined outside this file; presumably blocks until a
      # worker of the pool is free. Confirm.
      wait_free_ressources
    fi
    launch=0
    if [ ! -e "$dir/$setup" ] ; then
      # New setup. mkdir doubles as the claim: when it fails (for instance
      # because another process created the directory in the meantime) the
      # setup is left alone instead of being run a second time.
      if mkdir "$dir/$setup" ; then
        launch=1
      fi
    elif [[ $CONTINUE -ne 0 && ! -e $dir/$setup/$END_FILE && ! -e $dir/$setup/running ]] ; then
      # Unfinished setup that nobody is running: resume it.
      launch=1
    fi
    if [ "$launch" -eq 1 ] ; then
      touch "$dir/$setup/running"
      if [ "$CPU" -ne 1 ] ; then
        job_pool_run thread_run "$dir" "$setup" "$parameters"
      else
        thread_run "$dir" "$setup" "$parameters"
      fi
    fi
  done
done
if [ "$CPU" -ne 1 ]; then
  job_pool_shutdown
fi