#!/usr/bin/perl
#
# 2011/11/27 Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>

use strict;

use Getopt::Long();
use Pod::Usage;
use Coro;
use Coro::Semaphore;
use Coro::Signal;
use Coro::Channel;
use Coro::Handle;
use IO::File;
use POSIX qw( WNOHANG WEXITSTATUS );
use Cwd qw( getcwd );
use AnyEvent; # provides the AE::now / AE::now_update helpers used for the launch brake

my $file;
my $dir;
my $cmd;
my $logtrace;
my $verbose;
my $job_np         = 1;
my $nodefile       = $ENV{OAR_NODE_FILE} || '';
my $masterio;
my $switchio;
my $help;
my $oarsh          = 'oarsh -q -T';
my $sig_transmit;
my $sig_checkpoint = 'USR2';
my $job_launch_brake = 1; # one second time brake

Getopt::Long::GetOptions(
   'file=s'     => \$file,
   'dir=s'      => \$dir,
   'cmd=s'      => \$cmd,
   'logtrace=s' => \$logtrace,
   'verbose'    => \$verbose,
   'help'       => \$help,
   'oarsh=s'    => \$oarsh,
   'jobnp=i'    => \$job_np,
   'nodefile=s' => \$nodefile,
   'masterio=s' => \$masterio,
   'switchio'   => \$switchio,
   'transmit'   => \$sig_transmit,
   'kill=s'     => \$sig_checkpoint,
   ) || pod2usage(-verbose => 0);
pod2usage(-verbose => 2) if $help;
pod2usage(-verbose => 2) if not (
   (-e "$file")
   or (-d "$dir" and $cmd ne '')
   );

my $oar_version = `oarsub -V | awk '{print \$4}'`;
chomp $oar_version;

# global time
my $global_time_atstart    = time;
my $global_time_total      = 0;
my $global_time_cumulative = 0;

# re-run, keep trace of jobs already done
my %state;
my $log_h = IO::File->new();
if (-e "$logtrace") {
   $log_h->open("< $logtrace")
      or die "error: can't read log file: $!";
   while (<$log_h>) {
      # log version 1
      $state{$1} = 'start' if m/^start\s+job\s+([^\s]+)\s/;
      $state{$1} = 'end'   if m/^end\s+job\s+([^\s]+)\s/;
      # log version 2
      $state{$1} = 'start' if m/^start\s+subjob\s+([^\s]+)\s/;
      $state{$1} = 'end'   if m/^end\s+subjob\s+([^\s]+)\s/;
      ($global_time_total, $global_time_cumulative) = ($1, $2) if m/^global-time\s.*total\s+(\d+)\s+cumulative\s+(\d+)/;
      }
   $log_h->close();
   }
if ($logtrace) {
   $log_h->open(">> $logtrace")
      or die "error: can't append log file $logtrace: $!";
   $log_h->autoflush;
   $log_h = unblock $log_h;
   }

# write log format version
$log_h->print("log version 2\n") if $logtrace;
print("log version 2\n") if $verbose;

# job to run
my @job = ();
if (-e "$file") {
   my $job_num = 0;
   open(JOB_LIST, '<', "$file") or die "error: can't open job file $file: $!";
   while (my $job_cmd = <JOB_LIST>) {
      chomp $job_cmd;
      next if $job_cmd =~ m/^#/;
      next if $job_cmd =~ m/^\s*$/;
      $job_num++;
      my ($job_name) = $job_cmd =~ m/#.*?\bname=(\S+?)\b/i;
      $job_name ||= $job_num;
      push @job, {
         name   => $job_name,
         cmd    => "$job_cmd",
         num    => $job_num,
         };
      }
   close JOB_LIST;
   }
else {
   my $job_num = 0;
   opendir(DIR, $dir) or die "error: can't open folder $dir: $!";
   while (my $item = readdir(DIR)) {
      next if $item =~ m/^\./;
      next if $item =~ m/:/;
      next if $item =~ m/\.old$/;
      next if $item =~ m/\.sav$/;
      next if $item =~ m/\.bak$/;
      next if $item =~ m/\.no$/;
      next unless (-d "$dir/$item");
      $job_num++;
      push @job, {
         name   => $item,
         cmd    => "cd $dir/$item/; $cmd",
         num    => $job_num,
         };
      }
   closedir DIR;
   }

# assume unique job names; otherwise fall back to the job number
{
   my %seen = ();
   my $count_unique_name = grep { ! $seen{ $_->{name} }++ } @job;
   # compare against the job count (not the last array index)
   if ($count_unique_name != scalar(@job)) {
      $_->{name} = $_->{num} for @job;
      }
   }

# resources available
my @ressources = ();
open(NODE_FILE, '<', "$nodefile")
   or die "can't open $nodefile: $!";
while (<NODE_FILE>) {
   chomp;
   next if m/^#/;
   next if m/^\s*$/;
   push @ressources, $_;
   }
close NODE_FILE;

my $ressource_size = scalar(@ressources);
die "error: not enough resources: jobnp $job_np > resources $ressource_size"
   if $job_np > $ressource_size;

my $current_dir = getcwd();

my $stderr = $ENV{OAR_STDERR} || '';
$stderr =~ s/\.stderr$//;
$stderr = $masterio if $masterio;
my $stdout = $ENV{OAR_STDOUT} || '';
$stdout =~ s/\.stdout$//;
$stdout = $masterio if $masterio;

my $finished = new Coro::Signal;
my $job_todo = new Coro::Semaphore 0;
my $job_name_maxlen = 0;
for (@job) {
   $job_todo->up;
   $job_name_maxlen = length($_->{name}) if length($_->{name}) > $job_name_maxlen;
   }

# slice of resources for parallel jobs
my $ressources = new Coro::Channel;
for my $slot (1 .. int($ressource_size / $job_np)) {
   $ressources->put(
      join(',',
         @ressources[ (($slot - 1) * $job_np) .. (($slot * $job_np) - 1) ])
         );
   }

my %scheduled = ();

# OAR checkpoint and default signal SIGUSR2
my $oar_checkpoint = new Coro::Semaphore 0;
my $notify         = new Coro::Signal;
$SIG{$sig_checkpoint} = sub {
   print "warning: received checkpoint at "
      . time
      . ", no new job will be started, just finishing the running jobs\n"
      if $verbose;
   $oar_checkpoint->up();
   $notify->send if $sig_transmit;
   };

# asynchronous notifier: transmit the checkpoint signal to running sub-jobs
async {
   while () {
      $notify->wait;

      for my $job_pid (keys %scheduled) {
         my $job_name     = $scheduled{$job_pid}->{name};
         my $job_pidfile  = $scheduled{$job_pid}->{pidfile};
         my $node_connect = $scheduled{$job_pid}->{node_connect};

         my $fh = IO::File->new();
         $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
            or die "error: can't notify subjob: $!";

         $fh->autoflush;
         $fh = unblock $fh;

         $fh->print("kill -$sig_checkpoint \$(cat $job_pidfile)\n");
         $fh->print("exit\n");

         print "warning: transmit signal $sig_checkpoint"
            . " to job $job_name on node $node_connect.\n"
            if $verbose;

         close $fh;
         cede;
         }
      }
   }

# asynchronous start job block
async {
   my $timer;
   JOB:
   for my $job (@job) {
      my $job_name   = $job->{name};
      my $job_cmd    = $job->{cmd};

      # has this job already been run?
      if (exists $state{$job_name}) {
         if ($state{$job_name} eq 'start') {
            print "warning: job $job_name was not clearly finished, relaunching...\n"
               if $verbose;
            }
         elsif ($state{$job_name} eq 'end') {
            delete $state{$job_name}; # free memory
            $job_todo->down;
            print "warning: job $job_name already run\n" if $verbose;
            cede;
            next JOB;
            }
         }

      # wait so as not to launch sub-jobs too fast
      # equivalent to sleep $job_launch_brake
      $timer = AE::now + $job_launch_brake;
      while ( AE::now < $timer ) {
         # force update of AE time
         AE::now_update;
         cede;
         }

      # take a resource slot for the job
      my $job_ressource = $ressources->get;

      # do not launch new jobs once an OAR checkpoint has been received
      last JOB if $oar_checkpoint->count() > 0;

      my ($node_connect) = split ',', $job_ressource;
      my $fh = IO::File->new();
      my $job_pid = $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
         or die "error: can't start subjob: $!";

      $fh->autoflush;
      $fh = unblock $fh;

      my $begin_at = time;
      #my $msg = sprintf "start job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
      my $msg = sprintf "start   subjob %${job_name_maxlen}s pid %5i at %s oarjob %i onnode %s\n",
         $job_name, $job_pid, $begin_at, $ENV{OAR_JOB_ID}, $job_ressource;
      $log_h->print($msg) if $logtrace;
      print($msg) if $verbose;

      my ($job_stdout, $job_stderr) = ('', '');
      $job_stdout = ">  $stdout-$job_name.stdout" if $stdout ne '' and $switchio;
      $job_stderr = "2> $stderr-$job_name.stderr" if $stderr ne '' and $switchio;

      my $job_nodefile   = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name";
      my $job_pidfile    = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.pid";
      my $job_statusfile = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.status";

      $scheduled{$job_pid} = {
         fh           => $fh,
         node_connect => $node_connect,
         ressource    => $job_ressource,
         name         => $job_name,
         pidfile      => $job_pidfile,
         begin_at     => $begin_at,
         };

      # set the job environment, run it and clean up
      if ($job_np > 1) {
         $fh->print("printf \""
               . join('\n', split(',', $job_ressource))
               . "\" > $job_nodefile\n");
         $fh->print("OAR_NODE_FILE=$job_nodefile\n");
         $fh->print("OAR_NP=$job_np\n");
         $fh->print("export OAR_NODE_FILE\n");
         $fh->print("export OAR_NP\n");
         $fh->print("unset OAR_MSG_NODEFILE\n");
         }

      $fh->print("cd $current_dir\n");

      if ($sig_transmit) {
         $fh->print("trap 'jobs -p|xargs -r ps -o pid --no-headers --ppid|xargs -r kill -$sig_checkpoint' $sig_checkpoint\n");
         $fh->print("echo \$\$ > $job_pidfile\n");
         }

      $fh->print("echo 0 > $job_statusfile\n");
      $fh->print("(\n");
      $fh->print("$job_cmd\n");
      $fh->print(") $job_stdout $job_stderr || echo \$? > $job_statusfile \&\n");
      $fh->print("while [ \$(jobs -p | wc -l) -gt 0 ]\n");
      $fh->print("do\n");
      $fh->print("   wait\n");
      $fh->print("done\n");

      $fh->print("OAR_SUBJOB_RETCODE=\$(cat $job_statusfile)\n");
      $fh->print("rm -f $job_statusfile\n");
      $fh->print("rm -f $job_pidfile\n")  if $sig_transmit;
      $fh->print("rm -f $job_nodefile\n") if $job_np > 1;
      $fh->print("exit \$OAR_SUBJOB_RETCODE\n");
      cede;
      }
   }
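
# For reference only (documentation, nothing is executed here): the shell
# script streamed to the remote node for one sub-job is roughly
#    cd <current_dir>
#    echo 0 > <statusfile>
#    ( <job_cmd> ) <stdout/stderr redirections> || echo $? > <statusfile> &
#    while [ $(jobs -p | wc -l) -gt 0 ]; do wait; done
#    exit $(cat <statusfile>)
# plus the OAR_NODE_FILE/OAR_NP exports when --jobnp > 1
# and the trap/pidfile lines when --transmit is set.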

# asynchronous end job block
async {
   while () {
      for my $job_pid (keys %scheduled) {
         # non blocking PID test
         if (waitpid($job_pid, WNOHANG)) {
            # get the return status code
            my $job_retcode0 = $? >> 8;
            #print "ERREUR0 $job_pid $job_retcode0\n" if $job_retcode0;

            # job time
            my $end_at = time;
            my $duration = $end_at - $scheduled{$job_pid}->{begin_at};
            $global_time_cumulative += $duration;

            #my $msg = sprintf "end   job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
            my $msg = sprintf "end     subjob %${job_name_maxlen}s pid %5i at %s oarjob %i onnode %s duration %i status %i\n",
               $scheduled{$job_pid}->{name},
               $job_pid, $end_at, $ENV{OAR_JOB_ID}, $scheduled{$job_pid}->{ressource},
               $duration, $job_retcode0;

            # job error
            $msg =~ s/^end\s+subjob/error   subjob/
               if $job_retcode0 > 0 and $job_retcode0 != 99;

            # job not finished, just suspended if a checkpoint signal was received
            $msg =~ s/^end\s+subjob/suspend subjob/
               if $sig_transmit and $oar_checkpoint->count() > 0;

            $log_h->print($msg) if $logtrace;
            print($msg) if $verbose;
            close $scheduled{$job_pid}->{fh};
            # release the resource slot for another job
            $ressources->put($scheduled{$job_pid}->{ressource});
            $job_todo->down;
            delete $scheduled{$job_pid};
            }
         cede;
         }

      # checkpointing: just finish the running jobs and quit
      $finished->send if $oar_checkpoint->count() > 0 and scalar(keys(%scheduled)) == 0;

      $finished->send if $job_todo->count() == 0;
      cede;
      }
   }

cede;

# all jobs have been done
$finished->wait;

# global time
$global_time_total += (time - $global_time_atstart);
$log_h->print("global-time total $global_time_total cumulative $global_time_cumulative\n") if $logtrace;
print("global-time total $global_time_total cumulative $global_time_cumulative\n") if $verbose;

# close the log trace file
$log_h->close() if $logtrace;

# when checkpointed, exit code 99 asks OAR to resubmit this idempotent job (skipped on OAR 2.4)
exit 99 if (($oar_checkpoint->count() > 0) and ($oar_version !~ m/^2\.4/));


__END__

=head1 NAME

oar-parexec - parallel execution of many small, short or long jobs

=head1 SYNOPSIS

 oar-parexec --file filecommand \
    [--logtrace tracefile] [--verbose] \
    [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
    [--switchio] [--masterio basefileio] \
    [--kill signal] [--transmit]

 oar-parexec --dir foldertoiterate --cmd commandtolaunch \
    [--logtrace tracefile] [--verbose] \
    [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
    [--switchio] [--masterio basefileio] \
    [--kill signal] [--transmit]

 oar-parexec --help

=head1 DESCRIPTION

C<oar-parexec> can execute a lot of small, short or long jobs in parallel inside a cluster.
The number of jobs running in parallel at any one time cannot exceed the number of cores defined in the node file.
C<oar-parexec> is easier to use inside an OAR job environment,
which automatically defines these strategic parameters...
However, it can be used outside OAR.

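For example, outside OAR you can provide the node list and the connector yourself
(here with plain C<ssh> and a hand-made node file F<./my_nodes.txt>, both hypothetical):

 oar-parexec --file ./subjob.list.txt \
    --nodefile ./my_nodes.txt --oarsh 'ssh'
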
Option C<--file>, or C<--dir> and C<--cmd>, are the only mandatory parameters.

Small jobs will be launched in the same folder as the master job.
Two environment variables are defined for each small job,
and only in the case of parallel small jobs (option C<--jobnp> > 1):

 OAR_NODE_FILE - file that lists the nodes for parallel computing
 OAR_NP        - number of processors allocated

The file defined by OAR_NODE_FILE is created in /tmp
on the node before the small job is launched,
and this file is deleted after the job completes.
C<oar-parexec> is a simple script:
OAR_NODE_FILE will not be deleted if the master job crashes.

OAR defines other variables that are equivalent to OAR_NODE_FILE:
OAR_NODEFILE, OAR_FILE_NODES, OAR_RESOURCE_FILE...
You can use the original OAR resource file in your script
through these variables if you need it.

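If your small jobs are MPI programs, a minimal sub-job command could simply reuse
these variables (the exact C<mpirun> flags depend on your MPI implementation, and
F<./my_mpi_prog> is hypothetical):

 mpirun -np $OAR_NP -machinefile $OAR_NODE_FILE ./my_mpi_prog
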
When used with long jobs,
activate the option C<--transmit> to send the OAR checkpoint signal
and suspend the small jobs before the walltime cut!

=head1 OPTIONS

=over 12

=item B<-f|--file filecommand>

Name of the file which contains the job list.
For the JOB_NAME definition,
the first valid job in the list has the number 1 and so on...

It is possible to set the name inside a comment on the job line.
For example:

 $HOME/test/subjob1.sh # name=subjob1

The key C<name> is case insensitive;
the associated value cannot contain a space...

The command can be any shell command.
It is possible to change folder,
or to launch an asynchronous job in parallel,
but one command must block and not be launched asynchronously (with & or coproc).
Example:

 cd ./test; ./subjob1.sh
 cd ./test; nice -18 du -sk ./ & ./subjob1.sh

The commands C<du -sk ./> and C<./subjob1.sh> will be run in parallel on the same resource...
It is better if C<du -sk ./> is faster than C<./subjob1.sh>!
Do not abuse this!

=item B<-d|--dir foldertoiterate>

The command given by C<--cmd> will be launched in every sub-folder of this master folder.
Files in this folder are ignored.
Sub-folders whose names begin with F<.>
or end with F<.old>, F<.sav>, F<.bak> or F<.no> are also ignored...

The JOB_NAME is simply the sub-folder name.

=item B<-c|--cmd commandtolaunch>

Command (and its arguments) that will be launched in every sub-folder
of the folder given by C<--dir>.
As for option C<--file>, the command can be any valid shell command,
but one command must block.

=item B<-l|--logtrace tracefile>

File which logs and traces the running jobs.
When the same master command is run again (after a crash for example),
only the jobs that are not marked as done will be run again.
Be careful: jobs marked as running (started but not finished) will be run again.
Tracing is based on the JOB_NAME across multiple runs.

This option is very useful in case of a crash,
but also for checkpointing and idempotent OAR jobs.

=item B<-v|--verbose>

=item B<-j|--jobnp integer>

Number of processors to allocate to each small job.
1 by default.

=item B<-n|--nodefile filenode>

Name of the file that lists all the nodes where jobs can be launched.
By default, it is defined automatically by OAR via
the environment variable C<OAR_NODE_FILE>.

For example, if you want to use 6 cores on your cluster node,
you need to put the node hostname 6 times in this file,
one per line...
It is a very common file format in MPI computing!

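A node file allowing 6 jobs at a time on a single node (here the example host
C<cl7n001>) would simply contain:

 cl7n001
 cl7n001
 cl7n001
 cl7n001
 cl7n001
 cl7n001
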
=item B<-o|--oarsh command>

Command used to launch a shell on a node.
By default

 oarsh -q -T

Change it to C<ssh> if you are not using an OAR cluster...

=item B<-s|--switchio>

Each small job will have its own STDOUT and STDERR output files,
based on the master OAR job files with the C<JOB_NAME> inserted
(or based on C<basefileio> if option C<masterio> is set).
Example:

 OAR.151524.stdout -> OAR.151524-JOB_NAME.stdout

where 151524 is the master C<OAR_JOB_ID>
and C<JOB_NAME> is the small job name.

=item B<-m|--masterio basefileio>

The C<basefileio> will be used in place of the environment variables
C<OAR_STDOUT> and C<OAR_STDERR> (without extension) to build the base name of the small job standard output
(only used when option C<switchio> is activated).

=item B<-k|--kill signal>

Signal to listen for in order to make a clean stop of the current C<oar-parexec> process.
By default, the USR2 signal is used (see C<kill -l> for the list of possible signals).

=item B<-t|--transmit>

Resend the caught signal to the sub-jobs when it is received.
By default, no signal is transmitted to the child processes.

This is only valuable for long sub-jobs that can,
in return, perform a clean restart by themselves.

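As a minimal sketch (the names C<more_work_to_do> and C<compute_one_step> are
hypothetical and stand for your own restartable logic), such a long sub-job
could be written as a loop of short steps that catches the transmitted signal
between two steps:

 #!/bin/bash
 # stop cleanly on USR2 (the default --kill signal), after the current step
 trap 'stop=1' USR2
 stop=0
 while [ "$stop" -eq 0 ] && more_work_to_do; do
    compute_one_step   # one short step, restartable from its saved state
 done
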
=item B<-h|--help>

=back


=head1 EXAMPLE

=head2 Simple list of sequential jobs

The job command file (option C<--file>) may contain:

 - empty lines
 - comment lines beginning with #
 - valid shell commands (which can contain a comment)

Example where F<$HOME/test/subjob01.sh> and the following are executable shell scripts.

 $HOME/test/subjob01.sh  # name=subjob01
 $HOME/test/subjob02.sh  # name=subjob02
 $HOME/test/subjob03.sh  # name=subjob03
 $HOME/test/subjob04.sh  # name=subjob04
 ...
 $HOME/test/subjob38.sh  # name=subjob38
 $HOME/test/subjob39.sh  # name=subjob39
 $HOME/test/subjob40.sh  # name=subjob40

These jobs could be launched by:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -f ./subjob.list.txt"

=head2 Folder job

In a folder F<subjob.d>, create sub-folders with your data inside: F<test1>, F<test2>...
The same command will be executed in every sub-folder.
C<oar-parexec> changes the current directory to the sub-folder before launching it.

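For example, a (hypothetical) layout could be:

 subjob.d/
    test1/
    test2/
    test3/
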
A very simple job could be:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -d ./subjob.d -c 'sleep 10; env'"

The command C<env> will be executed in every folder F<test1>, F<test2>... after a 10 s pause.

Sometimes it is simpler to use a command file list;
sometimes jobs by folder with the same command are more relevant.

=head2 Parallel job

You need to give the number of cores each small job needs with option C<--jobnp>.
If your jobs are built on OpenMP or MPI,
you can use the OAR_NP and OAR_NODE_FILE variables to configure them.
On an OAR cluster, you need to use C<oarsh> or a wrapper like C<oar-envsh>
for connections between nodes instead of C<ssh>.

Example with parallel small jobs on 2 cores:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -j 2 -f ./subjob.list.txt"

=head2 Tracing and master crash

If the master node crashes after hours of computation, is everything lost?
No: with option C<--logtrace>,
it is possible to remember the older results
and not re-run those jobs the second and following times.

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

After a crash or an C<oardel> command,
you can then re-run the same command, which will finish executing the jobs in the list:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

C<logtrace> files are just plain text files.
We use the extension '.log' because these files are automatically
excluded from our backup system!

=head2 Checkpointing and Idempotent

C<oar-parexec> is compatible with OAR checkpointing.
If you have 2000 small jobs that need 55 h to be done on 6 cores,
you can cut this into small parts.

For this example, we suppose that each small job needs about 10 min...
So, we send a checkpoint 12 min before the end of the process
to let C<oar-parexec> finish the jobs already started.
After being checkpointed, C<oar-parexec> does not start any new small job.

 oarsub -t idempotent -n test \
   -l /core=6,walltime=04:00:00 \
   --checkpoint 720 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

After 3 h 48 min, the OAR job stops launching new small jobs.
When all running small jobs are finished, it exits.
But as the OAR job is of type C<idempotent>,
OAR will re-submit it as long as all the small jobs have not been executed...

This way, we give other users a chance to use the cluster!

In this last example, we use a moldable OAR job with idempotent
to reserve many cores for a short time or a few cores for a long time:

 oarsub -t idempotent -n test \
   -l /core=50,walltime=01:05:00 \
   -l /core=6,walltime=04:00:00 \
   --checkpoint 720 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

=head2 Signal, resubmission and long jobs

By default, OAR uses the signal USR2 for checkpointing.
It is possible to change this with option C<--kill>.

When used with long small jobs, checkpointing could take too long...
More than the walltime!
The option C<--transmit> can be used to checkpoint the small jobs themselves!
These long small jobs then stop cleanly and will be restarted next time.

In the C<logtrace> file, such small jobs have the status suspend.
They will be launched with the same command line at the next OAR run.

Example: if you have 50 small jobs that each need 72 h to be done on 1 core,
you can cut this into 24 h parts.

For this example, we suppose that each long job loop needs about 20 min...
So, we send a checkpoint 30 min before the end of the process
to let C<oar-parexec> suspend the jobs already started.
After being checkpointed, C<oar-parexec> does not start any new small job.

 oarsub -t idempotent -n test \
   -l /core=6,walltime=24:00:00 \
   --checkpoint 1800 \
   --transmit \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

After 23 h 30 min, the OAR job stops launching new small jobs.
When all running small jobs are suspended, it exits.
But as the OAR job is of type C<idempotent>,
OAR will re-submit it as long as all the small jobs are not finished...

=head2 Log format

=over

=item B<Version 2>

 log version 2
 start   subjob  1 pid 101468 at 1450482228 oarjob 71725 onnode cl7n001
 end     subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
 error   subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
 suspend subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
 global-time total 555 cumulative 44444

=item B<Version 1>

 log version 1
 start job 1 / 101468 at 1450482228 oar job 71725 on node cl7n001
 end   job 1 / 101468 at 1450482556 oar job 71725 on node cl7n001
 end   job 1 / 101468 at 1450482556 oar job 71725 on node cl7n001
 error:retcode job 1 / 101468 at 1450482556 oar job 71725 on node cl7n00

=back


=head1 SEE ALSO

oar-dispatch, mpilauncher,
oarsh, oar-envsh, ssh

Web site L<http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/OarUtils>


=head1 AUTHORS

Written by Gabriel Moreau, Grenoble - France


=head1 LICENSE AND COPYRIGHT

Licence: GNU GPL version 2 or later and Perl equivalent

Copyright (C) 2011-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France