source: trunk/oarutils/oar-parexec @ 124

Last change on this file since 124 was 124, checked in by g7moreau, 7 years ago
  • Error in global time cumulative
File size: 22.6 KB
RevLine 
[13]1#!/usr/bin/perl
2#
[118]3# 2011/11/27 Gabriel Moreau
[13]4
5use strict;
6
7use Getopt::Long();
8use Pod::Usage;
9use Coro;
10use Coro::Semaphore;
11use Coro::Signal;
12use Coro::Channel;
13use Coro::Handle;
14use IO::File;
15use POSIX qw( WNOHANG WEXITSTATUS );
[32]16use Cwd qw( getcwd );
[13]17
# --- Command-line options (file-scoped, used throughout the script) ---
my $file;                                # job list file (--file)
my $dir;                                 # master folder to iterate (--dir)
my $cmd;                                 # command run in each sub-folder (--cmd)
my $logtrace;                            # trace file enabling re-run support (--logtrace)
my $verbose;
my $job_np         = 1;                  # cores allocated per small job (--jobnp)
my $nodefile       = $ENV{OAR_NODE_FILE} || '';
my $masterio;                            # base name for sub-job stdout/stderr (--masterio)
my $switchio;                            # one stdout/stderr pair per sub-job (--switchio)
my $help;
my $oarsh          = 'oarsh -q -T';      # remote shell command (--oarsh)
my $sig_transmit;                        # forward checkpoint signal to sub-jobs (--transmit)
my $sig_checkpoint = 'USR2';             # signal to listen for (--kill)
my $job_launch_brake = 1; # one second time brake

Getopt::Long::GetOptions(
   'file=s'     => \$file,
   'dir=s'      => \$dir,
   'cmd=s'      => \$cmd,
   'logtrace=s' => \$logtrace,
   'verbose'    => \$verbose,
   'help'       => \$help,
   'oarsh=s'    => \$oarsh,
   'jobnp=i'    => \$job_np,
   'nodefile=s' => \$nodefile,
   'masterio=s' => \$masterio,
   'switchio'   => \$switchio,
   'transmit'   => \$sig_transmit,
   'kill=s'     => \$sig_checkpoint,
   ) or pod2usage(-verbose => 0);
pod2usage(-verbose => 2) if $help;

# either --file, or the --dir/--cmd pair, is mandatory
pod2usage(-verbose => 2)
   unless (-e "$file") or (-d "$dir" and $cmd ne '');
[13]53
# OAR version (4th field of `oarsub -V` output), checked at exit time
# to decide whether exit code 99 is supported.
chomp(my $oar_version = `oarsub -V | awk '{print \$4}'`);

# wall-clock accounting: total across (re-)runs, cumulative over all sub-jobs
my $global_time_atstart    = time;
my $global_time_total      = 0;
my $global_time_cumulative = 0;
61
# Re-run support: parse a previous trace file (if any) and remember
# which sub-jobs already started or finished.
my %state;
my $log_h = IO::File->new();
if (-e "$logtrace") {
   $log_h->open("< $logtrace")
      or die "error: can't read log file: $!";
   while (my $line = <$log_h>) {
      # trace format version 1
      $state{$1} = 'start' if $line =~ m/^start\s+job\s+([^\s]+)\s/;
      $state{$1} = 'end'   if $line =~ m/^end\s+job\s+([^\s]+)\s/;
      # trace format version 2
      $state{$1} = 'start' if $line =~ m/^start\s+subjob\s+([^\s]+)\s/;
      $state{$1} = 'end'   if $line =~ m/^end\s+subjob\s+([^\s]+)\s/;
      # restore accumulated global timers from the previous run
      ($global_time_total, $global_time_cumulative) = ($1, $2)
         if $line =~ m/^global-time\s.*total\s+(\d+)\s+cumulative\s+(\d+)/;
      }
   $log_h->close();
   }
if ($logtrace) {
   $log_h->open(">> $logtrace")
      or die "error: can't append log file $logtrace: $!";
   $log_h->autoflush;
   # wrap in a Coro-aware non-blocking handle
   $log_h = unblock $log_h;
   }

# announce the trace format version we write
$log_h->print("log version 2\n") if $logtrace;
print("log version 2\n") if $verbose;
[121]89
# Build the list of jobs to run, either from a command file (--file)
# or from the sub-folders of a master folder (--dir).
# Each entry is a hashref: { name => ..., cmd => ..., num => ... }.
# Fix: bareword global filehandles (JOB_LIST, DIR) replaced by lexical ones.
my @job = ();
if (-e "$file") {
   my $job_num = 0;
   open(my $job_list_fh, '<', "$file") or die "error: can't open job file $file: $!";
   while (my $job_cmd = <$job_list_fh>) {
      chomp $job_cmd;
      next if $job_cmd =~ m/^#/;      # comment line
      next if $job_cmd =~ m/^\s*$/;   # blank line
      $job_num++;
      # an optional name can be set in a trailing comment: cmd # name=xyz
      my ($job_name) = $job_cmd =~ m/#.*?\bname=(\S+?)\b/i;
      $job_name ||= $job_num;
      push @job, {
         name   => $job_name,
         cmd    => "$job_cmd",
         num    => $job_num,
         };
      }
   close $job_list_fh;
   }
else {
   my $job_num = 0;
   opendir(my $dir_h, $dir) or die "error: can't open folder $dir: $!";
   while (my $item = readdir($dir_h)) {
      next if $item =~ m/^\./;        # hidden entries
      next if $item =~ m/:/;
      next if $item =~ m/\.old$/;     # backup / disabled folders are skipped
      next if $item =~ m/\.sav$/;
      next if $item =~ m/\.bak$/;
      next if $item =~ m/\.no$/;
      next unless (-d "$dir/$item");  # folders only, plain files ignored
      $job_num++;
      push @job, {
         name   => $item,
         cmd    => "cd $dir/$item/; $cmd",
         num    => $job_num,
         };
      }
   closedir $dir_h;
   }
[13]130
# Sub-job names must be unique because the trace file is keyed on them.
# If any name collides, fall back to the sequence number for every job.
{
   my %seen = ();
   my $count_unique_name = grep { ! $seen{ $_->{name} }++ } @job;
   # BUG FIX: compare against the job count (scalar @job), not the last
   # index ($#job == count - 1).  The old test fired when names were all
   # unique (renaming them away) and missed a single duplicate.
   if ($count_unique_name != scalar @job) {
      $_->{name} = $_->{num} for @job;
      }
   }
139
# Available ressources: one line per slot in the node file.
# Fix: bareword global filehandle NODE_FILE replaced by a lexical one,
# and the implicit $_ loop variable named explicitly.
my @ressources = ();
open(my $node_fh, '<', "$nodefile")
   or die "can't open $nodefile: $!";
while (my $node = <$node_fh>) {
   chomp $node;
   next if $node =~ m/^#/;
   next if $node =~ m/^\s*$/;
   push @ressources, $node;
   }
close $node_fh;

my $ressource_size = scalar(@ressources);
die "error: not enought ressources jobnp $job_np > ressources $ressource_size"
   if $job_np > $ressource_size;

# master job current folder: sub-jobs are started from here
my $current_dir = getcwd();

# base names for sub-job output files (see --switchio / --masterio)
my $stderr = $ENV{OAR_STDERR} || '';
$stderr =~ s/\.stderr$//;
$stderr = $masterio if $masterio;
my $stdout = $ENV{OAR_STDOUT} || '';
$stdout =~ s/\.stdout$//;
$stdout = $masterio if $masterio;
[13]164
# Coro plumbing.
# Fix: indirect object syntax (new Class) replaced by Class->new, and
# $job_name_maxlen initialized to 0 to avoid an undef numeric compare.
my $finished = Coro::Signal->new;        # sent when all work is done
my $job_todo = Coro::Semaphore->new(0);  # counts sub-jobs still to complete

# longest job name, used for aligned trace output
my $job_name_maxlen = 0;
for my $job (@job) {
   $job_todo->up;
   $job_name_maxlen = length($job->{name})
      if length($job->{name}) > $job_name_maxlen;
   }

# Channel of ressource slices for parallel jobs: each slot groups
# $job_np consecutive node-file lines, joined by commas.
my $ressources = Coro::Channel->new;
for my $slot (1 .. int($ressource_size / $job_np)) {
   $ressources->put(
      join(',',
         @ressources[ (($slot - 1) * $job_np) .. (($slot * $job_np) - 1) ])
         );
   }

# running sub-jobs, keyed by local pipe PID
my %scheduled = ();
183
# OAR checkpoint handling (signal USR2 by default, see --kill).
# Raising the semaphore stops the launcher from starting new sub-jobs;
# $notify optionally wakes the forwarder that signals running sub-jobs.
# Fix: indirect object syntax (new Class) replaced by Class->new.
my $oar_checkpoint = Coro::Semaphore->new(0);
my $notify         = Coro::Signal->new;
$SIG{$sig_checkpoint} = sub {
   print "warning: receive checkpoint at "
      . time
      . ", no new job, just finishing running job\n"
      if $verbose;
   $oar_checkpoint->up();
   $notify->send if $sig_transmit;
   };
[39]195
# Asynchronous signal forwarder: each time $notify fires, open a shell
# on every node with a running sub-job and forward the checkpoint
# signal to the remote process recorded in its pid file.
async {
   while () {
      $notify->wait;

      for my $job_pid (keys %scheduled) {
         my $job_name     = $scheduled{$job_pid}->{name};
         my $job_pidfile  = $scheduled{$job_pid}->{pidfile};
         my $node_connect = $scheduled{$job_pid}->{node_connect};

         my $remote_h = IO::File->new();
         $remote_h->open("| $oarsh $node_connect >/dev/null 2>&1")
            or die "error: can't notify subjob: $!";

         $remote_h->autoflush;
         $remote_h = unblock $remote_h;

         $remote_h->print("kill -$sig_checkpoint \$(cat $job_pidfile)\n");
         $remote_h->print("exit\n");

         print "warning: transmit signal $sig_checkpoint to job $job_name on node $node_connect.\n"
            if $verbose;

         close $remote_h;
         cede;
         }
      }
   }
225
# asynchrone start job block
# Launcher coroutine: walks the job list, skips jobs already recorded
# as done in the trace file, throttles launches, takes one ressource
# slice per job and starts the sub-job on its node through $oarsh by
# streaming a small shell script down the pipe.
async {
   my $timer;
   JOB:
   for my $job (@job) {
      my $job_name   = $job->{name};
      my $job_cmd    = $job->{cmd};

      # job has been already run ?
      if (exists $state{$job_name}) {
         if ($state{$job_name} eq 'start') {
            print "warning: job $job_name was not clearly finished, relaunching...\n"
               if $verbose;
            }
         elsif ($state{$job_name} eq 'end') {
            delete $state{$job_name}; # free memory
            $job_todo->down;
            print "warning: job $job_name already run\n" if $verbose;
            cede;
            next JOB;
            }
         }

      # wait to not re-launch oarstat to fast
      # equivalent to sleep $job_launch_brake
      # NOTE(review): AE::now comes from AnyEvent, which is not loaded
      # explicitly here - presumably pulled in via Coro. TODO confirm.
      $timer = AE::now + $job_launch_brake;
      while ( AE::now < $timer ) {
         # force update of AE time
         AE::now_update;
         cede;
         }

      # take job ressource (blocks until a slice is free)
      my $job_ressource = $ressources->get;

      # no more launch job when OAR checkpointing
      last JOB if $oar_checkpoint->count() > 0;

      # open a shell on the first node of the slice
      my ($node_connect) = split ',', $job_ressource;
      my $fh = IO::File->new();
      my $job_pid = $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
         or die "error: can't start subjob: $!";

      $fh->autoflush;
      $fh = unblock $fh;

      my $begin_at = time;
      #my $msg = sprintf "start job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
      my $msg = sprintf "start   subjob %${job_name_maxlen}s pid %5i at %s oarjob %i onnode %s\n",
         $job_name, $job_pid, $begin_at, $ENV{OAR_JOB_ID}, $job_ressource;
      $log_h->print($msg) if $logtrace;
      print($msg) if $verbose;

      # per sub-job stdout/stderr redirections (see --switchio);
      # left undef (interpolates as empty) when --switchio is off
      my ($job_stdout, $job_stderr);
      $job_stdout = ">  $stdout-$job_name.stdout" if $stdout ne '' and $switchio;
      $job_stderr = "2> $stderr-$job_name.stderr" if $stderr ne '' and $switchio;

      # scratch files on the remote node: node list, shell pid, exit status
      my $job_nodefile   = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name";
      my $job_pidfile    = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.pid";
      my $job_statusfile = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.status";

      # register the sub-job so the reaper and signal forwarder see it
      $scheduled{$job_pid} = {
         fh           => $fh,
         node_connect => $node_connect,
         ressource    => $job_ressource,
         name         => $job_name,
         pidfile      => $job_pidfile,
         begin_at     => $begin_at,
         };

      # set job environment, run it and clean
      if ($job_np > 1) {
         # expose the ressource slice as a per-job OAR_NODE_FILE
         $fh->print("printf \""
               . join('\n', split(',', $job_ressource,))
               . "\" > $job_nodefile\n");
         $fh->print("OAR_NODE_FILE=$job_nodefile\n");
         $fh->print("OAR_NP=$job_np\n");
         $fh->print("export OAR_NODE_FILE\n");
         $fh->print("export OAR_NP\n");
         $fh->print("unset OAR_MSG_NODEFILE\n");
         }

      $fh->print("cd $current_dir\n");

      if ($sig_transmit) {
         # on checkpoint, forward the signal to every process of the sub-job
         $fh->print("trap 'jobs -p|xargs -r ps -o pid --no-headers --ppid|xargs -r kill -$sig_checkpoint' $sig_checkpoint\n");
         $fh->print("echo \$\$ > $job_pidfile\n");
         }

      # run the command in background, record a non-zero status, wait
      # for all children, then propagate the status as the shell exit code
      $fh->print("echo 0 > $job_statusfile\n");
      $fh->print("(\n");
      $fh->print("$job_cmd\n");
      $fh->print(") $job_stdout $job_stderr || echo \$? > $job_statusfile \&\n");
      $fh->print("while [ \$(jobs -p | wc -l) -gt 0 ]\n");
      $fh->print("do\n");
      $fh->print("   wait\n");
      $fh->print("done\n");

      $fh->print("OAR_SUBJOB_RETCODE=\$(cat $job_statusfile)\n");
      $fh->print("rm -f $job_statusfile\n");
      $fh->print("rm -f $job_pidfile\n")  if $sig_transmit;
      $fh->print("rm -f $job_nodefile\n") if $job_np > 1;
      $fh->print("exit \$OAR_SUBJOB_RETCODE\n");
      cede;
      }
   }
332
# asynchrone end job block
# Reaper coroutine: polls running sub-jobs, collects the exit status,
# writes the end/error/suspend trace line, returns the ressource slice
# to the channel and signals global completion.
async {
   while () {
      for my $job_pid (keys %scheduled) {
         # non blocking PID test
         # NOTE(review): a -1 return (no such child) is also truthy and
         # treated as "finished", which at least guarantees cleanup -
         # confirm before tightening this to waitpid(...) > 0.
         if (waitpid($job_pid, WNOHANG)) {
            # get return status code
            my $job_retcode0 = $? >> 8;
            #print "ERREUR0 $job_pid $job_retcode0\n" if $job_retcode0;

            # job time
            my $end_at = time;
            my $duration = $end_at - $scheduled{$job_pid}->{begin_at};
            $global_time_cumulative += $duration;

            #my $msg = sprintf "end   job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
            my $msg = sprintf "end     subjob %${job_name_maxlen}s pid %5i at %s oarjob %i onnode %s duration %i status %i\n",
               $scheduled{$job_pid}->{name},
               $job_pid, $end_at, $ENV{OAR_JOB_ID}, $scheduled{$job_pid}->{ressource},
               $duration, $job_retcode0;

            # Job error (status 99 is excluded: reserved for checkpoint exit)
            $msg =~ s/^end\s+subjob/error   subjob/
               if $job_retcode0 > 0 and $job_retcode0 != 99;

            # Job non finish, just suspend if received checkpoint signal
            $msg =~ s/^end\s+subjob/suspend subjob/
               if $sig_transmit and $oar_checkpoint->count() > 0;

            $log_h->print($msg) if $logtrace;
            print($msg) if $verbose;
            close $scheduled{$job_pid}->{fh};
            # leave ressources for another job
            $ressources->put($scheduled{$job_pid}->{ressource});
            $job_todo->down;
            delete $scheduled{$job_pid};
            }
         cede;
         }

      # checkpointing ! just finishing running job and quit
      $finished->send if $oar_checkpoint->count() > 0 and scalar(keys(%scheduled)) == 0;

      $finished->send if $job_todo->count() == 0;
      cede;
      }
   }
380
# hand over to the worker coroutines and block until everything is done
cede;
$finished->wait;

# fold this run's wall clock into the cross-run totals and report them
$global_time_total += (time - $global_time_atstart);
my $time_msg = "global-time total $global_time_total cumulative $global_time_cumulative\n";
$log_h->print($time_msg) if $logtrace;
print($time_msg) if $verbose;

# close log trace file
$log_h->close() if $logtrace;

# after a checkpoint, exit 99 so an idempotent OAR job is resubmitted
# (skipped on OAR 2.4, which does not honour that convention)
exit 99 if ($oar_checkpoint->count() > 0) and ($oar_version !~ m/^2\.4/);
395
396
[13]397__END__
398
399=head1 NAME
400
[88]401oar-parexec - parallel execution of many small short or long job
[13]402
403=head1 SYNOPSIS
404
[47]405 oar-parexec --file filecommand \
406    [--logtrace tracefile] [--verbose] \
407    [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
[88]408    [--switchio] [--masterio basefileio] \
409    [--kill signal] [--transmit]
[46]410
[47]411 oar-parexec --dir foldertoiterate --cmd commandtolaunch \
412    [--logtrace tracefile] [--verbose] \
413    [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
[88]414    [--switchio] [--masterio basefileio] \
415    [--kill signal] [--transmit]
[46]416
[13]417 oar-parexec --help
418
[32]419=head1 DESCRIPTION
420
C<oar-parexec> can execute lots of small short or long jobs in parallel inside a cluster.
The number of parallel jobs running at one time cannot exceed the number of cores defined in the node file.
C<oar-parexec> is easier to use inside an OAR job environment,
which automatically defines these strategic parameters...
However, it can be used outside OAR.
[32]426
[47]427Option C<--file> or C<--dir> and C<--cmd> are the only mandatory parameters.
[32]428
Small jobs will be launched in the same folder as the master job.
Two environment variables are defined for each small job,
and only in the case of parallel small jobs (option C<--jobnp> > 1).
[32]432
[34]433 OAR_NODE_FILE - file that list node for parallel computing
434 OAR_NP        - number of processor affected
[32]435
The file defined by OAR_NODE_FILE is created in /tmp
on the node before launching the small job,
and this file will be deleted after the job completes.
C<oar-parexec> is a simple script:
OAR_NODE_FILE will not be deleted if the master job crashes.
441
[37]442OAR define other variable that are equivalent to OAR_NODE_FILE:
443OAR_NODEFILE, OAR_FILE_NODES, OAR_RESOURCE_FILE...
444You can use in your script the OAR original file ressources
445by using these variable if you need it.
[34]446
When used with long jobs,
activate option C<--transmit> to send the OAR checkpoint signal
and suspend small jobs before the walltime cut!
[82]450
[13]451=head1 OPTIONS
452
[32]453=over 12
[13]454
[47]455=item B<-f|--file filecommand>
[13]456
[32]457File name which content job list.
[45]458For the JOB_NAME definition,
459the first valid job in the list will have the number 1 and so on...
[13]460
[77]461It's possible to fix the name inside a comment on the job line.
462For example:
463
464 $HOME/test/subjob1.sh # name=subjob1
465
466The key C<name> is case insensitive,
467the associated value cannot have a space...
468
[88]469The command can be any shell command.
470It's possible to change folder,
471or launch an asynchrone job in parallel,
472but one command must block and not be launch in asynchrone (with & or coproc).
473Example :
474
475 cd ./test; ./subjob1.sh
[119]476 cd ./test; nice -18 du -sk ./ & ./subjob1.sh
[88]477
[119]478Commands C<du -sk ./>  and C<./subjob1.sh> will be done in parallel on the same ressource...
479It's better if C<du -sk ./> is faster than C<./subjob1.sh> !
480Do not abuse of that!
[88]481
[47]482=item B<-d|--dir foldertoiterate>
[45]483
484Command C<--cmd> will be launch in all sub-folder of this master folder.
485Files in this folder will be ignored.
[47]486Sub-folder name which begin with F<.>
487or finish with F<.old>, F<.sav>, F<.bak>, F<.no> will either be ignored...
[45]488
489The JOB_NAME is simply the Sub-folder name.
490
491=item B<-c|--cmd commandtolaunch>
492
Command (and arguments to it) that will be launched in every sub-folder
of the folder given by option C<--dir>.
Like for option C<--file>, the command can be any valid shell command,
but one must block.
[45]497
[43]498=item B<-l|--logtrace tracefile>
499
File which logs and traces running jobs.
In case of re-running the same master command (after a crash for example),
only jobs that are not marked as done will be run again.
Be careful: jobs marked as running (started but not finished) will be run again.
Tracing is based on the JOB_NAME between multiple runs.
[43]505
This option is very useful in case of crash,
but also for checkpointing and idempotent OAR jobs.
508
[32]509=item B<-v|--verbose>
[13]510
[34]511=item B<-j|--jobnp integer>
[13]512
[34]513Number of processor to allocated for each small job.
5141 by default.
515
516=item B<-n|--nodefile filenode>
517
[44]518File name that list all the node where job could be launch.
[32]519By defaut, it's define automatically by OAR via
520environment variable C<OAR_NODE_FILE>.
[13]521
[32]522For example, if you want to use 6 core on your cluster node,
523you need to put 6 times the hostname node in this file,
524one per line...
525It's a very common file in MPI process !
[13]526
[46]527=item B<-o|-oarsh command>
[13]528
[46]529Command use to launch a shell on a node.
530By default
[13]531
[46]532 oarsh -q -T
533
534Change it to C<ssh> if you are not using an OAR cluster...
535
[32]536=item B<-s|--switchio>
[21]537
[32]538Each small job will have it's own output STDOUT and STDERR
[45]539base on master OAR job with C<JOB_NAME> inside
[32]540(or base on C<basefileio> if option C<masterio>).
541Example :
[21]542
[45]543 OAR.151524.stdout -> OAR.151524-JOB_NAME.stdout
[21]544
[32]545where 151524 here is the master C<OAR_JOB_ID>
[45]546and C<JOB_NAME> is the small job name.
[21]547
[46]548=item B<-m|--masterio basefileio>
[32]549
The C<basefileio> will be used in place of the environment variables
C<OAR_STDOUT> and C<OAR_STDERR> (without extension) to build the base name of the small job standard output
(only used when option C<switchio> is activated).
[32]553
[78]554=item B<-k|--kill signal>
555
556Signal to listen and make a clean stop of the current C<oar-parexec> process.
[118]557By default, use USR2 signal (see C<kill -l> for a list of possible signal).
[78]558
559=item B<-t|--transmit>
560
Resend the caught signal to sub-jobs when receiving it.
By default, no signal is transmitted to child processes.

It's only valuable if used for long sub-jobs that can,
in return, make themselves a clean restart.
566
567
[32]568=item B<-h|--help>
569
570=back
571
572
573=head1 EXAMPLE
574
[44]575=head2 Simple list of sequential job
576
[47]577Content for the job file command (option C<--file>) could have:
[21]578
[13]579 - empty line
580 - comment line begin with #
[86]581 - valid shell command (can containt comment)
[13]582
583Example where F<$HOME/test/subjob1.sh> is a shell script (executable).
584
[86]585 $HOME/test/subjob01.sh  # name=subjob01
586 $HOME/test/subjob02.sh  # name=subjob02
587 $HOME/test/subjob03.sh  # name=subjob03
588 $HOME/test/subjob04.sh  # name=subjob04
[32]589 ...
[86]590 $HOME/test/subjob38.sh  # name=subjob38
591 $HOME/test/subjob39.sh  # name=subjob39
592 $HOME/test/subjob40.sh  # name=subjob40
[13]593
[44]594These jobs could be launch by:
[13]595
[49]596 oarsub -n test -l /core=6,walltime=04:00:00 \
597   "oar-parexec -f ./subjob.list.txt"
[13]598
[47]599=head2 Folder job
600
601In a folder F<subjob.d>, create sub-folder with your data inside : F<test1>, <test2>...
602The same command will be executed in every sub-folder.
603C<oar-parexec> change the current directory to the sub-folder before launching it.
604
605A very simple job could be:
606
[49]607 oarsub -n test -l /core=6,walltime=04:00:00 \
608   "oar-parexec -d ./subjob.d -c 'sleep 10; env'"
[47]609
The command C<env> will be executed in all folders F<test1>, F<test2>... after a 10s pause.
611
612Sometime, it's simpler to use file list command,
613sometime, jobs by folder with the same command run is more relevant.
614
[44]615=head2 Parallel job
[28]616
[44]617You need to put the number of core each small job need with option C<--jobnp>.
618If your job is build on OpenMP or MPI,
619you can use OAR_NP and OAR_NODE_FILE variables to configure them.
620On OAR cluster, you need to use C<oarsh> or a wrapper like C<oar-envsh>
621for connexion between node instead of C<ssh>.
622
623Example with parallel small job on 2 core:
624
[49]625 oarsub -n test -l /core=6,walltime=04:00:00 \
626   "oar-parexec -j 2 -f ./subjob.list.txt"
[44]627
628=head2 Tracing and master crash
629
630If the master node crash after hours of calculus, everything is lost ?
631No, with option C<--logtrace>,
632it's possible to remember older result
633and not re-run these job the second and next time.
634
[49]635 oarsub -n test -l /core=6,walltime=04:00:00 \
636   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
[44]637
638After a crash or an C<oardel> command,
639you can then re-run the same command that will end to execute the jobs in the list
640
[49]641 oarsub -n test -l /core=6,walltime=04:00:00 \
642   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
[44]643
644C<logtrace> file are just plain file.
645We use the extension '.log' because these files are automatically
646eliminate from our backup system!
647
648=head2 Checkpointing and Idempotent
649
650C<oar-parexec> is compatible with the OAR checkpointing.
[89]651If you have 2000 small jobs that need 55h to be done on 6 cores,
[44]652you can cut this in small parts.
653
654For this example, we suppose that each small job need about 10min...
655So, we send a checkpoint 12min before the end of the process
656to let C<oar-parexec> finish the jobs started.
657After being checkpointed, C<oar-parexec> do not start any new small job.
658
[49]659 oarsub -t idempotent -n test \
660   -l /core=6,walltime=04:00:00 \
661   --checkpoint 720 \
[44]662   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
663
664After 3h48min, the OAR job will begin to stop launching new small job.
665When all running small job are finished, it's exit.
666But as the OAR job is type C<idempotent>,
667OAR will re-submit it as long as all small job are not executed...
668
669This way, we let other users a chance to use the cluster!
670
671In this last exemple, we use moldable OAR job with idempotent
672to reserve many core for a small time or a few cores for a long time:
673
674 oarsub -t idempotent -n test \
675   -l /core=50,walltime=01:05:00 \
676   -l /core=6,walltime=04:00:00 \
677   --checkpoint 720 \
678   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
679
[78]680=head2 Signal, recurse and long job
[44]681
[78]682By default, OAR use signal USR2 for checkpointing.
[79]683It's possible to change this with option C<--kill>.
[78]684
685When use with long small job, checkpointing could be too long...
[79]686More than walltime!
687The option C<--transmit> could be use to checkpoint small job!
688These long small job will then stop cleanly and will be restarted next time.
[78]689
690In the C<logtrace> file, small job will have the status suspend.
[79]691They will be launch with the same command line at the next OAR run.
[78]692
[89]693Example: if you have 50 small jobs that each need 72h to be done on 1 cores,
694you can cut this in 24h parts.
695
696For this example, we suppose that each long job loop need about 20min...
697So, we send a checkpoint 30min before the end of the process
698to let C<oar-parexec> suspend the jobs started.
699After being checkpointed, C<oar-parexec> do not start any new small job.
700
701 oarsub -t idempotent -n test \
702   -l /core=6,walltime=24:00:00 \
703   --checkpoint 1800 \
704   --transmit \
705   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
706
707After 23h30min, the OAR job will begin to stop launching new small job.
708When all running small job are suspend, it's exit.
709But as the OAR job is type C<idempotent>,
710OAR will re-submit it as long as all small job are not finished...
711
[121]712=head2 Log format
713
714=over
715
716=item B<Version 2>
717
718 log version 2
719 start   subjob  1 pid 101468 at 1450482228 oarjob 71725 onnode cl7n001
720 end     subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
721 error   subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
722 suspend subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
[122]723 global-time total 555 cumulative 44444
[121]724
725=item B<Version 1>
726
727 log version 1
728 start job 1 / 101468 at 1450482228 oar job 71725 on node cl7n001
729 end   job 1 / 101468 at 1450482556 oar job 71725 on node cl7n001
[122]730 end   job 1 / 101468 at 1450482556 oar job 71725 on node cl7n001
731 error:retcode job 1 / 101468 at 1450482556 oar job 71725 on node cl7n00
[121]732
733=back
734
[122]735
[21]736=head1 SEE ALSO
737
[44]738oar-dispatch, mpilauncher,
739orsh, oar-envsh, ssh
[21]740
741
[13]742=head1 AUTHORS
743
[21]744Written by Gabriel Moreau, Grenoble - France
[13]745
[21]746
747=head1 LICENSE AND COPYRIGHT
748
749GPL version 2 or later and Perl equivalent
750
[121]751Copyright (C) 2011-2017 Gabriel Moreau / LEGI - CNRS UMR 5519 - France
Note: See TracBrowser for help on using the repository browser.