Changeset 88 for trunk/oarutils


Ignore:
Timestamp:
Jun 5, 2012, 10:52:59 PM (12 years ago)
Author:
g7moreau
Message:
  • Assume unique job name
  • Always launch job in asynchrone
  • Doc on command line
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/oarutils/oar-parexec

    r87 r88  
    8282      my ($job_name) = $job_cmd =~ m/#.*?\bname=(\S+?)\b/i;
    8383      $job_name ||= $job_num;
    84       push @job, { name => $job_name, cmd => "$job_cmd" };
     84      push @job, {
     85         name   => $job_name,
     86         cmd    => "$job_cmd",
     87         num    => $job_num,
     88         };
    8589      }
    8690   close JOB_LIST;
    8791   }
    8892else {
     93   my $job_num = 0;
    8994   opendir(DIR, $dir) or die "error: can't open folder $dir: $!";
    9095   while (my $item = readdir(DIR)) {
     
    96101      next if $item =~ m/\.no$/;
    97102      next unless (-d "$dir/$item");
    98       push @job, { name => $item, cmd => "( cd $dir/$item/; $cmd )" };
     103      $job_num++;
     104      push @job, {
     105         name   => $item,
     106         cmd    => "cd $dir/$item/; $cmd",
     107         num    => $job_num,
     108         };
    99109      }
    100110   closedir DIR;
     111   }
     112
     113# assume unique job name
     114{
     115   my %seen = ();
     116   my $count_unique_name = grep { ! $seen{ $_->{name} }++ } @job;
     117   if ($count_unique_name != $#job) {
     118      $_->{name} = $_->{num} for @job;
     119      }
    101120   }
    102121
     
    243262         };
    244263
    245       #$job_cmd =~ s/(#.*)$// if $sig_transmit; # suppress comment
    246 
    247264      # set job environment, run it and clean
    248265      if ($job_np > 1) {
     
    256273         $fh->print("unset OAR_MSG_NODEFILE\n");
    257274         }
     275
    258276      $fh->print("cd $current_dir\n");
     277
    259278      if ($sig_transmit) {
    260279         $fh->print("trap 'jobs -p|xargs -r ps -o pid --no-headers --ppid|xargs -r kill -$sig_checkpoint' $sig_checkpoint\n");
    261280         $fh->print("echo \$\$ > $job_pidfile\n");
    262          $fh->print("(\n");
    263          $fh->print("$job_cmd\n");
    264          $fh->print(") $job_stdout $job_stderr \&\n");
    265          $fh->print("while [ \$(jobs -p | wc -l) -gt 0 ]\n");
    266          $fh->print("do\n");
    267          $fh->print("   wait\n");
    268          $fh->print("done\n");
    269          $fh->print("rm -f $job_pidfile\n");
    270281         }
    271       else {
    272          $fh->print("{\n");
    273          $fh->print("$job_cmd\n");
    274          $fh->print("} $job_stdout $job_stderr\n");
    275          }
     282
     283      $fh->print("(\n");
     284      $fh->print("$job_cmd\n");
     285      $fh->print(") $job_stdout $job_stderr \&\n");
     286      $fh->print("while [ \$(jobs -p | wc -l) -gt 0 ]\n");
     287      $fh->print("do\n");
     288      $fh->print("   wait\n");
     289      $fh->print("done\n");
     290
     291      $fh->print("rm -f $job_pidfile\n")  if $sig_transmit;
    276292      $fh->print("rm -f $job_nodefile\n") if $job_np > 1;
    277293      $fh->print("exit\n");
     
    325341=head1 NAME
    326342
    327 oar-parexec - parallel execution of many small job
     343oar-parexec - parallel execution of many small jobs, short or long
    328344
    329345=head1 SYNOPSIS
     
    332348    [--logtrace tracefile] [--verbose] \
    333349    [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
    334     [--switchio] [--masterio basefileio]
     350    [--switchio] [--masterio basefileio] \
     351    [--kill signal] [--transmit]
    335352
    336353 oar-parexec --dir foldertoiterate --cmd commandtolaunch \
    337354    [--logtrace tracefile] [--verbose] \
    338355    [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
    339     [--switchio] [--masterio basefileio]
     356    [--switchio] [--masterio basefileio] \
     357    [--kill signal] [--transmit]
    340358
    341359 oar-parexec --help
     
    343361=head1 DESCRIPTION
    344362
    345 C<oar-parexec> can execute lot of small job in parallel inside a cluster.
    346 Number of parallel job at one time cannot exceed the number of core define in the node file
     363C<oar-parexec> can execute a lot of small jobs, short or long, in parallel inside a cluster.
     364The number of jobs running in parallel at any one time cannot exceed the number of cores defined in the node file.
    347365C<oar-parexec> is easier to use inside an OAR job environment
    348366which automatically defines these strategic parameters...
     
    369387by using these variable if you need it.
    370388
     389When used with long jobs,
     390activate option C<--transmit> to send the OAR checkpoint signal
     391and suspend the small jobs before the walltime cut!
    371392
    372393=head1 OPTIONS
     
    387408The key C<name> is case insensitive,
    388409the associated value cannot have a space...
     410
     411The command can be any shell command.
     412It is possible to change folder,
     413or to launch an asynchronous job in parallel,
     414but one command must block and must not be launched asynchronously (with & or coproc).
     415Example:
     416
     417 cd ./test; ./subjob1.sh
     418 cd ./test; nice -18 du -sk ./ & ./subjob1.sh
     419
     420Command C<du -sk ./> will be run in parallel on the same resource...
    389421
    390422=item B<-d|--dir foldertoiterate>
     
    399431=item B<-c|--cmd commandtolaunch>
    400432
    401 Command (and argument to it) tha will be launch in all sub-folder
    402 parameter folfer C<--dir>
     433Command (and its arguments) that will be launched in every sub-folder
     434of the folder given by C<--dir>.
     435As with option C<--file>, the command can be any valid shell command,
     436but one command must block.
    403437
    404438=item B<-l|--logtrace tracefile>
Note: See TracChangeset for help on using the changeset viewer.