Changeset 88 for trunk/oarutils
- Timestamp:
- Jun 5, 2012, 10:52:59 PM (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/oarutils/oar-parexec
r87 r88 82 82 my ($job_name) = $job_cmd =~ m/#.*?\bname=(\S+?)\b/i; 83 83 $job_name ||= $job_num; 84 push @job, { name => $job_name, cmd => "$job_cmd" }; 84 push @job, { 85 name => $job_name, 86 cmd => "$job_cmd", 87 num => $job_num, 88 }; 85 89 } 86 90 close JOB_LIST; 87 91 } 88 92 else { 93 my $job_num = 0; 89 94 opendir(DIR, $dir) or die "error: can't open folder $dir: $!"; 90 95 while (my $item = readdir(DIR)) { … … 96 101 next if $item =~ m/\.no$/; 97 102 next unless (-d "$dir/$item"); 98 push @job, { name => $item, cmd => "( cd $dir/$item/; $cmd )" }; 103 $job_num++; 104 push @job, { 105 name => $item, 106 cmd => "cd $dir/$item/; $cmd", 107 num => $job_num, 108 }; 99 109 } 100 110 closedir DIR; 111 } 112 113 # assume unique job name 114 { 115 my %seen = (); 116 my $count_unique_name = grep { ! $seen{ $_->{name} }++ } @job; 117 if ($count_unique_name != $#job) { 118 $_->{name} = $_->{num} for @job; 119 } 101 120 } 102 121 … … 243 262 }; 244 263 245 #$job_cmd =~ s/(#.*)$// if $sig_transmit; # suppress comment246 247 264 # set job environment, run it and clean 248 265 if ($job_np > 1) { … … 256 273 $fh->print("unset OAR_MSG_NODEFILE\n"); 257 274 } 275 258 276 $fh->print("cd $current_dir\n"); 277 259 278 if ($sig_transmit) { 260 279 $fh->print("trap 'jobs -p|xargs -r ps -o pid --no-headers --ppid|xargs -r kill -$sig_checkpoint' $sig_checkpoint\n"); 261 280 $fh->print("echo \$\$ > $job_pidfile\n"); 262 $fh->print("(\n");263 $fh->print("$job_cmd\n");264 $fh->print(") $job_stdout $job_stderr \&\n");265 $fh->print("while [ \$(jobs -p | wc -l) -gt 0 ]\n");266 $fh->print("do\n");267 $fh->print(" wait\n");268 $fh->print("done\n");269 $fh->print("rm -f $job_pidfile\n");270 281 } 271 else { 272 $fh->print("{\n"); 273 $fh->print("$job_cmd\n"); 274 $fh->print("} $job_stdout $job_stderr\n"); 275 } 282 283 $fh->print("(\n"); 284 $fh->print("$job_cmd\n"); 285 $fh->print(") $job_stdout $job_stderr \&\n"); 286 $fh->print("while [ \$(jobs -p | wc -l) -gt 0 ]\n"); 287 
$fh->print("do\n"); 288 $fh->print(" wait\n"); 289 $fh->print("done\n"); 290 291 $fh->print("rm -f $job_pidfile\n") if $sig_transmit; 276 292 $fh->print("rm -f $job_nodefile\n") if $job_np > 1; 277 293 $fh->print("exit\n"); … … 325 341 =head1 NAME 326 342 327 oar-parexec - parallel execution of many small job 343 oar-parexec - parallel execution of many small short or long job 328 344 329 345 =head1 SYNOPSIS … … 332 348 [--logtrace tracefile] [--verbose] \ 333 349 [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \ 334 [--switchio] [--masterio basefileio] 350 [--switchio] [--masterio basefileio] \ 351 [--kill signal] [--transmit] 335 352 336 353 oar-parexec --dir foldertoiterate --cmd commandtolaunch \ 337 354 [--logtrace tracefile] [--verbose] \ 338 355 [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \ 339 [--switchio] [--masterio basefileio] 356 [--switchio] [--masterio basefileio] \ 357 [--kill signal] [--transmit] 340 358 341 359 oar-parexec --help … … 343 361 =head1 DESCRIPTION 344 362 345 C<oar-parexec> can execute lot of small job in parallel inside a cluster. 346 Number of parallel job at one time cannot exceed the number of core define in the node file 363 C<oar-parexec> can execute lot of small short or long job in parallel inside a cluster. 364 Number of parallel job at one time cannot exceed the number of core define in the node file. 347 365 C<oar-parexec> is easier to use inside an OAR job environment 348 366 which define automatically these strategics parameters... … … 369 387 by using these variable if you need it. 370 388 389 When used with long job, 390 activate option C<--transmit> to send OAR checkpoint signal 391 and suspend small job before the walltime cut! 371 392 372 393 =head1 OPTIONS … … 387 408 The key C<name> is case insensitive, 388 409 the associated value cannot have a space... 410 411 The command can be any shell command. 
412 It's possible to change folder, 413 or launch an asynchronous job in parallel, 414 but one command must block and not be launched asynchronously (with & or coproc). 415 Example : 416 417 cd ./test; ./subjob1.sh 418 cd ./test; nice -18 du -sk ./ & ./test/subjob1.sh 419 420 Command C<du -sk ./> will be done in parallel on the same resource... 389 421 390 422 =item B<-d|--dir foldertoiterate> … … 399 431 =item B<-c|--cmd commandtolaunch> 400 432 401 Command (and argument to it) tha will be launch in all sub-folder 402 parameter folfer C<--dir> 433 Command (and argument to it) that will be launched in all sub-folder 434 parameter folder C<--dir>. 435 Like for option C<--file>, command can be any valid shell command 436 but one must block. 403 437 404 438 =item B<-l|--logtrace tracefile>
Note: See TracChangeset
for help on using the changeset viewer.