source: trunk/oarutils/oar-parexec @ 120

Last change on this file since 120 was 120, checked in by g7moreau, 9 years ago
  • First implementation of functional return status code
File size: 20.5 KB
RevLine 
[13]1#!/usr/bin/perl
2#
[118]3# 2011/11/27 Gabriel Moreau
[13]4
5use strict;
6
7use Getopt::Long();
8use Pod::Usage;
9use Coro;
10use Coro::Semaphore;
11use Coro::Signal;
12use Coro::Channel;
13use Coro::Handle;
14use IO::File;
15use POSIX qw( WNOHANG WEXITSTATUS );
[32]16use Cwd qw( getcwd );
[13]17
# Command-line state (file-scoped lexicals, used throughout the script).
my $file;                        # job list file (--file)
my $dir;                         # folder whose sub-folders are iterated (--dir)
my $cmd;                         # command run in each sub-folder (--cmd)
my $logtrace;                    # trace file enabling re-run / checkpoint skip
my $verbose;
my $help;
my $masterio;                    # base name overriding OAR_STDOUT/OAR_STDERR
my $switchio;                    # one stdout/stderr pair per small job
my $sig_transmit;                # forward the checkpoint signal to sub-jobs
my $job_np           = 1;                         # cores per small job
my $nodefile         = $ENV{OAR_NODE_FILE} || ''; # node list, one core per line
my $oarsh            = 'oarsh -q -T';             # remote shell command
my $sig_checkpoint   = 'USR2';                    # signal OAR sends on checkpoint
my $job_launch_brake = 1;                         # one second brake between launches

Getopt::Long::GetOptions(
   'file=s'     => \$file,
   'dir=s'      => \$dir,
   'cmd=s'      => \$cmd,
   'logtrace=s' => \$logtrace,
   'verbose'    => \$verbose,
   'help'       => \$help,
   'oarsh=s'    => \$oarsh,
   'jobnp=i'    => \$job_np,
   'nodefile=s' => \$nodefile,
   'masterio=s' => \$masterio,
   'switchio'   => \$switchio,
   'transmit'   => \$sig_transmit,
   'kill=s'     => \$sig_checkpoint,
   ) or pod2usage(-verbose => 0);

# Full manual on --help, or when neither a job file nor a dir+cmd pair is given.
pod2usage(-verbose => 2) if $help;
pod2usage(-verbose => 2)
   if not ((-e "$file") or (-d "$dir" and $cmd ne ''));
[13]53
# OAR version string (4th whitespace field of `oarsub -V` output);
# tested at exit time before returning the idempotent status code 99.
[116]54my $oar_version = `oarsub -V | awk '{print \$4}'`;
55chomp $oar_version;
56
[43]57# re-run, keep trace of job already done
# %state maps a job name to 'start' (launched but never seen finishing,
# so it will be relaunched) or 'end' (completed in a previous run, skipped).
[38]58my %state;
59my $log_h = IO::File->new();
[45]60if (-e "$logtrace") {
[43]61   $log_h->open("< $logtrace")
62      or die "error: can't read log file: $!";
[38]63   while (<$log_h>) {
# trace lines look like "start job NAME ..." / "end   job NAME ..."
[45]64      $state{$1} = 'start' if m/^start\s+job\s+([^\s]+)\s/;
65      $state{$1} = 'end'   if m/^end\s+job\s+([^\s]+)\s/;
[41]66      }
[38]67   $log_h->close();
68   }
# Re-open the trace in append mode; unblock() wraps the handle for Coro
# so coroutine prints do not stall the scheduler.
[43]69if ($logtrace) {
70   $log_h->open(">> $logtrace")
71      or die "error: can't append log file $logtrace: $!";
[40]72   $log_h->autoflush;
[38]73   $log_h = unblock $log_h;
74   }
75
# Build the list of jobs to run.  Each entry is { name, cmd, num }:
#  - with --file: one job per non-blank, non-comment line; an optional
#    "# name=xxx" trailing comment fixes the job name, else its rank is used;
#  - with --dir:  one job per sub-folder (hidden and .old/.sav/.bak/.no
#    folders skipped), named after the sub-folder.
# Fix over original: lexical filehandles instead of bareword globals.
my @job = ();
if (-e "$file") {
   my $job_num = 0;
   open my $job_list_h, '<', $file
      or die "error: can't open job file $file: $!";
   while (my $job_cmd = <$job_list_h>) {
      chomp $job_cmd;
      next if $job_cmd =~ m/^#/;      # full-line comment
      next if $job_cmd =~ m/^\s*$/;   # blank line
      $job_num++;
      # optional explicit name inside a trailing comment: "... # name=xxx"
      my ($job_name) = $job_cmd =~ m/#.*?\bname=(\S+?)\b/i;
      $job_name ||= $job_num;
      push @job, {
         name => $job_name,
         cmd  => "$job_cmd",
         num  => $job_num,
         };
      }
   close $job_list_h;
   }
else {
   my $job_num = 0;
   opendir my $dir_h, $dir or die "error: can't open folder $dir: $!";
   while (my $item = readdir($dir_h)) {
      next if $item =~ m/^\./;        # hidden entries
      next if $item =~ m/:/;
      next if $item =~ m/\.old$/;     # working / backup folders are ignored
      next if $item =~ m/\.sav$/;
      next if $item =~ m/\.bak$/;
      next if $item =~ m/\.no$/;
      next unless -d "$dir/$item";
      $job_num++;
      push @job, {
         name => $item,
         cmd  => "cd $dir/$item/; $cmd",
         num  => $job_num,
         };
      }
   closedir $dir_h;
   }
[13]116
# Assume unique job name: if at least two jobs share a name, fall back to
# the job rank as the name for everybody (trace matching needs unique names).
# Fix over original: the distinct-name count was compared against $#job,
# the last INDEX (count - 1), so all-unique names were wrongly renumbered
# while a list with exactly one duplicate pair kept its duplicates.
{
   my %seen = ();
   my $count_unique_name = grep { !$seen{ $_->{name} }++ } @job;
   if ($count_unique_name != scalar @job) {
      $_->{name} = $_->{num} for @job;
      }
   }
125
# Resources available: one core per non-blank, non-comment line of the
# node file (OAR writes the hostname once per allocated core).
# Fixes over original: lexical filehandle; "enought" typo in die message.
my @ressources = ();
open my $node_h, '<', $nodefile
   or die "can't open $nodefile: $!";
while (<$node_h>) {
   chomp;
   next if m/^#/;
   next if m/^\s*$/;
   push @ressources, $_;
   }
close $node_h;

my $ressource_size = scalar(@ressources);
# A single job cannot need more cores than the whole allocation provides.
die "error: not enough ressources jobnp $job_np > ressources $ressource_size"
   if $job_np > $ressource_size;
[34]141
# Remember where the master job runs: every small job starts there too.
my $current_dir = getcwd();

# Base names for per-job output files: the master OAR output files with
# their extension stripped, unless --masterio overrides both of them.
my $stderr = $ENV{OAR_STDERR} || '';
my $stdout = $ENV{OAR_STDOUT} || '';
$stderr =~ s{\.stderr$}{};
$stdout =~ s{\.stdout$}{};
if ($masterio) {
   $stderr = $masterio;
   $stdout = $masterio;
   }
[13]150
# $finished is raised by the reaper when every job is done (or checkpointed
# out); $job_todo counts jobs still to complete, one ->up per job here.
my $finished = Coro::Signal->new;
my $job_todo = Coro::Semaphore->new(0);

# Longest job name, used to align the start/end trace lines.
# Fixes over original: initialized to 0 instead of undef (the numeric
# comparison warned on the first job under warnings); arrow-style
# constructors instead of indirect-object "new Class" syntax.
my $job_name_maxlen = 0;
for (@job) {
   $job_todo->up;
   $job_name_maxlen = length($_->{name}) if length($_->{name}) > $job_name_maxlen;
   }
[13]158
[43]159# slice of ressources for parallel job
# The channel holds one entry per group of $job_np consecutive core lines;
# each entry is the comma-joined core list one small job runs on.
# Launchers block on ->get until a slot is put back by the reaper.
[13]160my $ressources = new Coro::Channel;
[34]161for my $slot (1 .. int($ressource_size / $job_np)) {
[41]162   $ressources->put(
163      join(',',
164         @ressources[ (($slot - 1) * $job_np) .. (($slot * $job_np) - 1) ])
165         );
[13]166   }
167
# Running sub-jobs, keyed by the local pid of their "| oarsh ..." pipe.
168my %scheduled = ();
169
[43]170# OAR checkpoint and default signal SIGUSR2
# The semaphore counts received checkpoint signals: a count > 0 makes the
# launcher stop starting new jobs.  $notify wakes the forwarding coroutine
# so running sub-jobs receive the signal too (only with --transmit).
[39]171my $oar_checkpoint = new Coro::Semaphore 0;
[84]172my $notify         = new Coro::Signal;
[75]173$SIG{$sig_checkpoint} = sub {
[42]174   print "warning: receive checkpoint at "
175      . time
176      . ", no new job, just finishing running job\n"
177      if $verbose;
178   $oar_checkpoint->up();
[84]179   $notify->send if $sig_transmit;
[42]180   };
[39]181
[81]182# asynchrone notify job
# Forwarding coroutine: each time $notify is raised, open a shell on the
# first node of every running sub-job and send $sig_checkpoint to the pid
# recorded in that job's pid file (written only under --transmit).
183async {
184   while () {
[84]185      $notify->wait;
[81]186
[84]187      for my $job_pid (keys %scheduled) {
188         my $job_name     = $scheduled{$job_pid}->{name};
189         my $job_pidfile  = $scheduled{$job_pid}->{pidfile};
190         my $node_connect = $scheduled{$job_pid}->{node_connect};
[81]191
[84]192         my $fh = IO::File->new();
193         $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
194            or die "error: can't notify subjob: $!";
[81]195
[84]196         $fh->autoflush;
# unblock() makes the pipe Coro-aware so printing cannot stall the scheduler
197         $fh = unblock $fh;
[81]198
[84]199         $fh->print("kill -$sig_checkpoint \$(cat $job_pidfile)\n");
200         $fh->print("exit\n");
[81]201
[84]202         print "warning: transmit signal $sig_checkpoint"
203            . " to job $job_name on node $node_connect.\n"
204            if $verbose;
[82]205
[84]206         close $fh;
207         cede;
[81]208         }
209      }
210   }
211
[43]212# asynchrone start job block
# Launcher coroutine: walks @job in order, skips jobs already traced as
# 'end', throttles launches, blocks on a free resource slot, then opens a
# remote shell pipe and writes the small shell script that runs the job
# and reports its exit status back through a /tmp status file.
[13]213async {
[113]214   my $timer;
[81]215   JOB:
[13]216   for my $job (@job) {
[83]217      my $job_name   = $job->{name};
218      my $job_cmd    = $job->{cmd};
[38]219
[43]220      # job has been already run ?
[45]221      if (exists $state{$job_name}) {
222         if ($state{$job_name} eq 'start') {
223            print "warning: job $job_name was not clearly finished, relaunching...\n"
[41]224               if $verbose;
225            }
[45]226         elsif ($state{$job_name} eq 'end') {
227            delete $state{$job_name}; # free memory
[41]228            $job_todo->down;
[45]229            print "warning: job $job_name already run\n" if $verbose;
[41]230            cede;
[43]231            next JOB;
[41]232            }
233         }
[40]234
[113]235      # wait to not re-launch oarstat to fast
236      # equivalent to sleep $job_launch_brake
      # NOTE(review): AE::now / AE::now_update belong to AnyEvent, which is
      # not use'd in this file -- presumably pulled in via Coro; confirm.
237      $timer = AE::now + $job_launch_brake;
238      while ( AE::now < $timer ) {
239         # force update of AE time
240         AE::now_update;
241         cede;
242         }
243
[43]244      # take job ressource
[36]245      my $job_ressource = $ressources->get;
[13]246
[43]247      # no more launch job when OAR checkpointing
248      last JOB if $oar_checkpoint->count() > 0;
[39]249
      # the shell is opened on the first core's node of the slot
[36]250      my ($node_connect) = split ',', $job_ressource;
[41]251      my $fh = IO::File->new();
[34]252      my $job_pid = $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
[43]253         or die "error: can't start subjob: $!";
[13]254
255      $fh->autoflush;
256      $fh = unblock $fh;
257
[113]258      my $msg = sprintf "start job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
259         $job_name, $job_pid, time, $ENV{OAR_JOB_ID}, $job_ressource;
[43]260      $log_h->print($msg) if $logtrace;
[42]261      print($msg) if $verbose;
[13]262
      # without --switchio these stay undef and interpolate as empty
      # strings in the generated shell line below
[41]263      my ($job_stdout, $job_stderr);
[45]264      $job_stdout = ">  $stdout-$job_name.stdout" if $stdout ne '' and $switchio;
265      $job_stderr = "2> $stderr-$job_name.stderr" if $stderr ne '' and $switchio;
[13]266
      # per-job scratch files on the remote node
[120]267      my $job_nodefile   = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name";
268      my $job_pidfile    = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.pid";
269      my $job_statusfile = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.status";
[34]270
[81]271      $scheduled{$job_pid} = {
272         fh           => $fh,
273         node_connect => $node_connect,
274         ressource    => $job_ressource,
275         name         => $job_name,
276         pidfile      => $job_pidfile,
277         };
278
279      # set job environment, run it and clean
[34]280      if ($job_np > 1) {
[36]281         $fh->print("printf \""
[41]282               . join('\n', split(',', $job_ressource,))
283               . "\" > $job_nodefile\n");
[37]284         $fh->print("OAR_NODE_FILE=$job_nodefile\n");
[34]285         $fh->print("OAR_NP=$job_np\n");
[37]286         $fh->print("export OAR_NODE_FILE\n");
[34]287         $fh->print("export OAR_NP\n");
288         $fh->print("unset OAR_MSG_NODEFILE\n");
289         }
[88]290
[32]291      $fh->print("cd $current_dir\n");
[88]292
      # under --transmit, the remote shell relays the checkpoint signal to
      # its children and publishes its pid for the notify coroutine
[81]293      if ($sig_transmit) {
[87]294         $fh->print("trap 'jobs -p|xargs -r ps -o pid --no-headers --ppid|xargs -r kill -$sig_checkpoint' $sig_checkpoint\n");
[81]295         $fh->print("echo \$\$ > $job_pidfile\n");
296         }
[88]297
      # run the job in a background subshell; a non-zero exit code is
      # captured in the status file, and the wait loop below blocks until
      # the subshell and all its children are gone
[120]298      $fh->print("echo 0 > $job_statusfile\n");
[88]299      $fh->print("(\n");
300      $fh->print("$job_cmd\n");
[120]301      $fh->print(") $job_stdout $job_stderr || echo \$? > $job_statusfile \&\n");
[88]302      $fh->print("while [ \$(jobs -p | wc -l) -gt 0 ]\n");
303      $fh->print("do\n");
304      $fh->print("   wait\n");
305      $fh->print("done\n");
306
      # propagate the captured status as the remote shell's exit code,
      # which the reaper reads through $? after waitpid
[120]307      $fh->print("OAR_SUBJOB_RETCODE=\$(cat $job_statusfile)\n");
308      $fh->print("rm -f $job_statusfile\n");
[88]309      $fh->print("rm -f $job_pidfile\n")  if $sig_transmit;
[34]310      $fh->print("rm -f $job_nodefile\n") if $job_np > 1;
[120]311      $fh->print("exit \$OAR_SUBJOB_RETCODE\n");
[13]312      cede;
313      }
314   }
315
[43]316# asynchrone end job block
# Reaper coroutine: polls the running sub-jobs, logs their final status
# (end / error:N / suspend), frees their resource slot, and raises
# $finished when everything is done or a checkpoint drained the machine.
[13]317async {
318   while () {
[41]319      for my $job_pid (keys %scheduled) {
[82]320         # non blocking PID test
         # NOTE(review): waitpid also returns -1 for a pid that is not our
         # child, which this truth test would accept; pids in %scheduled
         # should always be children of this process -- confirm.
[41]321         if (waitpid($job_pid, WNOHANG)) {
[120]322            # get return status code
323            my $job_retcode0 = $? >> 8;
324            #print "ERREUR0 $job_pid $job_retcode0\n" if $job_retcode0;
325
[113]326            my $msg = sprintf "end   job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
[45]327               $scheduled{$job_pid}->{name},
[113]328               $job_pid, time, $ENV{OAR_JOB_ID}, $scheduled{$job_pid}->{ressource};
[76]329
[120]330            # Job error
            # (99 is reserved for the idempotent-resubmit convention, not an error)
331            $msg =~ s/^end\s+job/error:$job_retcode0 job/
332               if $job_retcode0 > 0 and $job_retcode0 != 99;
333
[76]334            # Job non finish, just suspend if received checkpoint signal
335            $msg =~ s/^end\s+job/suspend job/
336               if $sig_transmit and $oar_checkpoint->count() > 0;
337
[43]338            $log_h->print($msg) if $logtrace;
[42]339            print($msg) if $verbose;
[13]340            close $scheduled{$job_pid}->{fh};
[43]341            # leave ressources for another job
[41]342            $ressources->put($scheduled{$job_pid}->{ressource});
[13]343            $job_todo->down;
344            delete $scheduled{$job_pid};
345            }
346         cede;
347         }
348
[43]349      # checkpointing ! just finishing running job and quit
[42]350      $finished->send if $oar_checkpoint->count() > 0 and scalar(keys(%scheduled)) == 0;
[39]351
[42]352      $finished->send if $job_todo->count() == 0;
[13]353      cede;
354      }
355   }
356
# hand control over to the coroutines above
357cede;
358
[43]359# all job have been done
[13]360$finished->wait;
361
[43]362# close log trace file
363$log_h->close() if $logtrace;
[38]364
# After a checkpoint, exit 99 so an idempotent OAR job is resubmitted to
# run the remaining sub-jobs.  Skipped when the server is OAR 2.4.x --
# presumably that branch does not honor exit code 99; confirm in OAR docs.
[116]365exit 99 if (($oar_checkpoint->count() > 0) and ($oar_version !~ m/^2\.4/));
366
367
[13]368__END__
369
370=head1 NAME
371
[88]372oar-parexec - parallel execution of many small short or long job
[13]373
374=head1 SYNOPSIS
375
[47]376 oar-parexec --file filecommand \
377    [--logtrace tracefile] [--verbose] \
378    [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
[88]379    [--switchio] [--masterio basefileio] \
380    [--kill signal] [--transmit]
[46]381
[47]382 oar-parexec --dir foldertoiterate --cmd commandtolaunch \
383    [--logtrace tracefile] [--verbose] \
384    [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
[88]385    [--switchio] [--masterio basefileio] \
386    [--kill signal] [--transmit]
[46]387
[13]388 oar-parexec --help
389
[32]390=head1 DESCRIPTION
391
[88]392C<oar-parexec> can execute lots of small short or long jobs in parallel inside a cluster.
393The number of parallel jobs at one time cannot exceed the number of cores defined in the node file.
[32]394C<oar-parexec> is easier to use inside an OAR job environment
[44]395which automatically defines these strategic parameters...
396However, it can be used outside OAR.
[32]397
[47]398Option C<--file> or C<--dir> and C<--cmd> are the only mandatory parameters.
[32]399
400Small job will be launch in the same folder as the master job.
[44]401Two environment variable are defined for each small job
[37]402and only in case of parallel small job (option C<--jobnp> > 1).
[32]403
[34]404 OAR_NODE_FILE - file that list node for parallel computing
405 OAR_NP        - number of processor affected
[32]406
[44]407The file defined by OAR_NODE_FILE is created in /tmp
408on the node before launching the small job
409and this file will be deleted after the job completes.
[34]410C<oar-parexec> is a simple script,
411OAR_NODE_FILE will not be deleted in case of crash of the master job.
412
[37]413OAR define other variable that are equivalent to OAR_NODE_FILE:
414OAR_NODEFILE, OAR_FILE_NODES, OAR_RESOURCE_FILE...
415You can use in your script the OAR original file ressources
416by using these variable if you need it.
[34]417
[88]418When used with long jobs,
419activate option C<--transmit> to send the OAR checkpoint signal
420and suspend small jobs before the walltime cut!
[82]421
[13]422=head1 OPTIONS
423
[32]424=over 12
[13]425
[47]426=item B<-f|--file filecommand>
[13]427
[32]428File name which content job list.
[45]429For the JOB_NAME definition,
430the first valid job in the list will have the number 1 and so on...
[13]431
[77]432It's possible to fix the name inside a comment on the job line.
433For example:
434
435 $HOME/test/subjob1.sh # name=subjob1
436
437The key C<name> is case insensitive,
438the associated value cannot have a space...
439
[88]440The command can be any shell command.
441It's possible to change folder,
442or launch an asynchrone job in parallel,
443but one command must block and not be launch in asynchrone (with & or coproc).
444Example :
445
446 cd ./test; ./subjob1.sh
[119]447 cd ./test; nice -18 du -sk ./ & ./subjob1.sh
[88]448
[119]449Commands C<du -sk ./>  and C<./subjob1.sh> will be done in parallel on the same ressource...
450It's better if C<du -sk ./> is faster than C<./subjob1.sh> !
451Do not abuse of that!
[88]452
[47]453=item B<-d|--dir foldertoiterate>
[45]454
455Command C<--cmd> will be launch in all sub-folder of this master folder.
456Files in this folder will be ignored.
[47]457Sub-folder name which begin with F<.>
458or finish with F<.old>, F<.sav>, F<.bak>, F<.no> will either be ignored...
[45]459
460The JOB_NAME is simply the Sub-folder name.
461
462=item B<-c|--cmd commandtolaunch>
463
[88]464Command (and its arguments) that will be launched in every sub-folder
465of the folder given by parameter C<--dir>.
466Like for option C<--file>, command can be any valid shell command
467but one must block.
[45]468
[43]469=item B<-l|--logtrace tracefile>
470
471File which log and trace running job.
[44]472In case of running the same master command (after crash for example),
473only job that are not mark as done will be run again.
474Be careful, job mark as running (start but not finish) will be run again.
[45]475Tracing is base on the JOB_NAME between multiple run.
[43]476
477This option is very useful in case of crash
478but also for checkpointing and idempotent OAR job.
479
[32]480=item B<-v|--verbose>
[13]481
[34]482=item B<-j|--jobnp integer>
[13]483
[34]484Number of processors to allocate for each small job.
4851 by default.
486
487=item B<-n|--nodefile filenode>
488
[44]489File name that list all the node where job could be launch.
[32]490By defaut, it's define automatically by OAR via
491environment variable C<OAR_NODE_FILE>.
[13]492
[32]493For example, if you want to use 6 core on your cluster node,
494you need to put 6 times the hostname node in this file,
495one per line...
496It's a very common file in MPI process !
[13]497
[46]498=item B<-o|-oarsh command>
[13]499
[46]500Command use to launch a shell on a node.
501By default
[13]502
[46]503 oarsh -q -T
504
505Change it to C<ssh> if you are not using an OAR cluster...
506
[32]507=item B<-s|--switchio>
[21]508
[32]509Each small job will have it's own output STDOUT and STDERR
[45]510base on master OAR job with C<JOB_NAME> inside
[32]511(or base on C<basefileio> if option C<masterio>).
512Example :
[21]513
[45]514 OAR.151524.stdout -> OAR.151524-JOB_NAME.stdout
[21]515
[32]516where 151524 here is the master C<OAR_JOB_ID>
[45]517and C<JOB_NAME> is the small job name.
[21]518
[46]519=item B<-m|--masterio basefileio>
[32]520
[46]521The C<basefileio> will be use in place of environment variable
522C<OAR_STDOUT> and C<OAR_STDERR> (without extension) to build the base name of the small job standard output
[117]523(only use when option C<switchio> is activated).
[32]524
[78]525=item B<-k|--kill signal>
526
527Signal to listen and make a clean stop of the current C<oar-parexec> process.
[118]528By default, use USR2 signal (see C<kill -l> for a list of possible signal).
[78]529
530=item B<-t|--transmit>
531
532Resend the caught signal to sub-jobs when receiving it.
533By default, no signal is transmitted to child processes.
534
535It's only valuable if used for long sub-jobs that can,
536in return, make a clean restart by themselves.
537
538
[32]539=item B<-h|--help>
540
541=back
542
543
544=head1 EXAMPLE
545
[44]546=head2 Simple list of sequential job
547
[47]548Content for the job file command (option C<--file>) could have:
[21]549
[13]550 - empty line
551 - comment line begin with #
[86]552 - valid shell command (can contain a comment)
[13]553
554Example where F<$HOME/test/subjob1.sh> is a shell script (executable).
555
[86]556 $HOME/test/subjob01.sh  # name=subjob01
557 $HOME/test/subjob02.sh  # name=subjob02
558 $HOME/test/subjob03.sh  # name=subjob03
559 $HOME/test/subjob04.sh  # name=subjob04
[32]560 ...
[86]561 $HOME/test/subjob38.sh  # name=subjob38
562 $HOME/test/subjob39.sh  # name=subjob39
563 $HOME/test/subjob40.sh  # name=subjob40
[13]564
[44]565These jobs could be launch by:
[13]566
[49]567 oarsub -n test -l /core=6,walltime=04:00:00 \
568   "oar-parexec -f ./subjob.list.txt"
[13]569
[47]570=head2 Folder job
571
572In a folder F<subjob.d>, create sub-folder with your data inside : F<test1>, <test2>...
573The same command will be executed in every sub-folder.
574C<oar-parexec> change the current directory to the sub-folder before launching it.
575
576A very simple job could be:
577
[49]578 oarsub -n test -l /core=6,walltime=04:00:00 \
579   "oar-parexec -d ./subjob.d -c 'sleep 10; env'"
[47]580
581The command C<env> will be executed in all folders F<test1>, F<test2>... after a 10s pause.
582
583Sometime, it's simpler to use file list command,
584sometime, jobs by folder with the same command run is more relevant.
585
[44]586=head2 Parallel job
[28]587
[44]588You need to put the number of core each small job need with option C<--jobnp>.
589If your job is build on OpenMP or MPI,
590you can use OAR_NP and OAR_NODE_FILE variables to configure them.
591On OAR cluster, you need to use C<oarsh> or a wrapper like C<oar-envsh>
592for connexion between node instead of C<ssh>.
593
594Example with parallel small job on 2 core:
595
[49]596 oarsub -n test -l /core=6,walltime=04:00:00 \
597   "oar-parexec -j 2 -f ./subjob.list.txt"
[44]598
599=head2 Tracing and master crash
600
601If the master node crash after hours of calculus, everything is lost ?
602No, with option C<--logtrace>,
603it's possible to remember older result
604and not re-run these job the second and next time.
605
[49]606 oarsub -n test -l /core=6,walltime=04:00:00 \
607   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
[44]608
609After a crash or an C<oardel> command,
610you can then re-run the same command that will end to execute the jobs in the list
611
[49]612 oarsub -n test -l /core=6,walltime=04:00:00 \
613   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
[44]614
615C<logtrace> file are just plain file.
616We use the extension '.log' because these files are automatically
617eliminate from our backup system!
618
619=head2 Checkpointing and Idempotent
620
621C<oar-parexec> is compatible with the OAR checkpointing.
[89]622If you have 2000 small jobs that need 55h to be done on 6 cores,
[44]623you can cut this in small parts.
624
625For this example, we suppose that each small job need about 10min...
626So, we send a checkpoint 12min before the end of the process
627to let C<oar-parexec> finish the jobs started.
628After being checkpointed, C<oar-parexec> do not start any new small job.
629
[49]630 oarsub -t idempotent -n test \
631   -l /core=6,walltime=04:00:00 \
632   --checkpoint 720 \
[44]633   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
634
635After 3h48min, the OAR job will begin to stop launching new small job.
636When all running small jobs are finished, it exits.
637But as the OAR job is type C<idempotent>,
638OAR will re-submit it as long as all small job are not executed...
639
640This way, we let other users a chance to use the cluster!
641
642In this last example, we use moldable OAR job with idempotent
643to reserve many core for a small time or a few cores for a long time:
644
645 oarsub -t idempotent -n test \
646   -l /core=50,walltime=01:05:00 \
647   -l /core=6,walltime=04:00:00 \
648   --checkpoint 720 \
649   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
650
[78]651=head2 Signal, recurse and long job
[44]652
[78]653By default, OAR use signal USR2 for checkpointing.
[79]654It's possible to change this with option C<--kill>.
[78]655
656When use with long small job, checkpointing could be too long...
[79]657More than walltime!
658The option C<--transmit> could be use to checkpoint small job!
659These long small job will then stop cleanly and will be restarted next time.
[78]660
661In the C<logtrace> file, small job will have the status suspend.
[79]662They will be launch with the same command line at the next OAR run.
[78]663
[89]664Example: if you have 50 small jobs that each need 72h to be done on 1 cores,
665you can cut this in 24h parts.
666
667For this example, we suppose that each long job loop need about 20min...
668So, we send a checkpoint 30min before the end of the process
669to let C<oar-parexec> suspend the jobs started.
670After being checkpointed, C<oar-parexec> do not start any new small job.
671
672 oarsub -t idempotent -n test \
673   -l /core=6,walltime=24:00:00 \
674   --checkpoint 1800 \
675   --transmit \
676   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
677
678After 23h30min, the OAR job will begin to stop launching new small job.
679When all running small jobs are suspended, it exits.
680But as the OAR job is type C<idempotent>,
681OAR will re-submit it as long as all small job are not finished...
682
[21]683=head1 SEE ALSO
684
[44]685oar-dispatch, mpilauncher,
686orsh, oar-envsh, ssh
[21]687
688
[13]689=head1 AUTHORS
690
[21]691Written by Gabriel Moreau, Grenoble - France
[13]692
[21]693
694=head1 LICENSE AND COPYRIGHT
695
696GPL version 2 or later and Perl equivalent
697
[118]698Copyright (C) 2011-2015 Gabriel Moreau / LEGI - CNRS UMR 5519 - France
[21]699
Note: See TracBrowser for help on using the repository browser.