source: trunk/oarutils/oar-parexec @ 124

Last change on this file since 124 was 124, checked in by g7moreau, 7 years ago
  • Error in global time cumulative
File size: 22.6 KB
RevLine 
[13]1#!/usr/bin/perl
2#
[118]3# 2011/11/27 Gabriel Moreau
[13]4
5use strict;
6
7use Getopt::Long();
8use Pod::Usage;
9use Coro;
10use Coro::Semaphore;
11use Coro::Signal;
12use Coro::Channel;
13use Coro::Handle;
14use IO::File;
15use POSIX qw( WNOHANG WEXITSTATUS );
[32]16use Cwd qw( getcwd );
[13]17
# --- Command-line options (file-scoped, used throughout the script) ---
my $file;                                # job list file (--file)
my $dir;                                 # master folder to iterate (--dir)
my $cmd;                                 # command run in each sub-folder (--cmd)
my $logtrace;                            # trace file enabling re-run support (--logtrace)
my $verbose;
my $job_np         = 1;                  # cores allocated per small job (--jobnp)
my $nodefile       = $ENV{OAR_NODE_FILE} || '';
my $masterio;                            # base name for sub-job stdout/stderr (--masterio)
my $switchio;                            # one stdout/stderr pair per sub-job (--switchio)
my $help;
my $oarsh          = 'oarsh -q -T';      # remote shell command (--oarsh)
my $sig_transmit;                        # forward checkpoint signal to sub-jobs (--transmit)
my $sig_checkpoint = 'USR2';             # signal to listen for (--kill)
my $job_launch_brake = 1; # one second time brake

Getopt::Long::GetOptions(
   'file=s'     => \$file,
   'dir=s'      => \$dir,
   'cmd=s'      => \$cmd,
   'logtrace=s' => \$logtrace,
   'verbose'    => \$verbose,
   'help'       => \$help,
   'oarsh=s'    => \$oarsh,
   'jobnp=i'    => \$job_np,
   'nodefile=s' => \$nodefile,
   'masterio=s' => \$masterio,
   'switchio'   => \$switchio,
   'transmit'   => \$sig_transmit,
   'kill=s'     => \$sig_checkpoint,
   ) or pod2usage(-verbose => 0);
pod2usage(-verbose => 2) if $help;

# either --file, or the --dir/--cmd pair, is mandatory
pod2usage(-verbose => 2)
   unless (-e "$file") or (-d "$dir" and $cmd ne '');
[13]53
# OAR version (4th field of `oarsub -V` output), checked at exit time
# to decide whether exit code 99 is supported.
chomp(my $oar_version = `oarsub -V | awk '{print \$4}'`);

# wall-clock accounting: total across (re-)runs, cumulative over all sub-jobs
my $global_time_atstart    = time;
my $global_time_total      = 0;
my $global_time_cumulative = 0;
61
# Re-run support: parse a previous trace file (if any) and remember
# which sub-jobs already started or finished.
my %state;
my $log_h = IO::File->new();
if (-e "$logtrace") {
   $log_h->open("< $logtrace")
      or die "error: can't read log file: $!";
   while (my $line = <$log_h>) {
      # trace format version 1
      $state{$1} = 'start' if $line =~ m/^start\s+job\s+([^\s]+)\s/;
      $state{$1} = 'end'   if $line =~ m/^end\s+job\s+([^\s]+)\s/;
      # trace format version 2
      $state{$1} = 'start' if $line =~ m/^start\s+subjob\s+([^\s]+)\s/;
      $state{$1} = 'end'   if $line =~ m/^end\s+subjob\s+([^\s]+)\s/;
      # restore accumulated global timers from the previous run
      ($global_time_total, $global_time_cumulative) = ($1, $2)
         if $line =~ m/^global-time\s.*total\s+(\d+)\s+cumulative\s+(\d+)/;
      }
   $log_h->close();
   }
if ($logtrace) {
   $log_h->open(">> $logtrace")
      or die "error: can't append log file $logtrace: $!";
   $log_h->autoflush;
   # wrap in a Coro-aware non-blocking handle
   $log_h = unblock $log_h;
   }

# announce the trace format version we write
$log_h->print("log version 2\n") if $logtrace;
print("log version 2\n") if $verbose;
[121]89
# Build the list of jobs to run, either from a command file (--file)
# or from the sub-folders of a master folder (--dir).
# Each entry is a hashref: { name => ..., cmd => ..., num => ... }.
# Fix: bareword global filehandles (JOB_LIST, DIR) replaced by lexical ones.
my @job = ();
if (-e "$file") {
   my $job_num = 0;
   open(my $job_list_fh, '<', "$file") or die "error: can't open job file $file: $!";
   while (my $job_cmd = <$job_list_fh>) {
      chomp $job_cmd;
      next if $job_cmd =~ m/^#/;      # comment line
      next if $job_cmd =~ m/^\s*$/;   # blank line
      $job_num++;
      # an optional name can be set in a trailing comment: cmd # name=xyz
      my ($job_name) = $job_cmd =~ m/#.*?\bname=(\S+?)\b/i;
      $job_name ||= $job_num;
      push @job, {
         name   => $job_name,
         cmd    => "$job_cmd",
         num    => $job_num,
         };
      }
   close $job_list_fh;
   }
else {
   my $job_num = 0;
   opendir(my $dir_h, $dir) or die "error: can't open folder $dir: $!";
   while (my $item = readdir($dir_h)) {
      next if $item =~ m/^\./;        # hidden entries
      next if $item =~ m/:/;
      next if $item =~ m/\.old$/;     # backup / disabled folders are skipped
      next if $item =~ m/\.sav$/;
      next if $item =~ m/\.bak$/;
      next if $item =~ m/\.no$/;
      next unless (-d "$dir/$item");  # folders only, plain files ignored
      $job_num++;
      push @job, {
         name   => $item,
         cmd    => "cd $dir/$item/; $cmd",
         num    => $job_num,
         };
      }
   closedir $dir_h;
   }
[13]130
# Sub-job names must be unique because the trace file is keyed on them.
# If any name collides, fall back to the sequence number for every job.
{
   my %seen = ();
   my $count_unique_name = grep { ! $seen{ $_->{name} }++ } @job;
   # BUG FIX: compare against the job count (scalar @job), not the last
   # index ($#job == count - 1).  The old test fired when names were all
   # unique (renaming them away) and missed a single duplicate.
   if ($count_unique_name != scalar @job) {
      $_->{name} = $_->{num} for @job;
      }
   }
139
# Available ressources: one line per slot in the node file.
# Fix: bareword global filehandle NODE_FILE replaced by a lexical one,
# and the implicit $_ loop variable named explicitly.
my @ressources = ();
open(my $node_fh, '<', "$nodefile")
   or die "can't open $nodefile: $!";
while (my $node = <$node_fh>) {
   chomp $node;
   next if $node =~ m/^#/;
   next if $node =~ m/^\s*$/;
   push @ressources, $node;
   }
close $node_fh;

my $ressource_size = scalar(@ressources);
die "error: not enought ressources jobnp $job_np > ressources $ressource_size"
   if $job_np > $ressource_size;

# master job current folder: sub-jobs are started from here
my $current_dir = getcwd();

# base names for sub-job output files (see --switchio / --masterio)
my $stderr = $ENV{OAR_STDERR} || '';
$stderr =~ s/\.stderr$//;
$stderr = $masterio if $masterio;
my $stdout = $ENV{OAR_STDOUT} || '';
$stdout =~ s/\.stdout$//;
$stdout = $masterio if $masterio;
[13]164
# Coro plumbing.
# Fix: indirect object syntax (new Class) replaced by Class->new, and
# $job_name_maxlen initialized to 0 to avoid an undef numeric compare.
my $finished = Coro::Signal->new;        # sent when all work is done
my $job_todo = Coro::Semaphore->new(0);  # counts sub-jobs still to complete

# longest job name, used for aligned trace output
my $job_name_maxlen = 0;
for my $job (@job) {
   $job_todo->up;
   $job_name_maxlen = length($job->{name})
      if length($job->{name}) > $job_name_maxlen;
   }

# Channel of ressource slices for parallel jobs: each slot groups
# $job_np consecutive node-file lines, joined by commas.
my $ressources = Coro::Channel->new;
for my $slot (1 .. int($ressource_size / $job_np)) {
   $ressources->put(
      join(',',
         @ressources[ (($slot - 1) * $job_np) .. (($slot * $job_np) - 1) ])
         );
   }

# running sub-jobs, keyed by local pipe PID
my %scheduled = ();
183
# OAR checkpoint handling (signal USR2 by default, see --kill).
# Raising the semaphore stops the launcher from starting new sub-jobs;
# $notify optionally wakes the forwarder that signals running sub-jobs.
# Fix: indirect object syntax (new Class) replaced by Class->new.
my $oar_checkpoint = Coro::Semaphore->new(0);
my $notify         = Coro::Signal->new;
$SIG{$sig_checkpoint} = sub {
   print "warning: receive checkpoint at "
      . time
      . ", no new job, just finishing running job\n"
      if $verbose;
   $oar_checkpoint->up();
   $notify->send if $sig_transmit;
   };
[39]195
# Asynchronous signal forwarder: each time $notify fires, open a shell
# on every node with a running sub-job and forward the checkpoint
# signal to the remote process recorded in its pid file.
async {
   while () {
      $notify->wait;

      for my $job_pid (keys %scheduled) {
         my $job_name     = $scheduled{$job_pid}->{name};
         my $job_pidfile  = $scheduled{$job_pid}->{pidfile};
         my $node_connect = $scheduled{$job_pid}->{node_connect};

         my $remote_h = IO::File->new();
         $remote_h->open("| $oarsh $node_connect >/dev/null 2>&1")
            or die "error: can't notify subjob: $!";

         $remote_h->autoflush;
         $remote_h = unblock $remote_h;

         $remote_h->print("kill -$sig_checkpoint \$(cat $job_pidfile)\n");
         $remote_h->print("exit\n");

         print "warning: transmit signal $sig_checkpoint to job $job_name on node $node_connect.\n"
            if $verbose;

         close $remote_h;
         cede;
         }
      }
   }
225
# asynchrone start job block
# Launcher coroutine: walks the job list, skips jobs already recorded
# as done in the trace file, throttles launches, takes one ressource
# slice per job and starts the sub-job on its node through $oarsh by
# streaming a small shell script down the pipe.
async {
   my $timer;
   JOB:
   for my $job (@job) {
      my $job_name   = $job->{name};
      my $job_cmd    = $job->{cmd};

      # job has been already run ?
      if (exists $state{$job_name}) {
         if ($state{$job_name} eq 'start') {
            print "warning: job $job_name was not clearly finished, relaunching...\n"
               if $verbose;
            }
         elsif ($state{$job_name} eq 'end') {
            delete $state{$job_name}; # free memory
            $job_todo->down;
            print "warning: job $job_name already run\n" if $verbose;
            cede;
            next JOB;
            }
         }

      # wait to not re-launch oarstat to fast
      # equivalent to sleep $job_launch_brake
      # NOTE(review): AE::now comes from AnyEvent, which is not loaded
      # explicitly here - presumably pulled in via Coro. TODO confirm.
      $timer = AE::now + $job_launch_brake;
      while ( AE::now < $timer ) {
         # force update of AE time
         AE::now_update;
         cede;
         }

      # take job ressource (blocks until a slice is free)
      my $job_ressource = $ressources->get;

      # no more launch job when OAR checkpointing
      last JOB if $oar_checkpoint->count() > 0;

      # open a shell on the first node of the slice
      my ($node_connect) = split ',', $job_ressource;
      my $fh = IO::File->new();
      my $job_pid = $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
         or die "error: can't start subjob: $!";

      $fh->autoflush;
      $fh = unblock $fh;

      my $begin_at = time;
      #my $msg = sprintf "start job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
      my $msg = sprintf "start   subjob %${job_name_maxlen}s pid %5i at %s oarjob %i onnode %s\n",
         $job_name, $job_pid, $begin_at, $ENV{OAR_JOB_ID}, $job_ressource;
      $log_h->print($msg) if $logtrace;
      print($msg) if $verbose;

      # per sub-job stdout/stderr redirections (see --switchio);
      # left undef (interpolates as empty) when --switchio is off
      my ($job_stdout, $job_stderr);
      $job_stdout = ">  $stdout-$job_name.stdout" if $stdout ne '' and $switchio;
      $job_stderr = "2> $stderr-$job_name.stderr" if $stderr ne '' and $switchio;

      # scratch files on the remote node: node list, shell pid, exit status
      my $job_nodefile   = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name";
      my $job_pidfile    = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.pid";
      my $job_statusfile = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.status";

      # register the sub-job so the reaper and signal forwarder see it
      $scheduled{$job_pid} = {
         fh           => $fh,
         node_connect => $node_connect,
         ressource    => $job_ressource,
         name         => $job_name,
         pidfile      => $job_pidfile,
         begin_at     => $begin_at,
         };

      # set job environment, run it and clean
      if ($job_np > 1) {
         # expose the ressource slice as a per-job OAR_NODE_FILE
         $fh->print("printf \""
               . join('\n', split(',', $job_ressource,))
               . "\" > $job_nodefile\n");
         $fh->print("OAR_NODE_FILE=$job_nodefile\n");
         $fh->print("OAR_NP=$job_np\n");
         $fh->print("export OAR_NODE_FILE\n");
         $fh->print("export OAR_NP\n");
         $fh->print("unset OAR_MSG_NODEFILE\n");
         }

      $fh->print("cd $current_dir\n");

      if ($sig_transmit) {
         # on checkpoint, forward the signal to every process of the sub-job
         $fh->print("trap 'jobs -p|xargs -r ps -o pid --no-headers --ppid|xargs -r kill -$sig_checkpoint' $sig_checkpoint\n");
         $fh->print("echo \$\$ > $job_pidfile\n");
         }

      # run the command in background, record a non-zero status, wait
      # for all children, then propagate the status as the shell exit code
      $fh->print("echo 0 > $job_statusfile\n");
      $fh->print("(\n");
      $fh->print("$job_cmd\n");
      $fh->print(") $job_stdout $job_stderr || echo \$? > $job_statusfile \&\n");
      $fh->print("while [ \$(jobs -p | wc -l) -gt 0 ]\n");
      $fh->print("do\n");
      $fh->print("   wait\n");
      $fh->print("done\n");

      $fh->print("OAR_SUBJOB_RETCODE=\$(cat $job_statusfile)\n");
      $fh->print("rm -f $job_statusfile\n");
      $fh->print("rm -f $job_pidfile\n")  if $sig_transmit;
      $fh->print("rm -f $job_nodefile\n") if $job_np > 1;
      $fh->print("exit \$OAR_SUBJOB_RETCODE\n");
      cede;
      }
   }
332
# asynchrone end job block
# Reaper coroutine: polls running sub-jobs, collects the exit status,
# writes the end/error/suspend trace line, returns the ressource slice
# to the channel and signals global completion.
async {
   while () {
      for my $job_pid (keys %scheduled) {
         # non blocking PID test
         # NOTE(review): a -1 return (no such child) is also truthy and
         # treated as "finished", which at least guarantees cleanup -
         # confirm before tightening this to waitpid(...) > 0.
         if (waitpid($job_pid, WNOHANG)) {
            # get return status code
            my $job_retcode0 = $? >> 8;
            #print "ERREUR0 $job_pid $job_retcode0\n" if $job_retcode0;

            # job time
            my $end_at = time;
            my $duration = $end_at - $scheduled{$job_pid}->{begin_at};
            $global_time_cumulative += $duration;

            #my $msg = sprintf "end   job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
            my $msg = sprintf "end     subjob %${job_name_maxlen}s pid %5i at %s oarjob %i onnode %s duration %i status %i\n",
               $scheduled{$job_pid}->{name},
               $job_pid, $end_at, $ENV{OAR_JOB_ID}, $scheduled{$job_pid}->{ressource},
               $duration, $job_retcode0;

            # Job error (status 99 is excluded: reserved for checkpoint exit)
            $msg =~ s/^end\s+subjob/error   subjob/
               if $job_retcode0 > 0 and $job_retcode0 != 99;

            # Job non finish, just suspend if received checkpoint signal
            $msg =~ s/^end\s+subjob/suspend subjob/
               if $sig_transmit and $oar_checkpoint->count() > 0;

            $log_h->print($msg) if $logtrace;
            print($msg) if $verbose;
            close $scheduled{$job_pid}->{fh};
            # leave ressources for another job
            $ressources->put($scheduled{$job_pid}->{ressource});
            $job_todo->down;
            delete $scheduled{$job_pid};
            }
         cede;
         }

      # checkpointing ! just finishing running job and quit
      $finished->send if $oar_checkpoint->count() > 0 and scalar(keys(%scheduled)) == 0;

      $finished->send if $job_todo->count() == 0;
      cede;
      }
   }
380
# hand over to the worker coroutines and block until everything is done
cede;
$finished->wait;

# fold this run's wall clock into the cross-run totals and report them
$global_time_total += (time - $global_time_atstart);
my $time_msg = "global-time total $global_time_total cumulative $global_time_cumulative\n";
$log_h->print($time_msg) if $logtrace;
print($time_msg) if $verbose;

# close log trace file
$log_h->close() if $logtrace;

# after a checkpoint, exit 99 so an idempotent OAR job is resubmitted
# (skipped on OAR 2.4, which does not honour that convention)
exit 99 if ($oar_checkpoint->count() > 0) and ($oar_version !~ m/^2\.4/);
395
396
[13]397__END__
398
399=head1 NAME
400
[88]401oar-parexec - parallel execution of many small short or long job
[13]402
403=head1 SYNOPSIS
404
[47]405 oar-parexec --file filecommand \
406    [--logtrace tracefile] [--verbose] \
407    [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
[88]408    [--switchio] [--masterio basefileio] \
409    [--kill signal] [--transmit]
[46]410
[47]411 oar-parexec --dir foldertoiterate --cmd commandtolaunch \
412    [--logtrace tracefile] [--verbose] \
413    [--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
[88]414    [--switchio] [--masterio basefileio] \
415    [--kill signal] [--transmit]
[46]416
[13]417 oar-parexec --help
418
[32]419=head1 DESCRIPTION
420
C<oar-parexec> can execute lots of small short or long jobs in parallel inside a cluster.
The number of parallel jobs running at one time cannot exceed the number of cores defined in the node file.
C<oar-parexec> is easier to use inside an OAR job environment,
which automatically defines these strategic parameters...
However, it can be used outside OAR.
[32]426
[47]427Option C<--file> or C<--dir> and C<--cmd> are the only mandatory parameters.
[32]428
Small jobs will be launched in the same folder as the master job.
Two environment variables are defined for each small job,
and only in the case of parallel small jobs (option C<--jobnp> > 1).
[32]432
[34]433 OAR_NODE_FILE - file that list node for parallel computing
434 OAR_NP        - number of processor affected
[32]435
The file defined by OAR_NODE_FILE is created in /tmp
on the node before launching the small job,
and this file will be deleted after the job completes.
C<oar-parexec> is a simple script:
OAR_NODE_FILE will not be deleted if the master job crashes.
441
[37]442OAR define other variable that are equivalent to OAR_NODE_FILE:
443OAR_NODEFILE, OAR_FILE_NODES, OAR_RESOURCE_FILE...
444You can use in your script the OAR original file ressources
445by using these variable if you need it.
[34]446
When used with long jobs,
activate option C<--transmit> to send the OAR checkpoint signal
and suspend small jobs before the walltime cut!
[82]450
[13]451=head1 OPTIONS
452
[32]453=over 12
[13]454
[47]455=item B<-f|--file filecommand>
[13]456
[32]457File name which content job list.
[45]458For the JOB_NAME definition,
459the first valid job in the list will have the number 1 and so on...
[13]460
[77]461It's possible to fix the name inside a comment on the job line.
462For example:
463
464 $HOME/test/subjob1.sh # name=subjob1
465
466The key C<name> is case insensitive,
467the associated value cannot have a space...
468
[88]469The command can be any shell command.
470It's possible to change folder,
471or launch an asynchrone job in parallel,
472but one command must block and not be launch in asynchrone (with & or coproc).
473Example :
474
475 cd ./test; ./subjob1.sh
[119]476 cd ./test; nice -18 du -sk ./ & ./subjob1.sh
[88]477
[119]478Commands C<du -sk ./>  and C<./subjob1.sh> will be done in parallel on the same ressource...
479It's better if C<du -sk ./> is faster than C<./subjob1.sh> !
480Do not abuse of that!
[88]481
[47]482=item B<-d|--dir foldertoiterate>
[45]483
484Command C<--cmd> will be launch in all sub-folder of this master folder.
485Files in this folder will be ignored.
[47]486Sub-folder name which begin with F<.>
487or finish with F<.old>, F<.sav>, F<.bak>, F<.no> will either be ignored...
[45]488
489The JOB_NAME is simply the Sub-folder name.
490
491=item B<-c|--cmd commandtolaunch>
492
Command (and arguments to it) that will be launched in every sub-folder
of the folder given by option C<--dir>.
Like for option C<--file>, the command can be any valid shell command,
but one must block.
[45]497
[43]498=item B<-l|--logtrace tracefile>
499
File which logs and traces running jobs.
In case of re-running the same master command (after a crash for example),
only jobs that are not marked as done will be run again.
Be careful: jobs marked as running (started but not finished) will be run again.
Tracing is based on the JOB_NAME between multiple runs.
[43]505
This option is very useful in case of crash,
but also for checkpointing and idempotent OAR jobs.
508
[32]509=item B<-v|--verbose>
[13]510
[34]511=item B<-j|--jobnp integer>
[13]512
[34]513Number of processor to allocated for each small job.
5141 by default.
515
516=item B<-n|--nodefile filenode>
517
[44]518File name that list all the node where job could be launch.
[32]519By defaut, it's define automatically by OAR via
520environment variable C<OAR_NODE_FILE>.
[13]521
[32]522For example, if you want to use 6 core on your cluster node,
523you need to put 6 times the hostname node in this file,
524one per line...
525It's a very common file in MPI process !
[13]526
[46]527=item B<-o|-oarsh command>
[13]528
[46]529Command use to launch a shell on a node.
530By default
[13]531
[46]532 oarsh -q -T
533
534Change it to C<ssh> if you are not using an OAR cluster...
535
[32]536=item B<-s|--switchio>
[21]537
[32]538Each small job will have it's own output STDOUT and STDERR
[45]539base on master OAR job with C<JOB_NAME> inside
[32]540(or base on C<basefileio> if option C<masterio>).
541Example :
[21]542
[45]543 OAR.151524.stdout -> OAR.151524-JOB_NAME.stdout
[21]544
[32]545where 151524 here is the master C<OAR_JOB_ID>
[45]546and C<JOB_NAME> is the small job name.
[21]547
[46]548=item B<-m|--masterio basefileio>
[32]549
The C<basefileio> will be used in place of the environment variables
C<OAR_STDOUT> and C<OAR_STDERR> (without extension) to build the base name of the small job standard output
(only used when option C<switchio> is activated).
[32]553
[78]554=item B<-k|--kill signal>
555
556Signal to listen and make a clean stop of the current C<oar-parexec> process.
[118]557By default, use USR2 signal (see C<kill -l> for a list of possible signal).
[78]558
559=item B<-t|--transmit>
560
Resend the caught signal to sub-jobs when receiving it.
By default, no signal is transmitted to child processes.

It's only valuable if used for long sub-jobs that can,
in return, make themselves a clean restart.
566
567
[32]568=item B<-h|--help>
569
570=back
571
572
573=head1 EXAMPLE
574
[44]575=head2 Simple list of sequential job
576
[47]577Content for the job file command (option C<--file>) could have:
[21]578
[13]579 - empty line
580 - comment line begin with #
[86]581 - valid shell command (can containt comment)
[13]582
583Example where F<$HOME/test/subjob1.sh> is a shell script (executable).
584
[86]585 $HOME/test/subjob01.sh  # name=subjob01
586 $HOME/test/subjob02.sh  # name=subjob02
587 $HOME/test/subjob03.sh  # name=subjob03
588 $HOME/test/subjob04.sh  # name=subjob04
[32]589 ...
[86]590 $HOME/test/subjob38.sh  # name=subjob38
591 $HOME/test/subjob39.sh  # name=subjob39
592 $HOME/test/subjob40.sh  # name=subjob40
[13]593
[44]594These jobs could be launch by:
[13]595
[49]596 oarsub -n test -l /core=6,walltime=04:00:00 \
597   "oar-parexec -f ./subjob.list.txt"
[13]598
[47]599=head2 Folder job
600
601In a folder F<subjob.d>, create sub-folder with your data inside : F<test1>, <test2>...
602The same command will be executed in every sub-folder.
603C<oar-parexec> change the current directory to the sub-folder before launching it.
604
605A very simple job could be:
606
[49]607 oarsub -n test -l /core=6,walltime=04:00:00 \
608   "oar-parexec -d ./subjob.d -c 'sleep 10; env'"
[47]609
The command C<env> will be executed in all folders F<test1>, F<test2>... after a 10s pause.
611
612Sometime, it's simpler to use file list command,
613sometime, jobs by folder with the same command run is more relevant.
614
[44]615=head2 Parallel job
[28]616
[44]617You need to put the number of core each small job need with option C<--jobnp>.
618If your job is build on OpenMP or MPI,
619you can use OAR_NP and OAR_NODE_FILE variables to configure them.
620On OAR cluster, you need to use C<oarsh> or a wrapper like C<oar-envsh>
621for connexion between node instead of C<ssh>.
622
623Example with parallel small job on 2 core:
624
[49]625 oarsub -n test -l /core=6,walltime=04:00:00 \
626   "oar-parexec -j 2 -f ./subjob.list.txt"
[44]627
628=head2 Tracing and master crash
629
630If the master node crash after hours of calculus, everything is lost ?
631No, with option C<--logtrace>,
632it's possible to remember older result
633and not re-run these job the second and next time.
634
[49]635 oarsub -n test -l /core=6,walltime=04:00:00 \
636   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
[44]637
638After a crash or an C<oardel> command,
639you can then re-run the same command that will end to execute the jobs in the list
640
[49]641 oarsub -n test -l /core=6,walltime=04:00:00 \
642   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
[44]643
644C<logtrace> file are just plain file.
645We use the extension '.log' because these files are automatically
646eliminate from our backup system!
647
648=head2 Checkpointing and Idempotent
649
650C<oar-parexec> is compatible with the OAR checkpointing.
[89]651If you have 2000 small jobs that need 55h to be done on 6 cores,
[44]652you can cut this in small parts.
653
654For this example, we suppose that each small job need about 10min...
655So, we send a checkpoint 12min before the end of the process
656to let C<oar-parexec> finish the jobs started.
657After being checkpointed, C<oar-parexec> do not start any new small job.
658
[49]659 oarsub -t idempotent -n test \
660   -l /core=6,walltime=04:00:00 \
661   --checkpoint 720 \
[44]662   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
663
664After 3h48min, the OAR job will begin to stop launching new small job.
665When all running small job are finished, it's exit.
666But as the OAR job is type C<idempotent>,
667OAR will re-submit it as long as all small job are not executed...
668
669This way, we let other users a chance to use the cluster!
670
671In this last exemple, we use moldable OAR job with idempotent
672to reserve many core for a small time or a few cores for a long time:
673
674 oarsub -t idempotent -n test \
675   -l /core=50,walltime=01:05:00 \
676   -l /core=6,walltime=04:00:00 \
677   --checkpoint 720 \
678   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
679
[78]680=head2 Signal, recurse and long job
[44]681
[78]682By default, OAR use signal USR2 for checkpointing.
[79]683It's possible to change this with option C<--kill>.
[78]684
685When use with long small job, checkpointing could be too long...
[79]686More than walltime!
687The option C<--transmit> could be use to checkpoint small job!
688These long small job will then stop cleanly and will be restarted next time.
[78]689
690In the C<logtrace> file, small job will have the status suspend.
[79]691They will be launch with the same command line at the next OAR run.
[78]692
[89]693Example: if you have 50 small jobs that each need 72h to be done on 1 cores,
694you can cut this in 24h parts.
695
696For this example, we suppose that each long job loop need about 20min...
697So, we send a checkpoint 30min before the end of the process
698to let C<oar-parexec> suspend the jobs started.
699After being checkpointed, C<oar-parexec> do not start any new small job.
700
701 oarsub -t idempotent -n test \
702   -l /core=6,walltime=24:00:00 \
703   --checkpoint 1800 \
704   --transmit \
705   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
706
707After 23h30min, the OAR job will begin to stop launching new small job.
708When all running small job are suspend, it's exit.
709But as the OAR job is type C<idempotent>,
710OAR will re-submit it as long as all small job are not finished...
711
[121]712=head2 Log format
713
714=over
715
716=item B<Version 2>
717
718 log version 2
719 start   subjob  1 pid 101468 at 1450482228 oarjob 71725 onnode cl7n001
720 end     subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
721 error   subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
722 suspend subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
[122]723 global-time total 555 cumulative 44444
[121]724
725=item B<Version 1>
726
727 log version 1
728 start job 1 / 101468 at 1450482228 oar job 71725 on node cl7n001
729 end   job 1 / 101468 at 1450482556 oar job 71725 on node cl7n001
[122]730 end   job 1 / 101468 at 1450482556 oar job 71725 on node cl7n001
731 error:retcode job 1 / 101468 at 1450482556 oar job 71725 on node cl7n00
[121]732
733=back
734
[122]735
[21]736=head1 SEE ALSO
737
[44]738oar-dispatch, mpilauncher,
739orsh, oar-envsh, ssh
[21]740
741
[13]742=head1 AUTHORS
743
[21]744Written by Gabriel Moreau, Grenoble - France
[13]745
[21]746
747=head1 LICENSE AND COPYRIGHT
748
749GPL version 2 or later and Perl equivalent
750
[121]751Copyright (C) 2011-2017 Gabriel Moreau / LEGI - CNRS UMR 5519 - France
Note: See TracBrowser for help on using the repository browser.