Context Navigation

source: trunk/oarutils/oar-parexec @ 114

Last change on this file since 114 was 113, checked in by g7moreau, 9 years ago
Delay launch of each subjob by one second
File size: 19.7 KB

Rev	Line
[13]	1	#!/usr/bin/perl
	2	#
	3	# 2011/11/27 gabriel
	4
	5	use strict;
	6
	7	use Getopt::Long();
	8	use Pod::Usage;
	9	use Coro;
	10	use Coro::Semaphore;
	11	use Coro::Signal;
	12	use Coro::Channel;
	13	use Coro::Handle;
	14	use IO::File;
	15	use POSIX qw( WNOHANG WEXITSTATUS );
[32]	16	use Cwd qw( getcwd );
[13]	17
# Command-line settings. Each variable is filled in by GetOptions below;
# the values assigned here are the defaults used when an option is absent.
my ($file, $dir, $cmd);             # job source: --file, or --dir with --cmd
my ($logtrace, $verbose, $help);
my ($masterio, $switchio);
my $sig_transmit;
my $job_np           = 1;
my $nodefile         = $ENV{OAR_NODE_FILE} || '';
my $oarsh            = 'oarsh -q -T';
my $sig_checkpoint   = 'USR2';
my $job_launch_brake = 1;           # one second time brake

# Option specification, kept as an ordered list of (spec => target) pairs.
my @option_spec = (
    'file=s'     => \$file,
    'dir=s'      => \$dir,
    'cmd=s'      => \$cmd,
    'logtrace=s' => \$logtrace,
    'verbose'    => \$verbose,
    'help'       => \$help,
    'oarsh=s'    => \$oarsh,
    'jobnp=i'    => \$job_np,
    'nodefile=s' => \$nodefile,
    'masterio=s' => \$masterio,
    'switchio'   => \$switchio,
    'transmit'   => \$sig_transmit,
    'kill=s'     => \$sig_checkpoint,
);

# Unparsable command line: short usage message.
Getopt::Long::GetOptions(@option_spec) || pod2usage(-verbose => 0);

# Explicit --help: full manual.
pod2usage(-verbose => 2) if $help;

# A job source is mandatory: either an existing command file, or an
# existing folder together with a non-empty command.
my $has_job_file   = -e "$file";
my $has_job_folder = (-d "$dir" and $cmd ne '');
pod2usage(-verbose => 2) unless $has_job_file or $has_job_folder;
[13]	53
# Re-run support: keep trace of jobs already done.
#
# %state maps a job name to 'start' (a start line was seen last) or 'end'
# (an end line was seen last), as read from a trace file left by a previous
# run. After the replay, the same file is reopened in append mode so that
# this run adds its own start/end lines to it.
my %state;
my $log_h = IO::File->new();
if (defined $logtrace and -e $logtrace) {
    # The mode is passed separately from the file name so that the name is
    # used verbatim (no whitespace stripping, no mode characters taken from
    # the name itself).
    $log_h->open($logtrace, '<')
        or die "error: can't read log file: $!";
    while (my $trace_line = <$log_h>) {
        $state{$1} = 'start' if $trace_line =~ m/^start\s+job\s+([^\s]+)\s/;
        $state{$1} = 'end'   if $trace_line =~ m/^end\s+job\s+([^\s]+)\s/;
    }
    $log_h->close();
}
if ($logtrace) {
    $log_h->open($logtrace, '>>')
        or die "error: can't append log file $logtrace: $!";
    $log_h->autoflush;

    # Wrap the handle with Coro::Handle's unblock so that it can be
    # written to from the coroutines.
    $log_h = unblock $log_h;
}
	72
# Jobs to run. Each entry of @job is a hash reference with the keys:
#   name - identifier used in messages and in the log trace
#   cmd  - shell command sent to the node
#   num  - 1-based position of the job in the list
my @job = ();
if (-e "$file") {
    # File mode: one job per line; comment lines and blank lines are skipped.
    my $job_num = 0;
    open(my $job_list_h, '<', "$file")
        or die "error: can't open job file $file: $!";
    while (my $job_cmd = <$job_list_h>) {
        chomp $job_cmd;
        next if $job_cmd =~ m/^#/;
        next if $job_cmd =~ m/^\s*$/;
        $job_num++;

        # An optional "name=..." inside a trailing comment names the job;
        # without it the job number is used.
        my ($job_name) = $job_cmd =~ m/#.*?\bname=(\S+?)\b/i;
        $job_name ||= $job_num;
        push @job, {
            name => $job_name,
            cmd  => "$job_cmd",
            num  => $job_num,
        };
    }
    close $job_list_h;
}
else {
    # Folder mode: one job per sub-folder of $dir, running $cmd inside it.
    my $job_num = 0;
    opendir(my $dir_h, $dir) or die "error: can't open folder $dir: $!";
    while (defined(my $item = readdir($dir_h))) {
        # ignore hidden entries, names containing ':' and "disabled" suffixes
        next if $item =~ m/^\./;
        next if $item =~ m/:/;
        next if $item =~ m/\.old$/;
        next if $item =~ m/\.sav$/;
        next if $item =~ m/\.bak$/;
        next if $item =~ m/\.no$/;
        next unless (-d "$dir/$item");
        $job_num++;
        push @job, {
            name => $item,
            cmd  => "cd $dir/$item/; $cmd",
            num  => $job_num,
        };
    }
    closedir $dir_h;
}
[13]	113
# Job names are assumed unique. If at least one name appears twice, every
# job is renamed after its number.
{
    my %seen = ();

    # number of distinct names found in the job list
    my $count_unique_name = grep { !$seen{ $_->{name} }++ } @job;

    # Compare with the number of jobs (scalar @job), not with the last
    # index ($#job, one less): otherwise a list whose names are all unique
    # is always renamed.
    if ($count_unique_name != scalar @job) {
        $_->{name} = $_->{num} for @job;
    }
}
	122
# Resources available: one entry per non-blank, non-comment line of the
# node file, in file order (a node listed several times counts several times).
my @ressources = ();
open(my $node_file_h, '<', "$nodefile")
    or die "can't open $nodefile: $!";
while (my $node = <$node_file_h>) {
    chomp $node;
    next if $node =~ m/^#/;
    next if $node =~ m/^\s*$/;
    push @ressources, $node;
}
close $node_file_h;

my $ressource_size = scalar(@ressources);

# The number of slots is later computed by dividing the resource count by
# $job_np, so a zero or negative value must be refused here.
die "error: jobnp must be a positive integer, not $job_np"
    if $job_np < 1;
die "error: not enought ressources jobnp $job_np > ressources $ressource_size"
    if $job_np > $ressource_size;
[34]	138
	139	my $current_dir = getcwd();
	140
[32]	141	my $stderr = $ENV{OAR_STDERR} \|\| '';
[13]	142	$stderr =~ s/\.stderr$//;
[32]	143	$stderr = $masterio if $masterio;
	144	my $stdout = $ENV{OAR_STDOUT} \|\| '';
[13]	145	$stdout =~ s/\.stdout$//;
[32]	146	$stdout = $masterio if $masterio;
[13]	147
	148	my $finished = new Coro::Signal;
	149	my $job_todo = new Coro::Semaphore 0;
[45]	150	my $job_name_maxlen;
	151	for (@job) {
	152	$job_todo->up;
	153	$job_name_maxlen = length($_->{name}) if length($_->{name}) > $job_name_maxlen;
	154	}
[13]	155
# Slices of resources for parallel jobs: the node list is cut into
# consecutive groups of $job_np entries. Each group, joined with commas,
# is one slot queued in the channel; leftover entries that do not fill a
# whole group are not used.
my $ressources = Coro::Channel->new;
my $slot_count = int($ressource_size / $job_np);
for my $slot_index (0 .. $slot_count - 1) {
    my $first_node = $slot_index * $job_np;
    my $last_node  = $first_node + $job_np - 1;
    $ressources->put(join(',', @ressources[ $first_node .. $last_node ]));
}
	164
	165	my %scheduled = ();
	166
[43]	167	# OAR checkpoint and default signal SIGUSR2
[39]	168	my $oar_checkpoint = new Coro::Semaphore 0;
[84]	169	my $notify = new Coro::Signal;
[75]	170	$SIG{$sig_checkpoint} = sub {
[42]	171	print "warning: receive checkpoint at "
	172	. time
	173	. ", no new job, just finishing running job\n"
	174	if $verbose;
	175	$oar_checkpoint->up();
[84]	176	$notify->send if $sig_transmit;
[42]	177	};
[39]	178
# Asynchronous notify block: every time $notify is raised, connect to the
# node of each scheduled sub-job and send the checkpoint signal to the pid
# stored in that sub-job's pid file.
#
# NOTE(review): no semicolon follows this block, so the next async
# expression is parsed as part of this call's argument list; this is kept
# as in the original.
async {
    while () {
        $notify->wait;

        for my $job_pid (keys %scheduled) {
            # The key list is a snapshot taken before the loop. This
            # coroutine cedes after each notification, so a job may have
            # been removed from %scheduled in the meantime. Skip it: reading
            # $scheduled{$job_pid}->{...} would otherwise autovivify an
            # empty entry for a job that no longer exists.
            next if not exists $scheduled{$job_pid};

            my $job_name     = $scheduled{$job_pid}->{name};
            my $job_pidfile  = $scheduled{$job_pid}->{pidfile};
            my $node_connect = $scheduled{$job_pid}->{node_connect};

            # short-lived remote shell, only used to deliver the signal
            my $fh = IO::File->new();
            $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
                or die "error: can't notify subjob: $!";

            $fh->autoflush;
            $fh = unblock $fh;

            $fh->print("kill -$sig_checkpoint \$(cat $job_pidfile)\n");
            $fh->print("exit\n");

            print "warning: transmit signal $sig_checkpoint"
                . " to job $job_name on node $node_connect.\n"
                if $verbose;

            close $fh;
            cede;
        }
    }
}
	208
# Asynchronous start block: walks the job list in order and, for each job
# that still has to run, waits for a free resource slot, opens a remote
# shell on the first node of the slot and feeds it the job script.
# (No semicolon after the block, as in the original: the following async
# expression belongs to this call's argument list.)
async {
    my $brake_until;
    JOB:
    for my $job (@job) {
        my ($job_name, $job_cmd) = @{$job}{qw(name cmd)};

        # Was the job already seen in a previous trace?
        if (exists $state{$job_name}) {
            my $previous_state = $state{$job_name};
            if ($previous_state eq 'start') {
                # started but never ended: run it again
                print "warning: job $job_name was not clearly finished, relaunching...\n"
                    if $verbose;
            }
            elsif ($previous_state eq 'end') {
                # completed: account for it and move on
                delete $state{$job_name};    # free memory
                $job_todo->down;
                print "warning: job $job_name already run\n" if $verbose;
                cede;
                next JOB;
            }
        }

        # Launch brake: let the other coroutines run until
        # $job_launch_brake seconds have elapsed on the AE clock.
        $brake_until = AE::now + $job_launch_brake;
        until (AE::now >= $brake_until) {
            AE::now_update;    # force update of AE time
            cede;
        }

        # Take a resource slot from the channel.
        my $job_ressource = $ressources->get;

        # No new job is launched once a checkpoint has been received.
        last JOB if $oar_checkpoint->count() > 0;

        # The remote shell is opened on the first node of the slot.
        my ($node_connect) = split ',', $job_ressource;
        my $fh = IO::File->new();
        my $job_pid = $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
            or die "error: can't start subjob: $!";

        $fh->autoflush;
        $fh = unblock $fh;

        my $msg = sprintf "start job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
            $job_name, $job_pid, time, $ENV{OAR_JOB_ID}, $job_ressource;
        $log_h->print($msg) if $logtrace;
        print($msg) if $verbose;

        # Per-job output redirections, only with --switchio and a known
        # base name; they stay empty otherwise.
        my $job_stdout = ($stdout ne '' and $switchio) ? "> $stdout-$job_name.stdout"  : '';
        my $job_stderr = ($stderr ne '' and $switchio) ? "2> $stderr-$job_name.stderr" : '';

        # Temporary files written on the node: node list and shell pid.
        my $job_tmp_base = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name";
        my $job_nodefile = $job_tmp_base;
        my $job_pidfile  = "$job_tmp_base.pid";

        $scheduled{$job_pid} = {
            fh           => $fh,
            node_connect => $node_connect,
            ressource    => $job_ressource,
            name         => $job_name,
            pidfile      => $job_pidfile,
        };

        # Shell script sent to the node, one entry per print, in order:
        # set the job environment, run the command, then clean up.
        my @script = ();

        if ($job_np > 1) {
            # parallel sub-job: give it its own node file and core count
            push @script,
                "printf \"" . join('\n', split(',', $job_ressource)) . "\" > $job_nodefile\n",
                "OAR_NODE_FILE=$job_nodefile\n",
                "OAR_NP=$job_np\n",
                "export OAR_NODE_FILE\n",
                "export OAR_NP\n",
                "unset OAR_MSG_NODEFILE\n";
        }

        push @script, "cd $current_dir\n";

        if ($sig_transmit) {
            # relay the checkpoint signal to the processes started by the
            # background jobs, and record the pid of this shell
            push @script,
                "trap 'jobs -p|xargs -r ps -o pid --no-headers --ppid|xargs -r kill -$sig_checkpoint' $sig_checkpoint\n",
                "echo \$\$ > $job_pidfile\n";
        }

        # the command runs in a background subshell; the loop waits until
        # no background job is left
        push @script,
            "(\n",
            "$job_cmd\n",
            ") $job_stdout $job_stderr \&\n",
            "while [ \$(jobs -p | wc -l) -gt 0 ]\n",
            "do\n",
            " wait\n",
            "done\n";

        push @script, "rm -f $job_pidfile\n"  if $sig_transmit;
        push @script, "rm -f $job_nodefile\n" if $job_np > 1;
        push @script, "exit\n";

        for my $shell_line (@script) {
            $fh->print($shell_line);
        }
        cede;
    }
}
	308
# Asynchronous end block: polls the pid of every scheduled sub-job without
# blocking. When waitpid reports something for a pid, the job is logged as
# ended (or as suspended after a transmitted checkpoint), its resource slot
# is returned to the channel and the job is removed from %scheduled.
# The block is followed by "cede;" with no semicolon in between, as in the
# original.
async {
    while () {
        for my $job_pid (keys %scheduled) {
            # non blocking PID test: nothing to report, try the next one
            unless (waitpid($job_pid, WNOHANG)) {
                cede;
                next;
            }

            my $ended = $scheduled{$job_pid};
            my $msg = sprintf "end job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
                $ended->{name}, $job_pid, time, $ENV{OAR_JOB_ID}, $ended->{ressource};

            # After a transmitted checkpoint the job is not finished,
            # only suspended.
            if ($sig_transmit and $oar_checkpoint->count() > 0) {
                $msg =~ s/^end\s+job/suspend job/;
            }

            $log_h->print($msg) if $logtrace;
            print($msg) if $verbose;
            close $ended->{fh};

            # leave the resources for another job
            $ressources->put($ended->{ressource});
            $job_todo->down;
            delete $scheduled{$job_pid};
            cede;
        }

        # Checkpointing: once the running jobs are all gone, quit.
        my $checkpointed = $oar_checkpoint->count() > 0;
        $finished->send if $checkpointed and scalar(keys(%scheduled)) == 0;

        # Normal end: every job of the list has been accounted for.
        $finished->send if $job_todo->count() == 0;
        cede;
    }
}

cede;

# Block until all jobs have been done (or the checkpoint has drained).
$finished->wait;

# close log trace file
$log_h->close() if $logtrace;
[38]	349
[13]	350	__END__
	351
	352	=head1 NAME
	353
[88]	354	oar-parexec - parallel execution of many small short or long job
[13]	355
	356	=head1 SYNOPSIS
	357
[47]	358	oar-parexec --file filecommand \
	359	[--logtrace tracefile] [--verbose] \
	360	[--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
[88]	361	[--switchio] [--masterio basefileio] \
	362	[--kill signal] [--transmit]
[46]	363
[47]	364	oar-parexec --dir foldertoiterate --cmd commandtolaunch \
	365	[--logtrace tracefile] [--verbose] \
	366	[--jobnp integer] [--nodefile filenode] [--oarsh sssh] \
[88]	367	[--switchio] [--masterio basefileio] \
	368	[--kill signal] [--transmit]
[46]	369
[13]	370	oar-parexec --help
	371
[32]	372	=head1 DESCRIPTION
	373
[88]	374	C<oar-parexec> can execute lot of small short or long job in parallel inside a cluster.
	375	Number of parallel job at one time cannot exceed the number of core define in the node file.
[32]	376	C<oar-parexec> is easier to use inside an OAR job environment
[44]	377	which define automatically these strategics parameters...
	378	However, it can be used outside OAR.
[32]	379
[47]	380	Option C<--file> or C<--dir> and C<--cmd> are the only mandatory parameters.
[32]	381
	382	Small job will be launch in the same folder as the master job.
[44]	383	Two environment variable are defined for each small job
[37]	384	and only in case of parallel small job (option C<--jobnp> > 1).
[32]	385
[34]	386	OAR_NODE_FILE - file that list node for parallel computing
	387	OAR_NP - number of processor affected
[32]	388
[44]	389	The file define by OAR_NODE_FILE is created in /tmp
	390	on the node before launching the small job
	391	and this file will be delete after job complete.
[34]	392	C<oar-parexec> is a simple script,
	393	OAR_NODE_FILE will not be deleted in case of crash of the master job.
	394
[37]	395	OAR define other variable that are equivalent to OAR_NODE_FILE:
	396	OAR_NODEFILE, OAR_FILE_NODES, OAR_RESOURCE_FILE...
	397	You can use in your script the OAR original file ressources
	398	by using these variable if you need it.
[34]	399
[88]	400	When use with long job,
	401	activate option C<--transmit> to send OAR checkpoint signal
	402	and suspend small job before the walltime cut!
[82]	403
[13]	404	=head1 OPTIONS
	405
[32]	406	=over 12
[13]	407
[47]	408	=item B<-f\|--file filecommand>
[13]	409
[32]	410	File name which content job list.
[45]	411	For the JOB_NAME definition,
	412	the first valid job in the list will have the number 1 and so on...
[13]	413
[77]	414	It's possible to fix the name inside a comment on the job line.
	415	For example:
	416
	417	$HOME/test/subjob1.sh # name=subjob1
	418
	419	The key C<name> is case insensitive,
	420	the associated value cannot have a space...
	421
[88]	422	The command can be any shell command.
	423	It's possible to change folder,
	424	or launch an asynchrone job in parallel,
	425	but one command must block and not be launch in asynchrone (with & or coproc).
	426	Example :
	427
	428	cd ./test; ./subjob1.sh
	429	cd ./test; nice -18 du -sk ./ & ./test/subjob1.sh
	430
	431	Command C<du -sk ./> will be done in parallel on the same ressource...
	432
[47]	433	=item B<-d\|--dir foldertoiterate>
[45]	434
	435	Command C<--cmd> will be launch in all sub-folder of this master folder.
	436	Files in this folder will be ignored.
[47]	437	Sub-folder name which begin with F<.>
	438	or finish with F<.old>, F<.sav>, F<.bak>, F<.no> will either be ignored...
[45]	439
	440	The JOB_NAME is simply the Sub-folder name.
	441
	442	=item B<-c\|--cmd commandtolaunch>
	443
[88]	444	Command (and its arguments) that will be launched in every sub-folder
	445	of the folder given by option C<--dir>.
	446	Like for option C<--file>, command can be any valid shell command
	447	but one must block.
[45]	448
[43]	449	=item B<-l\|--logtrace tracefile>
	450
	451	File which log and trace running job.
[44]	452	In case of running the same master command (after crash for example),
	453	only job that are not mark as done will be run again.
	454	Be careful, job mark as running (start but not finish) will be run again.
[45]	455	Tracing is base on the JOB_NAME between multiple run.
[43]	456
	457	This option is very useful in case of a crash
	458	but also for checkpointing and idempotent OAR job.
	459
[32]	460	=item B<-v\|--verbose>
[13]	461
[34]	462	=item B<-j\|--jobnp integer>
[13]	463
[34]	464	Number of processor to allocated for each small job.
	465	1 by default.
	466
	467	=item B<-n\|--nodefile filenode>
	468
[44]	469	File name that list all the node where job could be launch.
[32]	470	By default, it is defined automatically by OAR via the
	471	environment variable C<OAR_NODE_FILE>.
[13]	472
[32]	473	For example, if you want to use 6 core on your cluster node,
	474	you need to put 6 times the hostname node in this file,
	475	one per line...
	476	It's a very common file in MPI process !
[13]	477
[46]	478	=item B<-o\|--oarsh command>
[13]	479
[46]	480	Command use to launch a shell on a node.
	481	By default
[13]	482
[46]	483	oarsh -q -T
	484
	485	Change it to C<ssh> if you are not using an OAR cluster...
	486
[32]	487	=item B<-s\|--switchio>
[21]	488
[32]	489	Each small job will have its own STDOUT and STDERR output files,
[45]	490	named after those of the master OAR job with C<JOB_NAME> inserted
[32]	491	(or after C<basefileio> if option C<--masterio> is given).
	492	Example :
[21]	493
[45]	494	OAR.151524.stdout -> OAR.151524-JOB_NAME.stdout
[21]	495
[32]	496	where 151524 here is the master C<OAR_JOB_ID>
[45]	497	and C<JOB_NAME> is the small job name.
[21]	498
[46]	499	=item B<-m\|--masterio basefileio>
[32]	500
[46]	501	The C<basefileio> will be used in place of the environment variables
	502	C<OAR_STDOUT> and C<OAR_STDERR> (without extension) to build the base name of the small job standard output
	503	(only used when option C<--switchio> is activated).
[32]	504
[78]	505	=item B<-k\|--kill signal>
	506
	507	Signal to listen and make a clean stop of the current C<oar-parexec> process.
	508	By default, the USR2 signal is used (see C<kill -l> for the list of possible signals).
	509
	510	=item B<-t\|--transmit>
	511
	512	Resend the caught signal to the sub-jobs when receiving it.
	513	By default, no signal is transmitted to the child processes.
	514
	515	It's only valuable if use for long sub-job than can
	516	in return make themselves a clean restart.
	517
	518
[32]	519	=item B<-h\|--help>
	520
	521	=back
	522
	523
	524	=head1 EXAMPLE
	525
[44]	526	=head2 Simple list of sequential job
	527
[47]	528	Content for the job file command (option C<--file>) could have:
[21]	529
[13]	530	- empty line
	531	- comment line begin with #
[86]	532	- valid shell command (can contain a comment)
[13]	533
	534	Example where F<$HOME/test/subjob1.sh> is a shell script (executable).
	535
[86]	536	$HOME/test/subjob01.sh # name=subjob01
	537	$HOME/test/subjob02.sh # name=subjob02
	538	$HOME/test/subjob03.sh # name=subjob03
	539	$HOME/test/subjob04.sh # name=subjob04
[32]	540	...
[86]	541	$HOME/test/subjob38.sh # name=subjob38
	542	$HOME/test/subjob39.sh # name=subjob39
	543	$HOME/test/subjob40.sh # name=subjob40
[13]	544
[44]	545	These jobs could be launch by:
[13]	546
[49]	547	oarsub -n test -l /core=6,walltime=04:00:00 \
	548	"oar-parexec -f ./subjob.list.txt"
[13]	549
[47]	550	=head2 Folder job
	551
	552	In a folder F<subjob.d>, create sub-folders with your data inside: F<test1>, F<test2>...
	553	The same command will be executed in every sub-folder.
	554	C<oar-parexec> change the current directory to the sub-folder before launching it.
	555
	556	A very simple job could be:
	557
[49]	558	oarsub -n test -l /core=6,walltime=04:00:00 \
	559	"oar-parexec -d ./subjob.d -c 'sleep 10; env'"
[47]	560
	561	The command C<env> will be executed in every folder F<test1>, F<test2>... after a 10s pause.
	562
	563	Sometime, it's simpler to use file list command,
	564	sometime, jobs by folder with the same command run is more relevant.
	565
[44]	566	=head2 Parallel job
[28]	567
[44]	568	You need to put the number of core each small job need with option C<--jobnp>.
	569	If your job is build on OpenMP or MPI,
	570	you can use OAR_NP and OAR_NODE_FILE variables to configure them.
	571	On OAR cluster, you need to use C<oarsh> or a wrapper like C<oar-envsh>
	572	for connexion between node instead of C<ssh>.
	573
	574	Example with parallel small job on 2 core:
	575
[49]	576	oarsub -n test -l /core=6,walltime=04:00:00 \
	577	"oar-parexec -j 2 -f ./subjob.list.txt"
[44]	578
	579	=head2 Tracing and master crash
	580
	581	If the master node crash after hours of calculus, everything is lost ?
	582	No, with option C<--logtrace>,
	583	it's possible to remember older result
	584	and not re-run these job the second and next time.
	585
[49]	586	oarsub -n test -l /core=6,walltime=04:00:00 \
	587	"oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
[44]	588
	589	After a crash or an C<oardel> command,
	590	you can then re-run the same command that will end to execute the jobs in the list
	591
[49]	592	oarsub -n test -l /core=6,walltime=04:00:00 \
	593	"oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
[44]	594
	595	C<logtrace> file are just plain file.
	596	We use the extension '.log' because these files are automatically
	597	eliminate from our backup system!
	598
	599	=head2 Checkpointing and Idempotent
	600
	601	C<oar-parexec> is compatible with the OAR checkpointing.
[89]	602	If you have 2000 small jobs that need 55h to be done on 6 cores,
[44]	603	you can cut this in small parts.
	604
	605	For this example, we suppose that each small job need about 10min...
	606	So, we send a checkpoint 12min before the end of the process
	607	to let C<oar-parexec> finish the jobs started.
	608	After being checkpointed, C<oar-parexec> do not start any new small job.
	609
[49]	610	oarsub -t idempotent -n test \
	611	-l /core=6,walltime=04:00:00 \
	612	--checkpoint 720 \
[44]	613	"oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
	614
	615	After 3h48min, the OAR job will begin to stop launching new small job.
	616	When all running small job are finished, it's exit.
	617	But as the OAR job is type C<idempotent>,
	618	OAR will re-submit it as long as all small job are not executed...
	619
	620	This way, we let other users a chance to use the cluster!
	621
	622	In this last example, we use a moldable OAR job with idempotent
	623	to reserve many core for a small time or a few cores for a long time:
	624
	625	oarsub -t idempotent -n test \
	626	-l /core=50,walltime=01:05:00 \
	627	-l /core=6,walltime=04:00:00 \
	628	--checkpoint 720 \
	629	"oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
	630
[78]	631	=head2 Signal, recurse and long job
[44]	632
[78]	633	By default, OAR use signal USR2 for checkpointing.
[79]	634	It's possible to change this with option C<--kill>.
[78]	635
	636	When use with long small job, checkpointing could be too long...
[79]	637	More than walltime!
	638	The option C<--transmit> could be use to checkpoint small job!
	639	These long small job will then stop cleanly and will be restarted next time.
[78]	640
	641	In the C<logtrace> file, small job will have the status suspend.
[79]	642	They will be launch with the same command line at the next OAR run.
[78]	643
[89]	644	Example: if you have 50 small jobs that each need 72h to be done on 1 cores,
	645	you can cut this in 24h parts.
	646
	647	For this example, we suppose that each long job loop need about 20min...
	648	So, we send a checkpoint 30min before the end of the process
	649	to let C<oar-parexec> suspend the jobs started.
	650	After being checkpointed, C<oar-parexec> do not start any new small job.
	651
	652	oarsub -t idempotent -n test \
	653	-l /core=6,walltime=24:00:00 \
	654	--checkpoint 1800 \
	655	"oar-parexec -f ./subjob.list.txt -l ./subjob.list.log --transmit"
	657
	658	After 23h30min, the OAR job will begin to stop launching new small job.
	659	When all running small job are suspend, it's exit.
	660	But as the OAR job is type C<idempotent>,
	661	OAR will re-submit it as long as all small job are not finished...
	662
[21]	663	=head1 SEE ALSO
	664
[44]	665	oar-dispatch, mpilauncher,
	666	oarsh, oar-envsh, ssh
[21]	667
	668
[13]	669	=head1 AUTHORS
	670
[21]	671	Written by Gabriel Moreau, Grenoble - France
[13]	672
[21]	673
	674	=head1 LICENSE AND COPYRIGHT
	675
	676	GPL version 2 or later and Perl equivalent
	677
[28]	678	Copyright (C) 2011 Gabriel Moreau / LEGI - CNRS UMR 5519 - France
[21]	679

Note: See TracBrowser for help on using the repository browser.

Download in other formats: