source: trunk/oarutils/oar-parexec @ 34

Last change on this file since 34 was 34, checked in by g7moreau, 12 years ago
  • Parallel sub-job !
File size: 6.8 KB
Line 
1#!/usr/bin/perl
2#
3# 2011/11/27 gabriel
4
5use strict;
6
7use Getopt::Long();
8use Pod::Usage;
9use Coro;
10use Coro::Semaphore;
11use Coro::Signal;
12use Coro::Channel;
13use Coro::Handle;
14use IO::File;
15use POSIX qw( WNOHANG WEXITSTATUS );
16use Cwd qw( getcwd );
17
# Option values. Those given an initial value here keep it when the
# corresponding switch is absent from the command line.
my ( $verbose, $help, $masterio, $switchio );
my $file     = '';
my $job_np   = 1;
my $oarsh    = 'oarsh -q -T';
my $nodefile = $ENV{OAR_NODE_FILE} || '';

# Getopt::Long specification: option name (with type suffix) => destination.
my @option_spec = (
   'file=s'     => \$file,
   'verbose'    => \$verbose,
   'help'       => \$help,
   'oarsh=s'    => \$oarsh,
   'jobnp=i'    => \$job_np,
   'nodefile=s' => \$nodefile,
   'masterio=s' => \$masterio,
   'switchio'   => \$switchio,
   );

# Unknown or malformed options: print the short usage message and exit.
Getopt::Long::GetOptions(@option_spec) or pod2usage( -verbose => 0 );

# Print the full manual on request, or when the job list file does not exist.
if ( $help or not -e $file ) {
   pod2usage( -verbose => 2 );
   }
39
# Load the job list: one shell command per line.
# Lines starting with '#' and blank lines are ignored.
my @job = ();
open( my $job_list_fh, '<', $file ) or die "can't open $file: $!";
while ( my $line = <$job_list_fh> ) {
   chomp $line;
   next if $line =~ m/^#/;
   next if $line =~ m/^\s*$/;
   push @job, $line;
   }
close $job_list_fh;
49
# Load the node list: one entry per line.
# Lines starting with '#' and blank lines are ignored.
my @ressources = ();
open( my $node_fh, '<', $nodefile )
   or die "can't open $nodefile: $!";
while ( my $line = <$node_fh> ) {
   chomp $line;
   next if $line =~ m/^#/;
   next if $line =~ m/^\s*$/;
   push @ressources, $line;
   }
close $node_fh;

my $ressource_size = scalar(@ressources);

# A slot needs at least one node; a value below 1 would also divide by zero
# when the number of slots is computed from $ressource_size / $job_np.
die "jobnp must be at least 1 (got $job_np)" if $job_np < 1;

# Abort only when a single job needs more nodes than are available.
# (The test used to be negated, which aborted every valid configuration.)
die "not enought ressources jobnp $job_np > ressources $ressource_size"
   if $job_np > $ressource_size;
63
# Directory the master job was started from.
my $current_dir = getcwd();

# Base names (extension removed) used to build the per-job output file
# names. --masterio, when given, replaces the names taken from the
# OAR_STDERR / OAR_STDOUT environment variables.
my ( $stderr, $stdout );
if ($masterio) {
   $stderr = $masterio;
   $stdout = $masterio;
   }
else {
   $stderr = $ENV{OAR_STDERR} || '';
   $stderr =~ s/\.stderr$//;
   $stdout = $ENV{OAR_STDOUT} || '';
   $stdout =~ s/\.stdout$//;
   }
72
73
# Signal sent by the reaper coroutine once no job remains to be done.
my $finished = Coro::Signal->new;

# Number of jobs that have not finished yet; decremented once per reaped job.
my $job_todo = Coro::Semaphore->new( scalar @job );

# Channel of free slots. Each slot is an array ref holding $job_np
# consecutive entries of the node list; leftover nodes that do not fill a
# complete slot are not used.
my $ressources = Coro::Channel->new;
my $slot_count = int( $ressource_size / $job_np );
for my $slot ( 0 .. $slot_count - 1 ) {
   my $first = $slot * $job_np;
   $ressources->put( [ @ressources[ $first .. $first + $job_np - 1 ] ] );
   }


# Sequence number of the last job started (1-based once the first job runs).
my $job_num   = 0;

# Running jobs, keyed by the pid of the local process piping to the node.
my %scheduled = ();
86
# Launcher coroutine: for every job, take a free slot from the channel
# (waiting until one is available), open a remote shell on the first node of
# the slot and feed it the commands that run the job.
async {
   for my $job (@job) {
      my $nodes = $ressources->get;

      $job_num++;

      my $node_connect = $nodes->[0];
      my $fh           = IO::File->new();
      my $job_pid      = $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
         or die "don't start subjob: $!";

      $fh->autoflush;
      $fh = unblock $fh;

      # Keep the whole slot, not only its first node, so that the reaper can
      # hand it back to the channel intact when the job ends.
      $scheduled{$job_pid} = {
         fh    => $fh,
         node  => $node_connect,
         nodes => $nodes,
         num   => $job_num,
         };

      printf "start job %5i / %5i on node %s at %s\n",
         $job_num, $job_pid, $node_connect, time
         if $verbose;

      # Output redirections are only added with --switchio; default to empty
      # strings so that nothing undefined is interpolated into the command.
      my $job_stdout = '';
      my $job_stderr = '';
      $job_stdout = ">  $stdout-$job_num.stdout" if $stdout ne '' and $switchio;
      $job_stderr = "2> $stderr-$job_num.stderr" if $stderr ne '' and $switchio;

      # USERNAME is not set in every environment; fall back to USER, then to
      # the numeric uid, so the temporary file name always identifies a user.
      my $user         = $ENV{USERNAME} || $ENV{USER} || $<;
      my $job_nodefile = "/tmp/oar-parexec-$user-$job_num";

      if ($job_np > 1) {
         # Build the node list on the Perl side. The single-quoted '\n' is a
         # literal backslash-n that the remote printf turns into a newline;
         # the trailing empty element terminates the last line.
         my $node_list = join( '\n', @{$nodes}, '' );
         $fh->print("printf \"$node_list\" > $job_nodefile\n");
         $fh->print("OAR_NODE_FILE=$job_nodefile\n");
         $fh->print("OAR_NP=$job_np\n");
         $fh->print("export OAR_NODE_FILE\n");
         $fh->print("export OAR_NP\n");
         $fh->print("unset OAR_MSG_NODEFILE\n");
         }
      # NOTE(review): paths are sent to the remote shell unquoted, so a
      # directory or base name containing spaces would break these commands.
      $fh->print("cd $current_dir\n");
      $fh->print("$job $job_stdout $job_stderr\n");
      $fh->print("rm -f $job_nodefile\n") if $job_np > 1;
      $fh->print("exit\n");
      cede;
      }
   }

# Reaper coroutine: poll the running jobs without blocking; when one has
# ended, release its slot and decrement the count of jobs still to do.
async {
   while () {
      for my $job_pid ( keys %scheduled ) {
         if ( waitpid( $job_pid, WNOHANG ) ) {
            my $done = $scheduled{$job_pid};
            printf "end   job %5i / %5i on node %s at %s\n",
               $done->{num},
               $job_pid, $done->{node}, time
               if $verbose;
            close $done->{fh};

            # Return the complete slot (array ref): the launcher expects
            # array refs from the channel, not a bare node name.
            $ressources->put( $done->{nodes} );
            $job_todo->down;
            delete $scheduled{$job_pid};
            }
         cede;
         }

      # Every job has been reaped: wake the main program and stop polling.
      if ( $job_todo->count == 0 ) {
         $finished->send;
         last;
         }
      cede;
      }
   }

# Let the coroutines start, then block until the reaper reports completion.
cede;

$finished->wait;
153
154__END__
155
156=head1 NAME
157
oar-parexec - run many small jobs in parallel
159
160=head1 SYNOPSIS
161
162 oar-parexec --file filecommand [--verbose] [--jobnp integer] [--nodefile filenode] [--masterio basefileio] [--switchio] [--oarsh sssh]
163 oar-parexec --help
164
165=head1 DESCRIPTION
166
C<oar-parexec> executes many small jobs in parallel inside a cluster.
The number of jobs running at the same time cannot exceed the number of cores listed in the node file.
C<oar-parexec> is easier to use inside an OAR job environment,
which automatically defines these key parameters...
171
172Option C<--file> is the only mandatory one.
173
Small jobs are launched in the same folder as the master job.
Two environment variables are defined for each small job,
but only for parallel small jobs (option C<--jobnp> > 1).
177
 OAR_NODE_FILE - file that lists the nodes for parallel computing
 OAR_NP        - number of processors allocated
180
The file defined by OAR_NODE_FILE is created in /tmp on the node before
launching the small job, and is deleted afterwards...
C<oar-parexec> is a simple script:
OAR_NODE_FILE will not be deleted if the master job crashes.
185
186
187=head1 OPTIONS
188
189=over 12
190
191=item B<-f|--file       filecommand>
192
Name of the file which contains the job list.
194
195=item B<-v|--verbose>
196
197=item B<-j|--jobnp integer>
198
Number of processors to allocate to each small job.
1 by default.
201
202=item B<-n|--nodefile filenode>
203
Name of the file that lists all the nodes on which to launch jobs.
By default, it is defined automatically by OAR via the
environment variable C<OAR_NODE_FILE>.

For example, if you want to use 6 cores on your cluster node,
you need to put the node hostname 6 times in this file,
one per line...
This kind of file is very common for MPI processes!
212
213=item B<-m|--masterio basefileio>
214
C<basefileio> is used in place of the environment variables
C<OAR_STDOUT> and C<OAR_STDERR> (without extension) to build the base name of the small jobs' standard output and error files
(only when option C<switchio> is activated).
218
219=item B<-s|--switchio>
220
Each small job has its own STDOUT and STDERR output files,
named after those of the master OAR job with C<JOB_NUM> inserted
(or after C<basefileio> if option C<masterio> is given).
Example:

 OAR.151524.stdout -> OAR.151524-JOB_NUM.stdout

where 151524 is the master C<OAR_JOB_ID>
and C<JOB_NUM> is the small job number.
230
=item B<-o|--oarsh command>

Command used to launch a shell on a node.
By default

        oarsh -q -T
237
238=item B<-h|--help>
239
240=back
241
242
243=head1 EXAMPLE
244
The job file (option C<--file>) can contain:

 - empty lines
 - comment lines beginning with #
 - valid shell commands

Example where F<$HOME/test/subjob1.sh> is an executable shell script.
252
253 $HOME/test/subjob1.sh
254 $HOME/test/subjob2.sh
255 $HOME/test/subjob3.sh
256 $HOME/test/subjob4.sh
257 ...
258 $HOME/test/subjob38.sh
259 $HOME/test/subjob39.sh
260 $HOME/test/subjob40.sh
261
These jobs can be launched with
263
264 oarsub -n test -l /core=6,walltime=00:35:00 "oar-parexec -f ./subjob.list.txt"
265
266
267=head1 SEE ALSO
268
269oar-dispatch, mpilauncher
270
271
272=head1 AUTHORS
273
274Written by Gabriel Moreau, Grenoble - France
275
276
277=head1 LICENSE AND COPYRIGHT
278
279GPL version 2 or later and Perl equivalent
280
281Copyright (C) 2011 Gabriel Moreau / LEGI - CNRS UMR 5519 - France
282
Note: See TracBrowser for help on using the repository browser.