source: trunk/oarutils/oar-parexec @ 37

Last change on this file since 37 was 37, checked in by g7moreau, 12 years ago
  • Revert to OAR_NODE_FILE to be uniform
File size: 7.2 KB
Line 
1#!/usr/bin/perl
2#
3# 2011/11/27 gabriel
4
5use strict;
6
7use Getopt::Long();
8use Pod::Usage;
9use Coro;
10use Coro::Semaphore;
11use Coro::Signal;
12use Coro::Channel;
13use Coro::Handle;
14use IO::File;
15use POSIX qw( WNOHANG WEXITSTATUS );
16use Cwd qw( getcwd );
17
# Command line state.  Defaults are set here and overridden by GetOptions.
my $file     = '';                          # --file: job list, one shell command per line (mandatory)
my $verbose;                                # --verbose: print a line at the start and end of each job
my $job_np   = 1;                           # --jobnp: number of node-file entries given to each job
my $nodefile = $ENV{OAR_NODE_FILE} || '';   # --nodefile: defaults to the file provided by OAR
my $masterio;                               # --masterio: base name used for per-job output files
my $switchio;                               # --switchio: redirect each job to its own stdout/stderr
my $help;                                   # --help: show the full manual
my $oarsh    = 'oarsh -q -T';               # --oarsh: command used to open a shell on a node

# Getopt::Long is loaded with an empty import list, hence the qualified call.
Getopt::Long::GetOptions(
   'file=s'     => \$file,
   'verbose'    => \$verbose,
   'help'       => \$help,
   'oarsh=s'    => \$oarsh,
   'jobnp=i'    => \$job_np,
   'nodefile=s' => \$nodefile,
   'masterio=s' => \$masterio,
   'switchio'   => \$switchio,
   ) or pod2usage( -verbose => 0 );
pod2usage( -verbose => 2 ) if $help;

# --file is the only mandatory option: say what is wrong before the manual
# is shown instead of printing the manual with no explanation.
pod2usage( -message => "option --file is mandatory", -verbose => 2 )
   if $file eq '';
pod2usage( -message => "job file '$file' does not exist", -verbose => 2 )
   if not -e $file;

# The number of slots is computed as int(ressources / jobnp) further down,
# so a zero value would die with "Illegal division by zero" and a negative
# one would yield no slot at all.
pod2usage( -message => "option --jobnp must be at least 1 (got $job_np)", -verbose => 0 )
   if $job_np < 1;
39
# Read a list file and return its useful lines, in file order.
#
# Parameter: $path - name of the file to read.
# Returns:   the chomped lines, without blank lines and comment lines
#            (first non-blank character is '#').
# Dies:      with "can't open PATH: REASON" when the file cannot be opened.
sub _read_list_file {
   my ($path) = @_;

   open( my $fh, '<', $path ) or die "can't open $path: $!";
   my @entries = ();
   while ( my $line = <$fh> ) {
      chomp $line;
      # An indented comment is skipped too: otherwise it would be scheduled
      # as a job (or used as a node name) although it does nothing.
      next if $line =~ m/^\s*#/;
      next if $line =~ m/^\s*$/;
      push @entries, $line;
      }
   close $fh;

   return @entries;
   }

# Shell commands to run, one small job per entry.
my @job = _read_list_file($file);

# Node names, one entry per usable core (a node may appear several times).
my @ressources = _read_list_file($nodefile);
60
# Number of entries read from the node file; a job needs $job_np of them.
my $ressource_size = @ressources;
if ( $job_np > $ressource_size ) {
   die "not enought ressources jobnp $job_np > ressources $ressource_size";
   }

# Small jobs are started from the directory the master job is running in.
my $current_dir = getcwd();

# Base names of the per-job output files: --masterio when given, otherwise
# the OAR master job file names with their .stderr / .stdout suffix removed
# (empty string when the environment variable is not set).
my $stderr = $masterio;
my $stdout = $masterio;
if ( not $masterio ) {
   ( $stderr = $ENV{OAR_STDERR} || '' ) =~ s/\.stderr$//;
   ( $stdout = $ENV{OAR_STDOUT} || '' ) =~ s/\.stdout$//;
   }

72
73
# Signalled by the reaper coroutine once every job has ended.
my $finished = Coro::Signal->new;

# Counts the jobs that have not ended yet: it starts at the number of jobs
# and is decremented each time a job is reaped.
my $job_todo = Coro::Semaphore->new( scalar @job );

# Pool of free slots.  A slot is a comma separated list of $job_np
# consecutive node-file entries; entries left over when the node count is
# not a multiple of $job_np are not used.
my $ressources = Coro::Channel->new;
my $slot_count = int( $ressource_size / $job_np );
for my $slot ( 0 .. $slot_count - 1 ) {
   my $first = $slot * $job_np;
   $ressources->put( join( ',', @ressources[ $first .. $first + $job_np - 1 ] ) );
   }
82
83
my $job_num   = 0;     # sequence number of the last job started (first job is 1)
my %scheduled = ();    # running jobs, keyed by the pid of their oarsh pipe

# Return $text quoted for the remote shell: wrapped in single quotes, with
# every embedded single quote closed, escaped and reopened.
my $shell_quote = sub {
   my ($text) = @_;
   $text =~ s/'/'\\''/g;
   return "'$text'";
   };

# Launcher coroutine: starts the jobs in list order, one per free slot.
# Each job is a short script written to the standard input of a shell
# opened with $oarsh on the first node of the slot.
async {
   for my $job (@job) {
      # Waits here until a slot is available in the pool.
      my $job_ressource = $ressources->get;

      $job_num++;

      my ($node_connect) = split ',', $job_ressource;
      my $fh      = IO::File->new();
      # A pipe open returns the pid of the child process.
      my $job_pid = $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
         or die "don't start subjob: $!";

      $fh->autoflush;
      $fh = unblock $fh;

      $scheduled{$job_pid} = { fh => $fh, node_connect => $node_connect, ressource => $job_ressource, num => $job_num };

      printf "start job %5i / %5i at %s on node %s\n",
         $job_num, $job_pid, time, $job_ressource
         if $verbose;

      # Redirections stay empty unless --switchio is set and a base name is
      # known, so that the job command line never interpolates undef.
      my ( $job_stdout, $job_stderr ) = ( '', '' );
      $job_stdout = ">  $stdout-$job_num.stdout" if $stdout ne '' and $switchio;
      $job_stderr = "2> $stderr-$job_num.stderr" if $stderr ne '' and $switchio;

      my $job_nodefile    = "/tmp/oar-parexec-$ENV{LOGNAME}-$job_num";
      my $quoted_nodefile = $shell_quote->($job_nodefile);

      if ( $job_np > 1 ) {
         # One node per line, each line newline-terminated so that tools
         # counting lines (wc -l) see every node.  The node names are passed
         # as arguments, not as the printf format.
         my $node_args = join ' ', map { $shell_quote->($_) } split( ',', $job_ressource );
         $fh->print("printf '%s\\n' $node_args > $quoted_nodefile\n");
         $fh->print("OAR_NODE_FILE=$quoted_nodefile\n");
         $fh->print("OAR_NP=$job_np\n");
         $fh->print("export OAR_NODE_FILE\n");
         $fh->print("export OAR_NP\n");
         $fh->print("unset OAR_MSG_NODEFILE\n");
         }
      # Quoted so that a working directory containing spaces still works.
      $fh->print( "cd " . $shell_quote->($current_dir) . "\n" );
      $fh->print("$job $job_stdout $job_stderr\n");
      $fh->print("rm -f $quoted_nodefile\n") if $job_np > 1;
      $fh->print("exit\n");

      # Give the reaper coroutine a chance to run between two launches.
      cede;
      }
   }
130
# Reaper coroutine: polls the running jobs, returns the slot of each ended
# job to the pool and signals $finished once no job is left to do.
async {
   while () {
      my $reaped = 0;

      for my $job_pid ( keys %scheduled ) {
         # With WNOHANG, waitpid returns 0 while the child is still running;
         # any other value (the pid, or -1 when there is no such child) is
         # treated as "this job has ended".
         if ( waitpid( $job_pid, WNOHANG ) ) {
            my $ended = $scheduled{$job_pid};
            printf "end   job %5i / %5i at %s on node %s\n",
               $ended->{num},
               $job_pid, time,
               $ended->{ressource}
               if $verbose;
            close $ended->{fh};
            $ressources->put( $ended->{ressource} );
            $job_todo->down;
            delete $scheduled{$job_pid};
            $reaped++;
            }
         cede;
         }

      # Every job has ended: wake up the main program and stop polling.
      if ( $job_todo->count == 0 ) {
         $finished->send;
         last;
         }

      # Let the launcher use any slot that was just freed.
      cede;

      # Nothing ended during this pass: sleep for a short while instead of
      # spinning, so that polling does not keep a core busy for the whole run.
      select( undef, undef, undef, 0.05 ) if not $reaped;
      }
   }

# Let both coroutines start, then block until the reaper reports the end.
cede;

$finished->wait;
156
157__END__
158
159=head1 NAME
160
161oar-parexec - execute many small jobs in parallel
162
163=head1 SYNOPSIS
164
165 oar-parexec --file filecommand [--verbose] [--jobnp integer] [--nodefile filenode] [--masterio basefileio] [--switchio] [--oarsh sssh]
166 oar-parexec --help
167
168=head1 DESCRIPTION
169
170C<oar-parexec> executes many small jobs in parallel inside a cluster.
171The number of jobs running at the same time cannot exceed the number of cores listed in the node file.
172C<oar-parexec> is easier to use inside an OAR job environment,
173which automatically defines these strategic parameters...
174
175Option C<--file> is the only mandatory one.
176
177Small jobs are launched in the same folder as the master job.
178Two environment variables are defined for each small job,
179but only for parallel small jobs (option C<--jobnp> greater than 1).
180
181 OAR_NODE_FILE - file that list node for parallel computing
182 OAR_NP        - number of processors allocated to the small job
183
184The file defined by OAR_NODE_FILE is created in /tmp on the node before launching
185the small job and is deleted afterwards...
186C<oar-parexec> is a simple script:
187the OAR_NODE_FILE file will not be deleted if the master job crashes.
188
189OAR defines other variables that are equivalent to OAR_NODE_FILE:
190OAR_NODEFILE, OAR_FILE_NODES, OAR_RESOURCE_FILE...
191If you need the original OAR resource file in your script,
192you can access it through these variables.
193 
194
195=head1 OPTIONS
196
197=over 12
198
199=item B<-f|--file       filecommand>
200
201Name of the file that contains the job list.
202
203=item B<-v|--verbose>
204
205=item B<-j|--jobnp integer>
206
207Number of processors to allocate to each small job.
2081 by default.
209
210=item B<-n|--nodefile filenode>
211
212Name of the file that lists all the nodes on which jobs can be launched.
213By default, it is defined automatically by OAR via the
214environment variable C<OAR_NODE_FILE>.
215
216For example, if you want to use 6 cores on a cluster node,
217you need to put the hostname of that node 6 times in this file,
218one per line...
219This kind of file is very common for MPI processes!
220
221=item B<-m|--masterio basefileio>
222
223The C<basefileio> will be used in place of the environment variables
224C<OAR_STDOUT> and C<OAR_STDERR> (without extension) to build the base name of the small job standard output
225(only used when option C<switchio> is activated).
226
227=item B<-s|--switchio>
228
229Each small job will have its own STDOUT and STDERR output files,
230named after those of the master OAR job with C<JOB_NUM> inserted
231(or named after C<basefileio> if option C<masterio> is used).
232Example:
233
234 OAR.151524.stdout -> OAR.151524-JOB_NUM.stdout
235
236where 151524 is the master C<OAR_JOB_ID>
237and C<JOB_NUM> is the small job number.
238
239=item B<-o|--oarsh command>
240
241Command used to launch a shell on a node.
242By default
243
244        oarsh -q -T
245
246=item B<-h|--help>
247
248=back
249
250
251=head1 EXAMPLE
252
253The job file (option C<--file>) may contain:
254
255 - empty lines
256 - comment lines beginning with #
257 - valid shell commands
258
259Example where F<$HOME/test/subjob1.sh> is a shell script (executable).
260
261 $HOME/test/subjob1.sh
262 $HOME/test/subjob2.sh
263 $HOME/test/subjob3.sh
264 $HOME/test/subjob4.sh
265 ...
266 $HOME/test/subjob38.sh
267 $HOME/test/subjob39.sh
268 $HOME/test/subjob40.sh
269
270These jobs can be launched with
271
272 oarsub -n test -l /core=6,walltime=00:35:00 "oar-parexec -f ./subjob.list.txt"
273
274
275=head1 SEE ALSO
276
277oar-dispatch, mpilauncher
278
279
280=head1 AUTHORS
281
282Written by Gabriel Moreau, Grenoble - France
283
284
285=head1 LICENSE AND COPYRIGHT
286
287GPL version 2 or later and Perl equivalent
288
289Copyright (C) 2011 Gabriel Moreau / LEGI - CNRS UMR 5519 - France
290
Note: See TracBrowser for help on using the repository browser.