source: trunk/oarutils/oar-dispatch @ 93

Last change on this file since 93 was 93, checked in by g7moreau, 11 years ago
  • Create a thread by job to catch the end of that job
File size: 4.7 KB
Line 
#!/usr/bin/perl
#
# 2011/11/03 gabriel
#
# oar-dispatch: submit a list of small OAR jobs (one oarsub command per
# line of a file), keeping only a bounded number of them active at any
# time, and exit once every job has ended.  See the POD after __END__.

use strict;
use warnings;

use Getopt::Long();
use Pod::Usage;
use POSIX qw(ceil);
use Coro;
use Coro::Signal;
use Coro::Semaphore;
use Coro::Timer qw(sleep);

# Seconds between two oarstat polls of the same job.
my $POLL_DELAY = 15;

my $task     = 0;
my $overload = 1.1;
my $file     = '';
my $verbose;
my $help;

Getopt::Long::GetOptions(
   'task=i'       => \$task,
   'overload=f'   => \$overload,
   'file=s'       => \$file,
   'verbose'      => \$verbose,
   'help'         => \$help,
   ) || pod2usage(-verbose => 0);
pod2usage(-verbose => 2) if $help;
pod2usage(-verbose => 0, -exitval => 2, -message => "option --file is required\n")
   if $file eq '';

# Without --task, use one task per line of the OAR node file.
if ($task == 0) {
   my $node_file = $ENV{OAR_NODE_FILE};
   die "use --task or set the OAR_NODE_FILE environment variable\n"
      if not defined $node_file or $node_file eq '';
   open(my $node_fh, '<', $node_file) or die "can't open $node_file: $!";
   $task++ while <$node_fh>;
   close $node_fh;
   }

# job to run: keep only the oarsub lines, skip comments and anything else
my @job = ();
open(my $job_fh, '<', $file) or die "can't open $file: $!";
while (my $line = <$job_fh>) {
   chomp $line;
   next if $line =~ m/^#/;
   push @job, $line if $line =~ m/^\s*oarsub/;
   }
close $job_fh;

# Inside an OAR container job, every sub-job is submitted as an inner job
# of that container.  Otherwise (interactive use) nothing is inserted and
# no job is created in advance.
my $container_id = defined $ENV{OAR_JOB_ID} ? $ENV{OAR_JOB_ID} : '';
my $insert_oar_option = '';
if ($container_id =~ m/^\d+$/ and $container_id > 1) {
   $insert_oar_option = "-t inner=$container_id";
   }
else {
   $overload = 1;
   }

# Maximum number of jobs submitted and not yet ended.  Rounding up gives
# the same limit as waiting while "active >= task * overload".
my $max_active = ceil($task * $overload);
die "nothing can run: task ($task) * overload ($overload) must be positive\n"
   if $max_active < 1;

my $finished      = Coro::Signal->new;
my $job_slot      = Coro::Semaphore->new($max_active);
my $job_remaining = scalar @job;

# Must be called exactly once per job of @job, whether it ran or could not
# be submitted: frees its slot and wakes the main thread after the last one.
my $job_done = sub {
   $job_slot->up;
   $job_remaining--;
   $finished->send if $job_remaining == 0;
   };

# asynchronous start job block
async {
   JOB:
   for my $job (@job) {
      # Blocks (without spinning) until fewer than $max_active jobs are active.
      $job_slot->down;

      (my $oarsub_args = $job) =~ s/^\s*oarsub//;
      my $command = "oarsub $insert_oar_option $oarsub_args";
      print "$command\n" if $verbose;

      # The command deliberately goes through the shell, so that things
      # like $HOME in the job file are expanded.
      my $output   = `$command`;
      my ($job_id) = $output =~ m/^OAR_JOB_ID=(\d+)/m;

      if (not defined $job_id) {
         # No job was created: there is nothing to watch, so account for
         # the job right now instead of waiting for it forever.
         warn "oarsub did not return a job id, job skipped: $command\n";
         $job_done->();
         next JOB;
         }

      # asynchronous guard for job end
      async {
         my ($guarded_id) = @_;
         GUARD:
         while (1) {
            sleep $POLL_DELAY; # async, do not re-launch oarstat too fast
            my $state = `oarstat -s -j $guarded_id`;
            # Terminated and Error are the two final states of an OAR job;
            # the word boundary keeps the transitional toError state out.
            last GUARD if $state =~ m/\b(?:Terminated|Error)\b/;
            }
         $job_done->();
         } $job_id;

      cede;
      }
   };

# all jobs have been done (nothing to wait for when the list is empty)
$finished->wait if @job;


__END__

=head1 NAME

oar-dispatch - dispatch a lot of small OAR jobs

=head1 SYNOPSIS

 oar-dispatch [--task integer] [--overload real] --file filecommand [--verbose]
 oar-dispatch --help

=head1 DESCRIPTION

C<oar-dispatch> reads a list of C<oarsub> commands from a file and submits
them one after the other, keeping at most C<task * overload> (rounded up)
jobs submitted and not yet ended at the same time.
Each submitted job is polled with C<oarstat> until it ends.
A command for which C<oarsub> returns no job id is reported on the standard
error and skipped.
C<oar-dispatch> exits once every job of the list has ended.

=head1 OPTIONS

=over 12

=item B<[-t|--task integer]>

Number of tasks to run in parallel.
Defaults to the number of lines in the file OAR_NODE_FILE.

=item B<[-o|--overload real]>

Number of OAR jobs to create / number of tasks.
Some jobs are created in advance so that they can start as soon as possible.
1.1 by default.

=item B<[-f|--file filecommand]>

Name of the file which contains the OAR job list.

=item B<[-v|--verbose]>

=item B<[-h|--help]>

=back

The input job file can contain

 - empty lines
 - comment lines beginning with #
 - oarsub commands without the -t option

C<oar-dispatch> will add C<-t inner=container_id> to each command line,
just after C<oarsub>.

=head1 EXAMPLE

Example where the file F<$HOME/test/subjob.txt> is a list of OAR job scripts
(they can be executable, but this is not needed here).

 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob1.oar
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob2.oar
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob3.oar
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob4.oar
 ...
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob38.oar
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob39.oar
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob40.oar

These jobs can be launched with

 oarsub -t container -n test-container -l /core=6,walltime=00:35:00 "oar-dispatch -f $HOME/test/subjob.txt"

The total C<walltime> is defined by the formula:

 total_walltime = subjob_walltime * total_subjob / core + global_delay

In practice, C<oar-dispatch> takes a few seconds and each subjob runs in less
than its walltime, so

 total_walltime < subjob_walltime * total_subjob / core

When launched interactively (outside an OAR container job), the C<overload>
parameter is forced to 1, C<task> must be defined,
and no inner container option is added to the C<oarsub> command line.


=head1 SEE ALSO

oar-parexec, mpilauncher


=head1 AUTHORS

Written by Gabriel Moreau, Grenoble - France


=head1 LICENSE AND COPYRIGHT

GPL version 2 or later and Perl equivalent

Copyright (C) 2011 Gabriel Moreau / LEGI - CNRS UMR 5519 - France
Note: See TracBrowser for help on using the repository browser.