source: trunk/oar/oar-dispatch @ 12

Last change on this file since 12 was 12, checked in by g7moreau, 12 years ago
  • Just add depends on Coro::Signal
File size: 3.6 KB
Line 
1#!/usr/bin/perl
2#
3# 2011/11/03 gabriel
4
5use strict;
6
7use Getopt::Long();
8use Pod::Usage;
9use Coro;
10use Coro::Signal;
11use Coro::Semaphore;
12
# Command-line settings with their default values.
my ($task, $overload, $file) = (0, 1.1, '');    # --task, --overload, --file
my ($verbose, $help);                           # --verbose, --help (unset by default)

# Parse the command line.  When GetOptions reports a problem, show the
# brief usage message (which also ends the program).
Getopt::Long::GetOptions(
    'task=i'     => \$task,
    'overload=f' => \$overload,
    'file=s'     => \$file,
    'verbose'    => \$verbose,
    'help'       => \$help,
) or pod2usage(-verbose => 0);

# --help: show the full manual.
if ($help) {
    pod2usage(-verbose => 2);
}
27
# When --task is not given (or is 0), default the number of parallel tasks
# to the number of lines of the file named by the OAR_NODE_FILE environment
# variable.
if ($task == 0) {
    my $node_file = $ENV{OAR_NODE_FILE};
    die "can't guess --task: environment variable OAR_NODE_FILE is not set\n"
        unless defined $node_file and length $node_file;

    open(my $node_fh, '<', $node_file)
        or die "can't open OAR_NODE_FILE ($node_file): $!";
    $task++ while <$node_fh>;
    close $node_fh;
}

# The submission loop waits while the number of active jobs is
# >= $task * $overload; with a task count below 1 that is always true and
# the program would never submit anything.
die "the number of tasks must be at least 1 (got $task)\n" if $task < 1;
33
# Read the job list given with --file.  Only lines that start (after
# optional leading blanks) with "oarsub" are kept; lines beginning with '#'
# and everything else (e.g. empty lines) are ignored.
pod2usage(-message => 'option --file is required', -verbose => 0)
    if $file eq '';

my @job = ();
open(my $job_list_fh, '<', $file) or die "can't open $file: $!";
while (my $line = <$job_list_fh>) {
    chomp $line;
    next if $line =~ m/^#/;
    push @job, $line if $line =~ m/^\s*oarsub/;
}
close $job_list_fh;
42
# Id of the enclosing OAR job, read from the environment.  By default every
# job is submitted with "-t inner=<id>" inserted right after "oarsub".
my $container_id      = $ENV{OAR_JOB_ID};
my $insert_oar_option = "-t inner=$container_id";

# Interactive job: without a job id numerically greater than 1, submit the
# jobs without the inner option and force the overload factor to 1.
unless ($container_id > 1) {
    ($insert_oar_option, $overload) = ('', 1);
}
51
52
# Synchronisation objects shared by the coroutines below.  They are built
# with Class->new(...) rather than the ambiguous indirect object syntax
# ("new Class ARGS").
my $finished   = Coro::Signal->new;         # sent when $job_todo has dropped to 0
my $job_active = Coro::Semaphore->new(0);   # jobs submitted and not yet seen as finished
my $job_todo   = Coro::Semaphore->new(0);   # jobs of the list not yet seen as finished
$job_todo->up for @job;                     # one unit per job read from the list

# Ids of the submitted jobs that are still being watched (job id => count).
my %scheduled = ();
59
# Submitter coroutine: passes the jobs of @job to oarsub one after the
# other, waiting whenever $task * $overload (or more) submitted jobs are
# still active.
async {
    for my $job (@job) {
        # Let the other coroutines run until a slot becomes free.
        cede while $job_active->count >= $task * $overload;

        # Work on a copy so that the entries of @job are left untouched.
        (my $arguments = $job) =~ s/^\s*oarsub//;
        my $command = "oarsub $insert_oar_option $arguments";
        print "$command\n" if $verbose;

        # The job id is expected on an output line "OAR_JOB_ID=<id>".
        my $output = `$command`;
        $output = '' unless defined $output;
        my ($job_id) = $output =~ m/^OAR_JOB_ID=(\d+)/m;

        if (defined $job_id and $job_id > 1) {
            $scheduled{$job_id}++;
            $job_active->up;
        }
        else {
            # This job will never be seen as finished: count it as done
            # right away, otherwise $job_todo never reaches 0 and the
            # program waits forever.  The count is still >= 1 for this job,
            # so down() does not block.
            warn "oar-dispatch: submission failed: $command\n";
            $job_todo->down;
        }
        cede;
    }
};
76
# Monitor coroutine: asks oarstat for the state of every watched job and
# releases its slot once the job has reached a final state.  Sends $finished
# when no job is left to do.
async {
    while (1) {
        for my $job_id (keys %scheduled) {
            my $state = `oarstat -s -j $job_id`;
            $state = '' unless defined $state;

            # A job that ended in state "Error" is over as well; matching
            # only "Terminated" would poll such a job forever.
            # NOTE(review): state names as listed in the OAR documentation;
            # the word boundaries keep the transient "toError" from matching.
            if ($state =~ m/\b(?:Terminated|Error)\b/) {
                delete $scheduled{$job_id};
                $job_active->down;
                $job_todo->down;
            }
            cede;
        }

        $finished->send if $job_todo->count == 0;
        cede;
    }
};

# Hand control to the coroutines once, then block until the monitor signals
# that every job has been accounted for.
cede;

$finished->wait;
98
99
100__END__
101
=head1 NAME

oar-dispatch - dispatch a lot of small OAR jobs
105
=head1 SYNOPSIS

 oar-dispatch [--task integer] [--overload real] --file filepath [--verbose]
 oar-dispatch --help
110
=head1 OPTIONS

 --task         number of tasks to run in parallel.
                Defaults to the number of lines of the file OAR_NODE_FILE.

 --overload     number of OAR jobs to create / number of tasks.
                Some jobs are created in advance so that they can start as soon as possible.
                1.1 by default.

 --file         name of the file which contains the OAR job list

 --verbose      print each oarsub command before submitting it

 --help         display the full documentation

The input job file can contain

 - empty lines
 - comment lines beginning with #
 - oarsub commands without the -t option

C<oar-dispatch> will add C<-t inner=container_id> to each of these command lines,
just after C<oarsub>.
134
Example where F<$HOME/test/subjob.txt> is a list of OAR job scripts (they can be executable, but that is not needed here).

 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob1.oar
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob2.oar
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob3.oar
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob4.oar
 ...
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob38.oar
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob39.oar
 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob40.oar

These jobs could be launched with

 oarsub -t container -n test-container -l /core=6,walltime=00:35:00 "oar-dispatch -f ./subjob.list.txt"
149
Total C<walltime> is defined by the formula:

 total_walltime = subjob_walltime * total_subjob / core + global_delay

In practice, C<oar-dispatch> takes a few seconds and each subjob runs in less than its walltime, so

 total_walltime < subjob_walltime * total_subjob / core

If launched interactively, the C<overload> parameter is set to 1,
C<task> must be defined,
and no inner container is added to the C<oarsub> command line.
161
162
163=head1 AUTHORS
164
165Gabriel Moreau (C) 2011
Note: See TracBrowser for help on using the repository browser.