source: trunk/oar/oar-dispatch @ 10

Last change on this file since 10 was 10, checked in by g7moreau, 12 years ago
  • Replace parameter load by overload
File size: 3.6 KB
Line 
1#!/usr/bin/perl
2#
3# 2011/11/03 gabriel
4
5use strict;
6
7use Getopt::Long();
8use Pod::Usage;
9use Coro;
10use Coro::Semaphore;
11
# Command-line parameters and their default values.
my $task = 0;           # number of parallel tasks; 0 = count the lines of OAR_NODE_FILE
my $overload = 1.1;     # ratio of submitted OAR jobs to parallel tasks
my $file = '';          # path of the job list file (mandatory)
my $verbose;            # print every oarsub command before running it
my $help;               # print the full manual and exit

# Parse the command line; an unknown or malformed option prints the short usage.
Getopt::Long::GetOptions(
        'task=i'        => \$task,
        'overload=f'    => \$overload,
        'file=s'        => \$file,
        'verbose'       => \$verbose,
        'help'          => \$help,
        ) || pod2usage(-verbose => 0);
pod2usage(-verbose => 2) if $help;

# --file is mandatory: fail early with the usage text instead of a later,
# confusing "can't open : ..." error on an empty file name.
pod2usage(-verbose => 0, -message => "$0: option --file is required")
        if $file eq '';
26
# No explicit --task given: run one task per line of the OAR node file.
if ($task == 0) {
        my $node_file = $ENV{OAR_NODE_FILE};

        # Without OAR_NODE_FILE there is nothing to count, so say so
        # explicitly rather than failing on an empty file name.
        defined $node_file
                or die "OAR_NODE_FILE is not set: use --task to give the number of tasks\n";

        # Lexical filehandle; the error message reports the real path.
        open(my $node_fh, '<', $node_file) or die "can't open $node_file: $!";
        $task++ while <$node_fh>;
        close $node_fh;
}
32
# Read the whole job list, then keep only the lines that are not comments
# and that start (after optional blanks) with an oarsub command.
open(my $job_list_fh, '<', $file) or die "can't open $file: $!";
chomp(my @line = <$job_list_fh>);
close $job_list_fh;

my @job = grep { not m/^#/ and m/^\s*oarsub/ } @line;
41
# Identifier of the enclosing job, read from the OAR_JOB_ID environment variable.
my $container_id = $ENV{OAR_JOB_ID};
my $insert_oar_option;

if ($container_id > 1) {
        # Every sub-job is submitted inside this container job.
        $insert_oar_option = "-t inner=$container_id";
}
else {
        # interactive job: no container to attach to, and no overload
        $insert_oar_option = '';
        $overload = 1;
}
50
51
# Shared state of the two coroutines:
#   $finished   - signalled once every job of the list has been handled
#   $job_active - number of jobs submitted and not yet seen as finished
#   $job_todo   - number of jobs of the list not yet finished (starts at the list size)
#   %scheduled  - OAR job ids currently being watched
my $finished   = Coro::Signal->new;
my $job_active = Coro::Semaphore->new(0);
my $job_todo   = Coro::Semaphore->new(scalar @job);

my %scheduled = ();
58
# Submitter coroutine: sends the jobs of the list to OAR one by one, never
# keeping more than $task*$overload of them active at the same time.
async {
        for my $job (@job) {
                # Throttle: yield to the other coroutines until a slot is free.
                while ($job_active->count >= $task*$overload) {
                        cede;
                }

                # Drop the leading "oarsub" so the extra option can be put just after it.
                $job =~ s/^\s*oarsub//;
                print "oarsub $insert_oar_option $job\n" if $verbose;

                # Submit and extract the job id from the "OAR_JOB_ID=<id>" output line.
                my $job_id = `oarsub $insert_oar_option $job|grep ^OAR_JOB_ID|cut -f 2 -d '='`;
                chomp $job_id;
                if ($job_id > 1) {
                        $scheduled{$job_id}++;
                        $job_active->up;
                }
                else {
                        # The submission failed, so this job will never be seen as
                        # finished: take it off the to-do count now, otherwise the
                        # dispatcher would wait for it forever.
                        warn "oar-dispatch: submission failed, job skipped: oarsub $insert_oar_option $job\n";
                        $job_todo->down;
                }
                cede;
        }
};      # the semicolon matters: without it the next statement is parsed as an argument of async
75
# Monitor coroutine: polls OAR for the state of every submitted job and
# releases its slot once the job is over.
async {
        while (1) {
                # keys() returns a snapshot, so deleting entries inside the loop is safe.
                for my $job_id (keys %scheduled) {
                        my $is_finish = `oarstat -s -j $job_id`;
                        chomp $is_finish;

                        # A job is over when it is Terminated or when it ended in
                        # Error; waiting only for Terminated would hang on a failed job.
                        if ($is_finish =~ m/\b(?:Terminated|Error)\b/) {
                                delete $scheduled{$job_id};
                                $job_active->down;
                                $job_todo->down;
                        }
                        cede;
                }

                # Nothing left to do: wake up the main program and stop polling.
                if ($job_todo->count == 0) {
                        $finished->send;
                        last;
                }
                cede;
        }
};      # the semicolon matters: without it the following cede is parsed as an argument of async

# Let the coroutines start, then block until the monitor reports completion.
cede;

$finished->wait;
97
98
99__END__
100
101=head1 NAME
102
oar-dispatch - dispatch a lot of small OAR jobs
104
105=head1 SYNOPSIS
106
 oar-dispatch [--task integer] [--overload real] --file filepath [--verbose]
108 oar-dispatch --help
109
110=head1 OPTIONS
111
 --task         number of tasks to run in parallel.
                        Defaults to the number of lines in the file OAR_NODE_FILE.
114 
 --overload     number of OAR jobs to create / number of tasks.
                        Some jobs are created in advance so that they can start as soon as possible.
                        1.1 by default.
118
 --file         name of the file which contains the OAR job list
120
121 --verbose
122 
123 --help
124
The job list file can contain

 - empty lines
 - comment lines beginning with #
 - oarsub commands without the -t option
 
C<oar-dispatch> will add C<-t inner=container_id> to each command line,
just after C<oarsub>.
133
134Example where F<$HOME/test/subjob1.oar> is an OAR script job (executable).
135
136 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob1.oar
137 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob2.oar
138 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob3.oar
139 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob4.oar
140 ...
141 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob38.oar
142 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob39.oar
143 oarsub -n test -l /core=1,walltime=00:05:00 $HOME/test/subjob40.oar
144
These jobs can be launched with
146
147 oarsub -t container -n test-container -l /core=6,walltime=00:35:00 "oar-dispatch -f ./subjob.list.txt"
148
The total C<walltime> is defined by the formula:
150
151 total_walltime = subjob_walltime * total_subjob / core + global_delay
152
In practice, C<oar-dispatch> takes a few seconds and each subjob runs for less than its walltime, so
154
155 total_walltime < subjob_walltime * total_subjob / core
156
If launched interactively, the C<overload> parameter is set to 1,
C<task> must be defined
and no inner container option is added to the C<oarsub> command line.
160
161
162=head1 AUTHORS
163
164Gabriel Moreau (C) 2011
Note: See TracBrowser for help on using the repository browser.