Changeset 94 for trunk/oarutils
- Timestamp: Jan 24, 2013, 7:17:46 PM (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/oarutils/oar-dispatch
 r93  r94
  11   11   use Coro::Semaphore;
  12   12   use Coro::Timer qw(sleep);
       13 + use Coro::Handle;
       14 + use IO::File;
  13   15
  14   16   my $task = 0;
  15   17   my $overload = 1.1;
  16   18   my $file = '';
       19 + my $logtrace;
  17   20   my $verbose;
  18   21   my $help;
       22 + my $sig_transmit;
       23 + my $sig_checkpoint = 'USR2';
  19   24
  20   25   Getopt::Long::GetOptions(
   …    …
  22   27   'overload=f' => \$overload,
  23   28   'file=s' => \$file,
       29 + 'logtrace=s' => \$logtrace,
  24   30   'verbose' => \$verbose,
  25   31   'help' => \$help,
       32 + 'transmit' => \$sig_transmit,
       33 + 'kill=s' => \$sig_checkpoint,
  26   34   ) || pod2usage(-verbose => 0);
  27   35   pod2usage(-verbose => 2) if $help;
   …    …
  31   39   $task++ while <NODE_FILE>;
  32   40   close NODE_FILE;
       41 + }
       42 +
       43 + # re-run, keep trace of job already done
       44 + my %state;
       45 + my $log_h = IO::File->new();
       46 + if (-e "$logtrace") {
       47 + $log_h->open("< $logtrace")
       48 + or die "error: can't read log file: $!";
       49 + while (<$log_h>) {
       50 + $state{$1} = 'start' if m/^start\s+job\s+([^\s]+)\s/;
       51 + $state{$1} = 'end' if m/^end\s+job\s+([^\s]+)\s/;
       52 + }
       53 + $log_h->close();
       54 + }
       55 + if ($logtrace) {
       56 + $log_h->open(">> $logtrace")
       57 + or die "error: can't append log file $logtrace: $!";
       58 + $log_h->autoflush;
       59 + $log_h = unblock $log_h;
  33   60   }
  34   61
   …    …
  60   87   my %scheduled = ();
  61   88
       89 + # OAR checkpoint and default signal SIGUSR2
       90 + my $oar_checkpoint = new Coro::Semaphore 0;
       91 + my $notify = new Coro::Signal;
       92 + $SIG{$sig_checkpoint} = sub {
       93 + print "warning: receive checkpoint at "
       94 + . time
       95 + . ", no new job, just finishing running job\n"
       96 + if $verbose;
       97 + $oar_checkpoint->up();
       98 + $notify->send if $sig_transmit;
       99 + };
      100 +
      101 + # asynchrone notify job
      102 + async {
      103 + while () {
      104 + $notify->wait;
      105 +
      106 + for my $job_pid (keys %scheduled) {
      107 + system "oardel --checkpoint --signal $sig_checkpoint $job_pid";
      108 + cede;
      109 + }
      110 + }
      111 + }
      112 +
  62  113   # asynchrone start job block
  63  114   async {
   …    …
  67  118   cede;
  68  119   }
      120 +
      121 + # no more launch job when OAR checkpointing
      122 + last JOB if $oar_checkpoint->count() > 0;
      123 +
  69  124   $job =~ s/^\s*oarsub//;
  70  125   print "oarsub $insert_oar_option $job" if $verbose;
   …    …
 107  162   # }
 108  163
      164 + # checkpointing ! just finishing running job and quit
      165 + $finished->send if $oar_checkpoint->count() > 0 and scalar(keys(%scheduled)) == 0;
      166 +
 109  167   $finished->send if $job_todo->count == 0;
 110  168   cede;
   …    …
 116  174   # all job have been done
 117  175   $finished->wait;
      176 +
      177 + # close log trace file
      178 + $log_h->close() if $logtrace;
 118  179
 119  180
Note: See TracChangeset
for help on using the changeset viewer.