Changeset 81 for trunk/oarutils
- Timestamp: Jun 1, 2012, 9:54:12 AM (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/oarutils/oar-parexec
r80 r81 153 153 if $verbose; 154 154 $oar_checkpoint->up(); 155 kill $sig_checkpoint => keys %scheduled if $sig_transmit;156 print "warning: transmit signal $sig_checkpoint to: " . join(' ', keys %scheduled) if $sig_transmit and $verbose;157 155 }; 156 157 # asynchrone notify job 158 async { 159 while () { 160 next if $oar_checkpoint->count() == 0; 161 162 # only notify with transmit flag 163 if (not $sig_transmit) { 164 cede; 165 next; 166 } 167 168 for my $job_pid (keys %scheduled) { 169 my $job_name = $scheduled{$job_pid}->{name}; 170 my $job_pidfile = $scheduled{$job_pid}->{pidfile}; 171 my $node_connect = $scheduled{$job_pid}->{node_connect}; 172 173 my $fh = IO::File->new(); 174 $fh->open("| $oarsh $node_connect >/dev/null 2>&1") 175 or die "error: can't notify subjob: $!"; 176 177 $fh->autoflush; 178 $fh = unblock $fh; 179 180 $fh->print("kill -$sig_checkpoint \$(cat $job_pidfile)"); 181 $fh->print("exit\n"); 182 183 print "warning: transmit signal $sig_checkpoint" 184 . " to $job_name on $node_connect.\n" 185 if $verbose; 186 187 close $fh; 188 cede; 189 } 190 } 191 } 158 192 159 193 # asynchrone start job block 160 194 async { 161 195 JOB: 162 196 for my $job (@job) { 163 197 my $job_name = $job->{name}; … … 193 227 $fh = unblock $fh; 194 228 229 my $msg = sprintf "start job %${job_name_maxlen}s / %5i at %s on node %s\n", 230 $job_name, $job_pid, time, $job_ressource; 231 $log_h->print($msg) if $logtrace; 232 print($msg) if $verbose; 233 234 my ($job_stdout, $job_stderr); 235 $job_stdout = "> $stdout-$job_name.stdout" if $stdout ne '' and $switchio; 236 $job_stderr = "2> $stderr-$job_name.stderr" if $stderr ne '' and $switchio; 237 238 my $job_nodefile = "/tmp/oar-parexec-$ENV{LOGNAME}-$job_name"; 239 my $job_pidfile = "/tmp/oar-parexec-$ENV{LOGNAME}-$job_name.pid"; 240 195 241 $scheduled{$job_pid} = { 196 242 fh => $fh, 197 243 node_connect => $node_connect, 198 244 ressource => $job_ressource, 199 name => $job_name 245 name => $job_name, 246 pidfile => 
$job_pidfile, 200 247 }; 201 248 202 my $msg = sprintf "start job %${job_name_maxlen}s / %5i at %s on node %s\n", 203 $job_name, $job_pid, time, $job_ressource; 204 $log_h->print($msg) if $logtrace; 205 print($msg) if $verbose; 206 207 my ($job_stdout, $job_stderr); 208 $job_stdout = "> $stdout-$job_name.stdout" if $stdout ne '' and $switchio; 209 $job_stderr = "2> $stderr-$job_name.stderr" if $stderr ne '' and $switchio; 210 211 my $job_nodefile = "/tmp/oar-parexec-$ENV{LOGNAME}-$job_name"; 212 213 # set job environment, run it and clean 249 # set job environment, run it and clean 214 250 if ($job_np > 1) { 215 251 $fh->print("printf \"" … … 223 259 } 224 260 $fh->print("cd $current_dir\n"); 225 $fh->print("$job_cmd $job_stdout $job_stderr\n"); 261 if ($sig_transmit) { 262 $fh->print("trap 'kill -$sig_checkpoint \$(jobs -p)' $sig_checkpoint\n"); 263 $fh->print("echo \$\$ > $job_pidfile\n"); 264 $fh->print("$job_cmd $job_stdout $job_stderr &\n"); 265 $fh->print("while [ \$(jobs -p | wc -l) -gt 0 ]\n"); 266 $fh->print("do\n"); 267 $fh->print(" wait\n"); 268 $fh->print("done\n"); 269 $fh->print("rm -f $job_pidfile\n"); 270 } 271 else { 272 $fh->print("$job_cmd $job_stdout $job_stderr\n"); 273 } 226 274 $fh->print("rm -f $job_nodefile\n") if $job_np > 1; 227 275 $fh->print("exit\n");
Note: See TracChangeset for help on using the changeset viewer.