Ignore:
Timestamp:
Mar 23, 2025, 11:13:38 AM (4 weeks ago)
Author:
sommeria
Message:

A few bug fixes and code cleanup

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/cluster_command.m

    r1171 r1179  
    1 %'cluster_command': creates the command string for launching jobs in the cluster
    2 %------------------------------------------------------------------------
     1%'cluster_command': creates the command string for launching jobs in the cluster system 'oar'
     2% other cluster options 'pbs' and 'psmn' are available in cluster_command_pbs and
     3% cluster_command_psmn. The choice is made in the xml file for parameters: series.xml
     4%-- ----------------------------------------------------------------------
    35% function cmd=cluster_command(ListProcess,ActionFullName,DirLog,NbProcess, NbCore,CPUTimeProcess)
    46%
     
    1921filename_log=fullfile(DirLog,'job_list.stdout'); % file for output messages of the master oar process
    2022filename_errors=fullfile(DirLog,'job_list.stderr'); % file for error messages of the master oar process
    21         if NbProcess>=6
    22             bigiojob_string=['+{type = ' char(39) 'bigiojob' char(39) '}/licence=1'];% char(39) is quote - bigiojob limit UVmat parallel launch on cluster to avoid saturation of disk access to data
    23         else
    24             bigiojob_string='';
    25         end
     23if NbProcess>=6
     24    bigiojob_string=['+{type = ' char(39) 'bigiojob' char(39) '}/licence=1'];% char(39) is quote - bigiojob limit UVmat parallel launch on cluster to avoid saturation of disk access to data
     25else
     26    bigiojob_string='';
     27end
    2628
    2729WallTimeMax=23;% absolute limit on computation time (in hours)
    28 WallTimeTotal=min(WallTimeMax,4*CPUTimeProcess/60);% chosen limit on computation time (in hours),possibly smaller than the absolute limit to favor job priority in the system. 
    29 WallTimeOneProcess=min(4*CPUTimeProcess+10,WallTimeTotal*60/2); % estimated max time of an individual process, used for checkpoint: 
    30                                                                 %if less than this time remains before walltime, the job is stopped and a new one can be launched ( by the option 'idempotent')
     30WallTimeTotal=min(WallTimeMax,4*CPUTimeProcess/60);% chosen limit on computation time (in hours),possibly smaller than the absolute limit to favor job priority in the system.
     31WallTimeOneProcess=min(4*CPUTimeProcess+10,WallTimeTotal*60/2); % estimated max time of an individual process, used for checkpoint:
     32%if less than this time remains before walltime, the job is stopped and a new one can be launched ( by the option 'idempotent')
    3133
    32       if NbCore==1
    33           corestring='cpu=1/core=4'; %increases the allowed memory in case of single core job
    34       else
    35          % corestring=['/core=' num2str(NbCore)];
    36          corestring=['{cluster=''calcul8''}/core=' num2str(NbCore)];
    37       end
    38                                                                 cmd=['oarsub -n UVmat_' ActionFullName ' '...
    39             '-t idempotent --checkpoint ' num2str(WallTimeOneProcess*60) ' '...
    40             '-l "' corestring bigiojob_string... % char(39) is quote - bigiojob limit UVmat parallel launch on cluster
    41             ',walltime=' datestr(WallTimeTotal/24,13) '" '...
    42             '-E ' filename_errors ' '...
    43             '-O ' filename_log ' '...
    44             '"oar-parexec -s -f ' ListProcessFile ' '...
    45             '-l ' ListProcessFile '.log"'];
     34% if NbCore==1
     35%     corestring='cpu=1/core=4'; %increases the allowed memory in case of single core job
     36% else
     37    corestring=['{cluster=''calcul8''}/core=' num2str(max(NbCore,4))];
     38% end
     39cmd=['oarsub -n UVmat_' ActionFullName ' '...
     40    '-t idempotent --checkpoint ' num2str(WallTimeOneProcess*60) ' '...
     41    '-l "' corestring bigiojob_string... % char(39) is quote - bigiojob limit UVmat parallel launch on cluster
     42    ',walltime=' datestr(WallTimeTotal/24,13) '" '...
     43    '-E ' filename_errors ' '...
     44    '-O ' filename_log ' '...
     45    '"oar-parexec -s -f ' ListProcessFile ' '...
     46    '-l ' ListProcessFile '.log"'];
Note: See TracChangeset for help on using the changeset viewer.