Changeset 904 for trunk


Ignore:
Timestamp:
May 30, 2015, 1:26:33 PM (9 years ago)
Author:
sommeria
Message:

walltime estimate introduced in series, and option of result file overwrite

Location:
trunk/src
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/series.m

    r903 r904  
    16081608    check_create=1; %need to create the result directory by default
    16091609    while detect
    1610         answer=msgbox_uvmat('INPUT_Y-N-Cancel',['use existing ouput directory: ' fullfile(Param.InputTable{1,1},SubDirOutNew) ', possibly delete previous data']);
     1610        if Param.CheckOverwrite
     1611            comment=', possibly overwrite previous data';
     1612        else
     1613            comment=', will complement existing result files (no overwriting)';
     1614        end
     1615        answer=msgbox_uvmat('INPUT_Y-N-Cancel',['use existing ouput directory: ' fullfile(Param.InputTable{1,1},SubDirOutNew) comment]);
    16111616        if strcmp(answer,'Cancel')
    16121617            return
     
    17011706else
    17021707    ref_i=first_i:incr_i:last_i;
    1703     if isempty(incr_j)
     1708    if isempty(incr_j)% automatic finding of the existing j indices
    17041709        [ref_j,tild]=find(squeeze(SeriesData.i1_series{1}(1,:,:)));
    17051710        ref_j=ref_j-1;
     
    17091714    end
    17101715end
    1711 
     1716CPUTime=5;% job time estimated at 5 min per iteration (on index i and j) by default
     1717if isfield(Param, 'CPUTime') && ~isempty(Param.CPUTime)
     1718    CPUTime=Param.CPUTime;%Note: CPUTime for one iteration ref_i has to be multiplied by the number of j indices nbfield_j
     1719end
     1720nbfield_j=numel(ref_j); % number of j indices
    17121721if isempty(Param.IndexRange.NbSlice)
    1713     NbProcess=NbCore;% choose one process per core if NbSlice is not imposed
     1722    NbProcess=NbCore;% choose one process per core by default if NbSlice is not imposed
    17141723    switch RunMode
    17151724        case 'cluster_oar'
    1716             NbProcess=numel(ref_i); % split big list witdh oar-parexec (Gabriel Moreau)
     1725            BlockLength= ceil(20/(CPUTime*nbfield_j));% short iterations are grouped such that the minimum time of a process is 20 min.
     1726            NbProcess=ceil(numel(ref_i)/BlockLength) ; % nbre of processes sent to oar
    17171727    end
    17181728else
     
    17201730    NbCore=min(NbCore,NbProcess);% reduces the number of cores if it exceeds the number of processes
    17211731end
    1722 BlockLength=ceil(numel(ref_i)/NbProcess);% nbre of input fields in each process
    1723 nbfield_j=numel(ref_j); % number of j indices
     1732
     1733%BlockLength=ceil(numel(ref_i)/NbProcess);% nbre of input fields in each process
     1734%nbfield_j=numel(ref_j); % number of j indices
    17241735
    17251736%% record nbre of output files and starting time for computation for status
     
    18371848        t=set(t,1,'name','Series');
    18381849        filexml=fullfile_uvmat(DirXml,'',Param.InputTable{1,3},'.xml',OutputNomType,...
    1839             Param.IndexRange.first_i,Param.IndexRange.last_i,first_j,last_j);
     1850            Param.IndexRange.first_i,Param.IndexRange.last_i,first_j,last_j)
    18401851        save(t,filexml);% save the parameter file
    18411852       
     
    19211932            end
    19221933        end
    1923         max_walltime=3600*20; % 20h max total calculation (cannot exceed 24 h)
    1924         walltime_onejob=1800; % seconds, max estimated time for asingle file index value
    1925         filename_joblist=fullfile(DirOAR,'job_list.txt');%create name of the global executable file
     1934        filename_joblist=fullfile(DirOAR,'0_job_list.txt');%create name of the global executable file
    19261935        fid=fopen(filename_joblist,'w');
    19271936        for p=1:length(batch_file_list)
     
    19301939        fclose(fid);
    19311940        system(['chmod +x ' filename_joblist]);% set the file to executable
     1941       
     1942        % the command file 0_job_list.txt contains the list of NbProcess independent individual jobs
     1943        % in which the total calculation has been split. Those are written as executable files .sh in the folder /O_EXE.
     1944        %  These individual jobs are grouped by the system as oar jobs on the NbCore processors.
     1945        %  For each processor, the oar job must stop after the walltime which has been set, which is limited to 24 h.
     1946        %  However, the oar job is automatically restarted (option 'idempotent') provided the individual jobs are
     1947        % shorter than the wall time: in the time interval 'checkpoint' (WallTimeOneJob) before the end of the allowed duration,
     1948        %  the oar job restarts when an individual job ends.
     1949        JobTime=CPUTime*BlockLength*nbfield_j% estimated time for one individual job (in minutes)
     1950        % wall time (in hours) for each oar job, allowing 4 individual jobs, but limited to 23 h:
     1951        WallTimeTotal=min(23,4*JobTime/60);
     1952        disp(['WallTimeTotal: ' num2str(WallTimeTotal) ' hours'])
     1953        % estimated time of an individual job (in min), with a margin of error
     1954        WallTimeOneJob=min(4*JobTime+10,WallTimeTotal*60/2);% estimated max time of an individual job for checkpoint
     1955        disp(['WallTimeOneJob: ' num2str(WallTimeOneJob) ' minutes'])
    19321956        oar_command=['oarsub -n UVmat_' ActionName ' '...
    1933             '-t idempotent --checkpoint ' num2str(walltime_onejob+60) ' '...
     1957            '-t idempotent --checkpoint ' num2str(WallTimeOneJob*60) ' '...
    19341958            '-l /core=' num2str(NbCore) ','...
    1935             'walltime=' datestr(min(1.05*walltime_onejob/86400*max(NbProcess*BlockLength*nbfield_j,NbCore)/NbCore,max_walltime/86400),13) ' '...
     1959            'walltime=' datestr(WallTimeOneJob/24,13) ' '...
    19361960            '-E ' regexprep(filename_joblist,'\.txt\>','.stderr') ' '...
    19371961            '-O ' regexprep(filename_joblist,'\.txt\>','.stdout') ' '...
     
    19391963            '"oar-parexec -s -f ' filename_joblist ' '...
    19401964            '-l ' filename_joblist '.log"\n'];
    1941         filename_oarcommand=fullfile(DirOAR,'oar_command');
     1965       
     1966        filename_oarcommand=fullfile(DirOAR,'0_oar_command');
    19421967        fid=fopen(filename_oarcommand,'w');
    19431968        fprintf(fid,oar_command);
     
    19451970        fprintf(oar_command);% display in command line
    19461971        system(oar_command); 
    1947         msgbox_uvmat('CONFIRMATION',[ActionName ' launched in cluster: press STATUS to see results'])
     1972        msgbox_uvmat('CONFIRMATION',[ActionName ' launched as  ' num2str(NbProcess) ' processes in cluster: press STATUS to see results'])
    19481973    case 'cluster_pbs' % for LMFA Kepler machine
    19491974        %create subdirectory for pbs command and log files
     
    23022327end
    23032328
     2329%% Introduce visibility of file overwrite option
     2330if isfield(ParamOut,'CheckOverwriteVisible')&& strcmp(ParamOut.CheckOverwriteVisible,'on')
     2331    set(handles.CheckOverwrite,'Visible','on')
     2332else
     2333    set(handles.CheckOverwrite,'Visible','off')
     2334end
    23042335
    23052336%% Check whether alphabetical sorting of input Subdir is allowed by the Action fct  (for multiples series entries)
     
    24622493set(handles.OutputDirExt,'Visible',OutputDirVisible)
    24632494set(handles.OutputSubDir,'Visible',OutputDirVisible)
     2495%set(handles.CheckOverwrite,'Visible',OutputDirVisible)
    24642496set(handles.OutputDir_title,'Visible',OutputDirVisible)
    24652497SeriesData.ActionName=ActionName;%record ActionName for next use
     
    35613593function OutputSubDir_Callback(hObject, eventdata, handles)
    35623594set(handles.OutputSubDir,'BackgroundColor',[1 1 1])
    3563 % hObject    handle to OutputSubDir (see GCBO)
     3595
     3596
     3597% --- Executes on button press in CheckOverwrite.
     3598function CheckOverwrite_Callback(hObject, eventdata, handles)
     3599% hObject    handle to CheckOverwrite (see GCBO)
    35643600% eventdata  reserved - to be defined in a future version of MATLAB
    35653601% handles    structure with handles and user data (see GUIDATA)
    35663602
    3567 % Hints: get(hObject,'String') returns contents of OutputSubDir as text
    3568 %        str2double(get(hObject,'String')) returns contents of OutputSubDir as a double
     3603% Hint: get(hObject,'Value') returns toggle state of CheckOverwrite
     3604
     3605
     3606% --- Executes on button press in TestCPUTime.
     3607function TestCPUTime_Callback(hObject, eventdata, handles)
     3608% hObject    handle to TestCPUTime (see GCBO)
     3609% eventdata  reserved - to be defined in a future version of MATLAB
     3610% handles    structure with handles and user data (see GUIDATA)
     3611
     3612
     3613
     3614
     3615
  • trunk/src/series/civ_series.m

    r897 r904  
    7070    Data.OutputSubDirMode='last'; %select the last subDir in the input table as root of the output subdir name (option 'all'/'first'/'last', 'all' by default)
    7171    Data.OutputFileMode='NbInput_i';% one output file expected per value of i index (used for waitbar)
     72    Data.CheckOverwriteVisible='on'; % manage the overwrite of existing files (default=1)
    7273    return
    7374end
     
    325326maskname='';% initiate the mask name
    326327tic;
     328CheckOverwrite=1;%default
     329if isfield(Param,'CheckOverwrite')
     330    CheckOverwrite=Param.CheckOverwrite;
     331end
    327332for ifield=1:NbField
    328333    if ~isempty(RUNHandle)% update the waitbar in interactive mode with GUI series  (checkrun=1)
     
    350355                    j1_series_Civ1(ifield),j2_series_Civ1(ifield));
    351356            end
     357        end
     358        if ~CheckOverwrite && exist(ncfile,'file')         
     359        continue% skip iteration if overwrite mode is deactivated and the result file already exists
    352360        end
    353361    end
     
    861869            disp(errormsg)
    862870        end
    863         disp(['ellapsed time ' num2str(toc) ' s'])
     871        disp(['ellapsed time ' num2str(toc/60,2) ' minutes'])
    864872    end
    865873end
  • trunk/src/series/ima_levels.m

    r810 r904  
    9595%% read input parameters from an xml file if input is a file name (batch mode)
    9696ParamOut=[];
     97RUNHandle=[];
     98WaitbarHandle=[];
    9799checkrun=1;
    98100if ischar(Param)
    99101    Param=xml2struct(Param);% read Param as input file (batch case)
    100102    checkrun=0;
    101 end
    102 hseries=findobj(allchild(0),'Tag','series');
    103 RUNHandle=findobj(hseries,'Tag','RUN');%handle of RUN button in GUI series
    104 WaitbarHandle=findobj(hseries,'Tag','Waitbar');%handle of waitbar in GUI series
     103else% interactive mode in Matlab
     104    hseries=findobj(allchild(0),'Tag','series');
     105    RUNHandle=findobj(hseries,'Tag','RUN');%handle of RUN button in GUI series
     106    WaitbarHandle=findobj(hseries,'Tag','Waitbar');%handle of waitbar in GUI series
     107end
    105108
    106109%% subdirectory for output files
     
    171174for ifile=1:nbfield
    172175            update_waitbar(WaitbarHandle,ifile/nbfield)
    173     if ~isempty(RUNHandle)&& ~strcmp(get(RUNHandle,'BusyAction'),'queue')
     176    if ~isempty(RUNHandle) && ~strcmp(get(RUNHandle,'BusyAction'),'queue')
    174177        disp('program stopped by user')
    175178        return
  • trunk/src/series/sub_background.m

    r897 r904  
    189189%% read input parameters from an xml file if input is a file name (batch mode)
    190190checkrun=1;
     191RUNHandle=[];
     192WaitbarHandle=[];
    191193if ischar(Param)
    192194    Param=xml2struct(Param);% read Param as input file (batch case)
    193195    checkrun=0;
    194 end
     196else
    195197hseries=findobj(allchild(0),'Tag','series');
    196198RUNHandle=findobj(hseries,'Tag','RUN');%handle of RUN button in GUI series
    197199WaitbarHandle=findobj(hseries,'Tag','Waitbar');%handle of waitbar in GUI series
     200end
    198201
    199202%% input preparation
     
    352355        for ifield = step*(halfnbaver+1):step:nbfield_series-step*(halfnbaver+1)% ifield +iburst=index of the current processed image
    353356            update_waitbar(WaitbarHandle,ifield/nbfield_series)
    354             if  ~strcmp(get(RUNHandle,'BusyAction'),'queue')
     357            if  ~isempty(RUNHandle)&&~strcmp(get(RUNHandle,'BusyAction'),'queue')
    355358                disp('program stopped by user')
    356359                return
Note: See TracChangeset for help on using the changeset viewer.