Changeset 904 for trunk/src/series.m


Ignore:
Timestamp:
May 30, 2015, 1:26:33 PM (9 years ago)
Author:
sommeria
Message:

walltime estimate introduced in series, and option of result file overwrite

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/series.m

    r903 r904  
    16081608    check_create=1; %need to create the result directory by default
    16091609    while detect
    1610         answer=msgbox_uvmat('INPUT_Y-N-Cancel',['use existing ouput directory: ' fullfile(Param.InputTable{1,1},SubDirOutNew) ', possibly delete previous data']);
     1610        if Param.CheckOverwrite
     1611            comment=', possibly overwrite previous data';
     1612        else
     1613            comment=', will complement existing result files (no overwriting)';
     1614        end
     1615        answer=msgbox_uvmat('INPUT_Y-N-Cancel',['use existing ouput directory: ' fullfile(Param.InputTable{1,1},SubDirOutNew) comment]);
    16111616        if strcmp(answer,'Cancel')
    16121617            return
     
    17011706else
    17021707    ref_i=first_i:incr_i:last_i;
    1703     if isempty(incr_j)
     1708    if isempty(incr_j)% automatic finding of the existing j indices
    17041709        [ref_j,tild]=find(squeeze(SeriesData.i1_series{1}(1,:,:)));
    17051710        ref_j=ref_j-1;
     
    17091714    end
    17101715end
    1711 
     1716CPUTime=5;% job time estimated at 5 min per iteration (on index i and j) by default
     1717if isfield(Param, 'CPUTime') && ~isempty(Param.CPUTime)
     1718    CPUTime=Param.CPUTime;%Note: CPUTime for one iteration ref_i has to be multiplied by the number of j indices nbfield_j
     1719end
     1720nbfield_j=numel(ref_j); % number of j indices
    17121721if isempty(Param.IndexRange.NbSlice)
    1713     NbProcess=NbCore;% choose one process per core if NbSlice is not imposed
     1722    NbProcess=NbCore;% choose one process per core by default if NbSlice is not imposed
    17141723    switch RunMode
    17151724        case 'cluster_oar'
    1716             NbProcess=numel(ref_i); % split big list witdh oar-parexec (Gabriel Moreau)
     1725            BlockLength= ceil(20/(CPUTime*nbfield_j));% short iterations are grouped such that the minimum time of a process is 20 min.
     1726            NbProcess=ceil(numel(ref_i)/BlockLength) ; % nbre of processes sent to oar
    17171727    end
    17181728else
     
    17201730    NbCore=min(NbCore,NbProcess);% reduces the number of cores if it exceeds the number of processes
    17211731end
    1722 BlockLength=ceil(numel(ref_i)/NbProcess);% nbre of input fields in each process
    1723 nbfield_j=numel(ref_j); % number of j indices
     1732
     1733%BlockLength=ceil(numel(ref_i)/NbProcess);% nbre of input fields in each process
     1734%nbfield_j=numel(ref_j); % number of j indices
    17241735
    17251736%% record nbre of output files and starting time for computation for status
     
    18371848        t=set(t,1,'name','Series');
    18381849        filexml=fullfile_uvmat(DirXml,'',Param.InputTable{1,3},'.xml',OutputNomType,...
    1839             Param.IndexRange.first_i,Param.IndexRange.last_i,first_j,last_j);
     1850            Param.IndexRange.first_i,Param.IndexRange.last_i,first_j,last_j)
    18401851        save(t,filexml);% save the parameter file
    18411852       
     
    19211932            end
    19221933        end
    1923         max_walltime=3600*20; % 20h max total calculation (cannot exceed 24 h)
    1924         walltime_onejob=1800; % seconds, max estimated time for asingle file index value
    1925         filename_joblist=fullfile(DirOAR,'job_list.txt');%create name of the global executable file
     1934        filename_joblist=fullfile(DirOAR,'0_job_list.txt');%create name of the global executable file
    19261935        fid=fopen(filename_joblist,'w');
    19271936        for p=1:length(batch_file_list)
     
    19301939        fclose(fid);
    19311940        system(['chmod +x ' filename_joblist]);% set the file to executable
     1941       
     1942        % the command file 0_job_list.txt contains the list of NbProcess independent individual jobs
     1943        % in which the total calculation has been split. Those are written as executable files .sh in the folder /O_EXE.
     1944        %  These individual jobs are grouped by the system as oar jobs on the NbCore processors.
     1945        %  For each processor, the oar job must stop after the walltime which has been set, which is limited to 24 h.
     1946        %  However, the oar job is automatically restarted (option 'idempotent') provided the individual jobs are
     1947        % shorter than the wall time: in the time interval 'checkpoint' (WallTimeOneJob) before the end of the allowed duration,
     1948        %  the oar job restarts when an individual job ends.
     1949        JobTime=CPUTime*BlockLength*nbfield_j% estimated time for one individual job (in minutes)
     1950        % wall time (in hours) for each oar job, allowing 4 individual jobs, but limited to 23 h:
     1951        WallTimeTotal=min(23,4*JobTime/60);
     1952        disp(['WallTimeTotal: ' num2str(WallTimeTotal) ' hours'])
     1953        % estimated time of an individual job (in min), with a margin of error
     1954        WallTimeOneJob=min(4*JobTime+10,WallTimeTotal*60/2);% estimated max time of an individual job for checkpoint
     1955        disp(['WallTimeOneJob: ' num2str(WallTimeOneJob) ' minutes'])
    19321956        oar_command=['oarsub -n UVmat_' ActionName ' '...
    1933             '-t idempotent --checkpoint ' num2str(walltime_onejob+60) ' '...
     1957            '-t idempotent --checkpoint ' num2str(WallTimeOneJob*60) ' '...
    19341958            '-l /core=' num2str(NbCore) ','...
    1935             'walltime=' datestr(min(1.05*walltime_onejob/86400*max(NbProcess*BlockLength*nbfield_j,NbCore)/NbCore,max_walltime/86400),13) ' '...
     1959            'walltime=' datestr(WallTimeOneJob/24,13) ' '...
    19361960            '-E ' regexprep(filename_joblist,'\.txt\>','.stderr') ' '...
    19371961            '-O ' regexprep(filename_joblist,'\.txt\>','.stdout') ' '...
     
    19391963            '"oar-parexec -s -f ' filename_joblist ' '...
    19401964            '-l ' filename_joblist '.log"\n'];
    1941         filename_oarcommand=fullfile(DirOAR,'oar_command');
     1965       
     1966        filename_oarcommand=fullfile(DirOAR,'0_oar_command');
    19421967        fid=fopen(filename_oarcommand,'w');
    19431968        fprintf(fid,oar_command);
     
    19451970        fprintf(oar_command);% display in command line
    19461971        system(oar_command); 
    1947         msgbox_uvmat('CONFIRMATION',[ActionName ' launched in cluster: press STATUS to see results'])
     1972        msgbox_uvmat('CONFIRMATION',[ActionName ' launched as  ' num2str(NbProcess) ' processes in cluster: press STATUS to see results'])
    19481973    case 'cluster_pbs' % for LMFA Kepler machine
    19491974        %create subdirectory for pbs command and log files
     
    23022327end
    23032328
     2329%% Introduce visibility of file overwrite option
     2330if isfield(ParamOut,'CheckOverwriteVisible')&& strcmp(ParamOut.CheckOverwriteVisible,'on')
     2331    set(handles.CheckOverwrite,'Visible','on')
     2332else
     2333    set(handles.CheckOverwrite,'Visible','off')
     2334end
    23042335
    23052336%% Check whether alphabetical sorting of input Subdir is allowed by the Action fct  (for multiples series entries)
     
    24622493set(handles.OutputDirExt,'Visible',OutputDirVisible)
    24632494set(handles.OutputSubDir,'Visible',OutputDirVisible)
     2495%set(handles.CheckOverwrite,'Visible',OutputDirVisible)
    24642496set(handles.OutputDir_title,'Visible',OutputDirVisible)
    24652497SeriesData.ActionName=ActionName;%record ActionName for next use
     
    35613593function OutputSubDir_Callback(hObject, eventdata, handles)
    35623594set(handles.OutputSubDir,'BackgroundColor',[1 1 1])
    3563 % hObject    handle to OutputSubDir (see GCBO)
     3595
     3596
     3597% --- Executes on button press in CheckOverwrite.
     3598function CheckOverwrite_Callback(hObject, eventdata, handles)
     3599% hObject    handle to CheckOverwrite (see GCBO)
    35643600% eventdata  reserved - to be defined in a future version of MATLAB
    35653601% handles    structure with handles and user data (see GUIDATA)
    35663602
    3567 % Hints: get(hObject,'String') returns contents of OutputSubDir as text
    3568 %        str2double(get(hObject,'String')) returns contents of OutputSubDir as a double
     3603% Hint: get(hObject,'Value') returns toggle state of CheckOverwrite
     3604
     3605
     3606% --- Executes on button press in TestCPUTime.
     3607function TestCPUTime_Callback(hObject, eventdata, handles)
     3608% hObject    handle to TestCPUTime (see GCBO)
     3609% eventdata  reserved - to be defined in a future version of MATLAB
     3610% handles    structure with handles and user data (see GUIDATA)
     3611
     3612
     3613
     3614
     3615
Note: See TracChangeset for help on using the changeset viewer.