Source code for ncTools

#!/usr/bin/env python
########################################################################
# NetCDF I/O and parallelization library                               #
#                                                                      #
# 2013, G. SERAZIN, PhD student at MEOM/LGGE Grenoble                  #
########################################################################
"""
The package ncTools
"""
#=======================================================================
# Module importation
#=======================================================================
import pickle
import copy
import sys
import os
try:
	import netCDF4_utils
	from netCDF4 import Dataset
except:
	print 'Package netCDF4 is not available...'
	exit()
try:
    import numpy as np
except:
	print 'Package numpy is not available...'
	exit()

#=======================================================================
# Dictionnary of supported data types
#=======================================================================
geodtype = dict()
#-----------------------------------------------------------------------
# Orca
#-----------------------------------------------------------------------
dim_name = {'T':'time_counter', 
            'Z':'deptht', 
            'Y':'y', 
            'X':'x'}	
dim_var = {'T':'time_counter', 
           'Z':'deptht',
           'Y':'nav_lat', 
           'X':'nav_lon'}
latlon2D = True
geodtype['orca'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time', 
            'Z':'z', 
            'Y':'y', 
            'X':'x'}	
dim_var = {'T':'time', 
           'Z':'nav_lev', 
           'Y':'nav_lat', 
           'X':'nav_lon'}
latlon2D = True
geodtype['orcat'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time_counter', 
            'Z':'deptht', 
            'Y':'y', 
            'X':'x'}	
dim_var = {'T':'time_counter', 
           'Z':'deptht', 
           'Y':'nav_lat', 
           'X':'nav_lon'}
latlon2D = True
geodtype['orcau'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time_counter', 
            'Z':'depthu',
            'Y':'y', 
            'X':'x'}	
dim_var = {'T':'time_counter', 
           'Z':'depthu',
           'Y':'nav_lat', 
           'X':'nav_lon'}
latlon2D = True
geodtype['orcav'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time_counter', 
            'Z':'depthv', 
            'Y':'y', 
            'X':'x'}	
dim_var = {'T':'time_counter', 
           'Z':'depthv', 
           'Y':'nav_lat', 
           'X':'nav_lon'}
latlon2D = True
geodtype['maskorca025'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'t', 
            'Z':'z', 
            'Y':'y', 
            'X':'x'}	
dim_var = {'T':'time_counter', 
           'Z':'nav_lev',
           'Y':'nav_lat', 
           'X':'nav_lon'}
latlon2D = True
geodtype['maskorca12'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time_counter', 
            'Z':'evn', 
            'Y':'y', 
            'X':'x'}
dim_var = {'T':'time_counter', 
           'Z':'evn',
           'Y':'lat', 
           'X':'lon'}
latlon2D = False
geodtype['eof1d_orca'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time', 
            'Z':'evn', 
            'Y':'lat', 
            'X':'lon'}
dim_var = {'T':'time', 
           'Z':'evn',
           'Y':'lat', 
           'X':'lon'}
latlon2D = False
geodtype['eof1d_aviso'] = (dim_name, dim_var, latlon2D)
#-----------------------------------------------------------------------
# Aviso
#-----------------------------------------------------------------------
dim_name = {'T':'time', 'Y':'lat', 'X':'lon'}			
dim_var = {'T':'time', 'Y':'lat', 'X':'lon'}
latlon2D = False
geodtype['aviso'] = (dim_name, dim_var, latlon2D)
#-----------------------------------------------------------------------
# Spectrum
#-----------------------------------------------------------------------
dim_name = {'T':'omega', 'Z':'kappa', 'Y':'l', 'X':'k'}
dim_var = {'T':'omega', 'Z':'kappa', 'Y':'l', 'X':'k'}
latlon2D = False
geodtype['spectrum'] = (dim_name, dim_var, latlon2D)
del dim_name, dim_var, latlon2D
#-----------------------------------------------------------------------
# Examples
#-----------------------------------------------------------------------
dim_name = {'T':'t', 'Z':'z', 'Y':'y', 'X':'x'}
dim_var = {'T':'extime', 'Z':'exdepth', 'Y':'exlat', 'X':'exlon'}
latlon2D = True
geodtype['example2d'] = (dim_name, dim_var, latlon2D)
del dim_name, dim_var, latlon2D
#=======================================================================
# Class chunk
#=======================================================================
[docs]class chunk(dict): """ This class is used to specify a chunk in a NetCDF file that corresponds to geophysical data. The keyword used are: Attributes ---------- t_min : int The minimum index along the time dimension t_max : int The maximum index along the time dimension z_min : int The minimum index along the vertical dimension z_max : int The maximum index along the vertical dimension lat_min : int The minimum index along the latitudinal dimension lat_max : int The maximum index along the latitudinal dimension lon_min : int The minimum index along the longitudinal dimension lon_max : int The maximum index along the longitudinal dimension The chunk class is derived from the main python dictionnary class. The values associated with each keywords are set to None by default. """ def __init__(self, t_min=None, t_max=None, z_min=None, z_max=None, lat_min=None, lat_max=None, lon_min=None, lon_max=None): """ Arguments --------- t_min : int, optional The minimum index along the time dimension t_max : int, optional The maximum index along the time dimension z_min : int, optional The minimum index along the vertical dimension z_max : int, optional The maximum index along the vertical dimension lat_min : int, optional The minimum index along the latitudinal dimension lat_max : int, optional The maximum index along the latitudinal dimension lon_min : int, optional The minimum index along the longitudinal dimension lon_max : int, optional The maximum index along the longitudinal dimension """ dict.__init__(self, [('t_min',t_min), ('t_max',t_max), ('z_min',z_min), ('z_max',z_max), ('lat_min',lat_min), ('lat_max',lat_max), ('lon_min',lon_min), ('lon_max',lon_max)]) def __getitem__(self, key): val = dict.__getitem__(self, key) return val def __setitem__(self, key, val): dict.__setitem__(self, key, val) def __str__(self): string = ('---------------------\n' '| t_min | t_max |\n| %7s | %7s |\n' '---------------------\n' '| z_min | z_max |\n| %7s | %7s |\n' '---------------------\n' '| lat_min | lat_max |\n| %7s | %7s |\n' '---------------------\n' '| lon_min | lon_max |\n| %7s | %7s |\n' '---------------------' % (self['t_min'], self['t_max'], self['z_min'], self['z_max'], self['lat_min'], self['lat_max'], self['lon_min'], self['lon_max']) ) return string
[docs] def size_dim(self, dim): """ Get the size of the chunk along one dimension Parameters ---------- dim : {'T', 'Z', 'Y', 'X'} The key of the dimension Returns ------- n : int The size of the chunk along the corresponding dimension Examples -------- >>> chk = chunk(t_min=0, t_max=25, z_min=4, z_max=7, ... lat_min=23, lat_max=125, lon_min=65, lon_max=243) >>> chk.size_dim('T') 25 >>> chk.size_dim('Z') 3 >>> chk.size_dim('Y') 102 >>> chk.size_dim('X') 178 """ if not isinstance(dim,str): raise TypeError, 'dim input must be a string' if dim == 'T': if (self['t_min'] is not None and self['t_max'] is not None): n_t = self['t_max']-self['t_min'] else: n_t = 1 return n_t elif dim == 'Z': if (self['z_min'] is not None and self['z_max'] is not None): n_z = self['z_max']-self['z_min'] else: n_z = 1 return n_z elif dim == 'Y': if (self['lat_min'] is not None and self['lat_max'] is not None): n_lat = self['lat_max']-self['lat_min'] else: n_lat = 1 return n_lat elif dim == 'X': if (self['lon_min'] is not None and self['lon_max'] is not None): n_lon = self['lon_max']-self['lon_min'] else: n_lon = 1 return n_lon else: raise ValueError, ("Not a valid dimension ('T', 'Z', 'Y' or" " 'X')")
[docs] def size_time(self): """ Get the size of the chunk along the time dimension Returns ------- n_t : int The size of the chunk along the time dimension Examples -------- >>> chk=chunk(t_min=0, t_max=25) >>> chk.size_time() 25 >>> chk=chunk(t_max=25) >>> chk.size_time() 1 """ n_t = self.size_dim('T') return n_t
[docs] def size_depth(self): """ Get the size of the chunk along the depth dimension Returns ------- n_z : int The size of the chunk along the depth dimension Examples -------- >>> chk = chunk(z_min=4, z_max=7) >>> chk.size_depth() 3 >>> chk = chunk(z_min=4) >>> chk.size_depth() 1 """ n_z = self.size_dim('Z') return n_z
[docs] def size_latlon(self): """ Get the size of the chunk along the latitudinal and longitudinal dimensions Returns ------- n_lat : int The size of the chunk along the latitudinal dimension n_lon : int The size of the chunk along the longitudinal dimension Examples -------- >>> chk = chunk(lat_min=23, lat_max=125, lon_min=65, ... lon_max=243) >>> chk.size_latlon() (102, 178) """ n_lat = self.size_dim('Y') n_lon = self.size_dim('X') return n_lat, n_lon
[docs] def size_lat(self): """ Get the size of the chunk along the latitudinal dimension Returns ------- n_lat : int The size of the chunk along the latitudinal dimension Examples -------- >>> chk = chunk(lat_min=23, lat_max=125) >>> chk.size_lat() 102 >>> chk = chunk(lat_max=125) >>> chk.size_lat() 1 """ n_lat = self.size_dim('Y') return n_lat
[docs] def size_lon(self): """ Get the size of the chunk along the longitudinal dimension Returns ------- n_lon : int The size of the chunk along the longitudinal dimension Examples -------- >>> chk = chunk(lon_min=65, lon_max=243) >>> chk.size_lon() 178 >>> chk = chunk(lon_min=65) >>> chk.size_lon() 1 """ n_lon = self.size_dim('X') return n_lon
[docs] def total_size(self): """ Get the total size of the chunk Returns ------- n : int The total size of the chunk dimension """ n_t = self.size_dim('T') n_z = self.size_dim('Z') n_lat = self.size_dim('Y') n_lon = self.size_dim('X') return n_t*n_lat*n_lon*n_z
[docs] def min(self, dim): """ Return the minimum of the chunk value along one dimension Parameters ---------- dim : {'T', 'Z', 'Y', 'X'} Key of the dimension Return ------ n : int Minimum of the chunk along the dimension Examples -------- >>> chk = chunk(t_min=0, t_max=25, lat_min=23, lat_max=125, ... lon_min=65, lon_max=243) >>> chk.min('T') 0 >>> chk.min('Z') >>> chk.min('Y') 23 >>> chk.min('X') 65 """ if not isinstance(dim, str): raise TypeError, 'dim input must be a string' if dim == 'T': n = self['t_min'] elif dim == 'Z': n = self['z_min'] elif dim == 'Y': n = self['lat_min'] elif dim == 'X': n = self['lon_min'] else: raise ValueError, dim + (" is not a valid dimension ('T', " "'Z', 'Y' or 'X')") return n
[docs] def max(self, dim): """ Return the maximum of the chunk value along one dimension Parameters ---------- dim : {'T', 'Z', 'Y', 'X'} Key of the dimension Return ------ n : int Maximum of the chunk along the dimension Examples -------- >>> chk = chunk(t_min=0, t_max=25, lat_min=23, lat_max=125, ... lon_min=65, lon_max=243) >>> chk.max('T') 25 >>> chk.max('Z') >>> chk.max('Y') 125 >>> chk.max('X') 243 """ if not isinstance(dim, str): raise TypeError, 'dim input must be a string' if dim == 'T': n = self['t_max'] elif dim == 'Z': n = self['z_max'] elif dim == 'Y': n = self['lat_max'] elif dim == 'X': n = self['lon_max'] else: raise ValueError, dim + (" is not a valid dimension ('T', " "'Z', 'Y' or 'X')") return n
[docs] def copy(self): """ Copy a chunk Return ------ chk : chunk A copy of the original instance """ return copy.copy(self)
[docs] def isempty(self): """ Test if the chunk is empty Return ------ val : bool True if the chunk is empty, False otherwise Example ------- >>> chk = chunk(t_min=0, t_max=25) >>> chk.isempty() False >>> chk = chunk() >>> chk.isempty() True """ val = self.values() return val==[None]*len(val) #======================================================================= # Class geovar #=======================================================================
[docs]class geovar: """ The class geovar is similar to the netCDF class Variables but it does not include the value of the variable, it is only used to described the name of the variable and the associated metadata. """ def __init__(self, varname, dtype='f4', fill_value=-99999., **kwargs): """ Arguments --------- name : str The variable name dtype : {'f4', ...} The type of the variable fill_value : double The value of the attribute fill_value Example ------- >>> gvar = geovar('sla', dtype='f4', fill_value=-99999, ... axis='TYX', long_name='Sea Level Anomaly') >>> print gvar sla (f4): fill_value: -99999 long_name: Sea Level Anomaly axis: TYX """ if isinstance(varname, str): self.name = varname self.dtype = dtype self.fill_value = fill_value else: raise TypeError, 'arguments must be string type' for key, value in kwargs.iteritems(): setattr(self, key, value) #----------------------------------------------------------------------- # SPECIAL METHODS #----------------------------------------------------------------------- def __call__(self): return self.name def __repr__(self): return self.name def __str__(self): title = self.name + ' (' + str(self.dtype) + '):' + ' \n' list_attr = ' fill_value: %s' %self.fill_value for key, val in zip(self.attrs().keys(), self.attrs().values()): list_attr += ('\n ' + key + ': %s') %val return title + list_attr + '\n' #----------------------------------------------------------------------- # PUBLIC METHODS #----------------------------------------------------------------------- def attrs(self): dict_attr = copy.copy(self.__dict__) del dict_attr['name'] del dict_attr['dtype'] del dict_attr['fill_value'] return dict_attr def get_attr(self, name): return getattr(self, name) def set_attr(self, name, value): setattr(self, name, value) #======================================================================= # Class geodata #=======================================================================
[docs]class geodata: """ The class geodate is designed to work with geophysical data stored in NetCDF file having the possible dimensions: 'T' : the time counter 'Z' : the vertical dimension 'Y' : the latitudinal dimension 'X' : the longitudinal dimension Dimension names and dimension variables names are stored for a lot of models and dataset. The class is able to recognize the appropriate type to use. It allows as easiest access to NetCDF file for which the dimension may be unkown and some functions are designed to rapidly create, duplicate and modify the NetCDF. The additional use of chunks may reveal very powerful when using it with the calculation class. Attributes ---------- filename : str The name of the netcdf file. iotype : {'r', 'w'} Read or Write mode. dtype : str The type of the geophysical data dimensions : str A series of dimension keys ('T', 'Z', 'Y', 'X') time_unlimited : bool A boolean that tells if the record dimension is unlimited or not chk : chunk The default chunk on which perform operations format : str The netCDF format ('NETCDF3', 'NETCDF3_64BIT', 'NETCDF4', 'NETCDF4_CLASSIC') """ def __init__(self, filename, iotype='r', dtype='', dimensions='', time_unlimited=True, chk=None, format='NETCDF3_64BIT', init=True): """ Parameters ---------- filename : string The name of the input file. iotype : {'r', 'w'}, optional Define if the file is opened in read or write mode. Default is read only. dtype : string, optional Define the geodata type of the netCDF file. Supported types are 'orca', 'aviso' and 'spectrum'. Note that in read mode, the geodata type will be automatically recognized. dimensions : string, optional Define the dimensions of geodata. Dimensions can only contain the values T, Z, Y and X. Note that in read mode, the dimensions will be automatically recognized. time_unlimited : bool, optional If the netCDF does not exist, it will define the time dimension as unlimited or not during its creation. Default is True. chk : chunk, optional Define a chunk by default. Operations will only be made on this chunk of the netCDF. If the netCDF does not exist, the chunk will be used to define the size of the dimensions to be created. Default is None, which means no chunk by default. format : {'NETCDF3', 'NETCDF3_64BIT', 'NETCDF4', 'NETCDF4_CLASSIC'}, optional If the netCDF does not exist, it specifies the format to use. Default is 'NETCDF3_64BIT'. """ # Filename if isinstance(filename, str): self.filename = filename else: raise TypeError, 'filename must be a string' # I/O type if iotype in ['r', 'w']: self.iotype = iotype else: raise ValueError, 'iotype must be either "r" or "w"' # Datatype if isinstance(dtype, str): self.dtype = dtype else: raise TypeError, 'dtype must be a string' # Dimensions if isinstance(dimensions, str): self.dimensions = dimensions else: raise TypeError, 'dimensions must be a string' # Time dimension unlimited or not if isinstance(time_unlimited, bool): self.time_unlimited = time_unlimited else: raise TypeError, 'time_unlimited must be a boolean' # Chunk if chk is None or isinstance(chk, chunk): self.chk = chk else: raise TypeError, 'chk must be a chunk' # Format if format in ['NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', 'NETCDF4']: self.format = format else: raise ValueError, "Not a correct format" self.opened = False if not os.path.isfile(filename): if not iotype == 'r': if init: self.nc_create() self._nc_close() if self.dtype == '': self.dtype = 'orca' self.dim_name = geodtype[self.dtype][0] self.dim_var = geodtype[self.dtype][1] self.latlon2D = geodtype[self.dtype][2] if self.dimensions == '': self.dimensions = 'TZYX' else: raise IOError, 'The file in read mode does not exist' else: if dtype == '': self._build_dtype() self.dim_name = geodtype[self.dtype][0] self.dim_var = geodtype[self.dtype][1] self.latlon2D = geodtype[self.dtype][2] if self.dimensions == '': self._build_dimensions() #----------------------------------------------------------------------- # SPECIAL METHODS -> /!\ To improve #----------------------------------------------------------------------- def __getitem__(self, varname): self.get_var(varname) def __setitem__(self, varname, values): self.save_var(varname, values) def __str__(self): """ Nice print for the geodata instance. It is similart to the famous ncdump -h command. """ bound = ('=====================================================' + '===========\n') #Main attributes header = ('Filename : %s \n' 'Format : %s \n' 'I/O access : %s \n' 'Data type : %s \n' 'Dimensions : %s \n' 'Unlimited time dimension : %s \n' % (self.filename, self.format, self.iotype, self.dtype, self.dimensions, self.time_unlimited) ) #Chunk chunk = ('Default chunk: \n' + '-------------- \n' + self.chk.__str__() + '\n\n') #Dimensions dimensions = ('Dimensions: \n' + '----------- \n') if self._check_dim(): for dim in self.dimensions: dimensions += (dim + ' - ' + self.dim_name[dim] + ': ' + str(self.get_dim(dim)) + '\n') dimensions += '\n' #Variables variables = ('Variables: \n' + '---------- \n') self._nc_open() for varname in self.netcdf.variables: gvar = self.build_gvar(str(varname)) variables += (gvar.__str__() + '\n') self._nc_close() return (bound + header + bound + chunk + dimensions + variables + bound) #def __add__(self, gdat): #add(self, gdat, list_gvar) #def __sub__(self, gdat): #def __mul__(self, gdat): #def __div__(self, gdat): #def __pow__(self, power): def __del__(self): self._nc_close() #----------------------------------------------------------------------- # PRIVATE METHODS #----------------------------------------------------------------------- def _build_dimensions(self): dimensions='' self._nc_open() for key in self.netcdf.dimensions.keys(): if key in self.dim_name.values(): dimensions += self.dim_name.keys()\ [self.dim_name.values().index(key)] if 'T' in dimensions: self.dimensions+='T' if 'Z' in dimensions: self.dimensions+='Z' if 'Y' in dimensions: self.dimensions+='Y' if 'X' in dimensions: self.dimensions+='X' self._nc_close() def _build_dtype(self): """ Construct the corresponding dtype for geodata by reading the dimension and the dimension variable names """ best_score = 0 self._nc_open() for dtype in geodtype: score = 0 dim_name = geodtype[dtype][0] dim_var = geodtype[dtype][0] for key in self.netcdf.dimensions.keys(): if key in dim_name.values(): score += 1 for var in self.netcdf.variables.keys(): if var in dim_var.values(): score +=1 if score > best_score: best_score = copy.copy(score) self.dtype = dtype self.dim_name = geodtype[self.dtype][0] self.dim_var = geodtype[self.dtype][1] self.latlon2D = geodtype[self.dtype][2] self._nc_close() def _check_datatype(self, gdat): """ Check if the datatype of the current instance is consistent with the input gdat Parameters ---------- gdat : geodata The input geodata instance to test with Raises ------ TypeError If the types do not match. """ if not self.dtype == gdat.get_dtype(): raise TypeError, ('gdat must the same type as self: ' + self.dtype) def _check_dim(self): """ Check if the dimensions already exists in the nectCDF file Returns ------- out : bool True if the dimensions already exists """ self._nc_open() dim_namelist = self.netcdf.dimensions.keys() for dim in self.dimensions: if not self.dim_name[dim] in dim_namelist: return False self._nc_close() return True def _checkIO(self): """ Check if it is allowed to write in the netCDF file. Raises ------ IOError If it is allowed to write in the file. """ if self.iotype == 'r': raise IOError, "Not allowed to write in the file " def _nc_close(self): """ Close the netCDF file """ if self.opened == True: self.netcdf.close() self.opened = False def _nc_open(self): """ Open the netCDF file """ if self.opened == False: if self.iotype == 'r': self.netcdf=Dataset(self.filename, 'r') elif self.iotype == 'w': self.netcdf = Dataset(self.filename, 'a') self.opened = True #----------------------------------------------------------------------- # PUBLIC METHODS #-----------------------------------------------------------------------
[docs] def build_gvar(self, varname): """ Construct a geovar instance from the corresponding variable in the netCDF file Parameters ---------- varname : str Name of the variable in the netCDF file Returns ------- gvar : geovar The geovar instance with all the attributes of the variable Examples -------- >>> gdat = example_geodata() >>> gvar = gdat.build_gvar('exlon') >>> print gvar exlon (float32): fill_value: -9999.0 axis: YX """ gvar = geovar(varname, dtype=self.get_var_dtype(varname), fill_value=self.get_var_FillValue(varname), axis=self.get_var_axis(varname)) self._nc_open() for attr in self.netcdf.variables[varname].ncattrs(): if not attr == '_FillValue': gvar.set_attr(attr, self.get_var_attr(varname, attr)) self._nc_close() return gvar
[docs] def changeIO(self, iotype): """ Change the read/write mode of the netCDF file. Parameters ---------- iotype : {'r', 'w'} Read or write mode. Examples -------- >>> gdat = example_geodata() >>> print gdat.iotype w >>> gdat.changeIO('r') >>> print gdat.iotype r """ if iotype in ['r', 'w']: self.iotype = iotype else: raise ValueError, 'iotype must be either "r" or "w"' self._nc_close() self.iotype = iotype
[docs] def check_chunk(self, chk): """ Check if the chunk is consistent with the dimensions of the netCDF file otherwise reset the value to the min/max. Parameters ---------- chk : chunk The chunk to check Examples -------- """ if chk == None: if not self.chk == None: chk = self.chk.copy() else: chk = chunk() if chk['t_min'] == None: chk['t_min'] = 0 if chk['t_max'] == None and (not self.time_unlimited or self.iotype=='r'): chk['t_max'] = self.get_dim_time() if chk['z_min'] == None: chk['z_min'] = 0 if chk['z_max'] == None: chk['z_max'] = self.get_dim_depth() if chk['lat_min'] == None: chk['lat_min'] = 0 if chk['lat_max'] == None: chk['lat_max'] = self.get_dim_lat() if chk['lon_min'] == None: chk['lon_min'] = 0 if chk['lon_max'] == None: chk['lon_max'] = self.get_dim_lon() return chk
[docs] def get_nc(self): """ Return the associated netCDF4 instance """ self._nc_open() return self.netcdf
[docs] def get_dtype(self): """ Return the associated type of geodata """ return self.dtype
[docs] def get_dimensions(self): """ Return the dimensions """ return self.dimensions
[docs] def get_filename(self): """ Return the filename """ return self.filename #----------------------------------------------------------------------- # I - I/O functions #-----------------------------------------------------------------------
[docs] def nc_create(self): """ Create the netCDF file """ self._checkIO() self.netcdf = Dataset(self.filename, 'w', format=self.format) self.opened = True #----------------------------------------------------------------------- # C1 - Chunk functions #-----------------------------------------------------------------------
def get_chunk(self): chk = self.check_chunk(self.chk) return chk.copy() def set_chunk(self, chk): self.chk = chk #----------------------------------------------------------------------- # C2 - Copying functions #----------------------------------------------------------------------- # 1 - Dimensions # ~~~~~~~~~~~~~~~
[docs] def copy_dim(self, gdat): """ Copy the dimensions from another geodata instance Parameters ---------- gdat : geodata The input instance from which the dimensions are copied Examples -------- >>> gdat_in = example_geodata() >>> gdat_out = geodata('../examples/example_copy.nc', ... iotype='w', dtype='example2d') >>> print gdat_out ================================================================ Filename : ../examples/example_copy.nc Format : NETCDF3_64BIT I/O access : w Data type : example2d Dimensions : TZYX Unlimited time dimension : True ================================================================ Default chunk: -------------- None <BLANKLINE> Dimensions: ----------- <BLANKLINE> Variables: ---------- ================================================================ <BLANKLINE> >>> gdat_out.copy_dim(gdat_in) >>> print gdat_out ================================================================ Filename : ../examples/example_copy.nc Format : NETCDF3_64BIT I/O access : w Data type : example2d Dimensions : TZYX Unlimited time dimension : True ================================================================ Default chunk: -------------- None <BLANKLINE> Dimensions: ----------- T - t: 0 Z - z: 9 Y - y: 37 X - x: 73 <BLANKLINE> Variables: ---------- ================================================================ <BLANKLINE> """ if not isinstance(gdat, geodata): raise TypeError, 'gdat input must be a geodata' self._checkIO() self._nc_open() self._check_datatype(gdat) # Check if gdat is compatible with the current instance gdimensions = gdat.get_dimensions() for dim in 'TZYX': if dim in self.dimensions and dim in gdimensions: if dim=='T'and self.time_unlimited==True: # Particular processing for forcing the time record to # be unlimited self.netcdf.createDimension(self.dim_name['T'],None) else: self.netcdf.createDimension(self.dim_name[dim], gdat.get_dim(dim)) self._nc_close() # 2 - Dimension variables # ~~~~~~~~~~~~~~~~~~~~~~~~
[docs] def copy_dimvarstruct(self, gdat, dim): """ Create and copy the structure of a dimension variable. Arguments --------- gdat : geodata Input data from which the dimension will be read dim : {'T', 'Z', 'Y', 'X'} Key of the dimension dim : {} """ self._checkIO() self._check_datatype(gdat) nc = gdat.get_nc() if hasattr(nc.variables[self.dim_var[dim]], '_FillValue'): fval = nc.variables[self.dim_var[dim]]._FillValue else: fval = 0. dtyp = nc.variables[self.dim_var[dim]].dtype self.create_dimvar(dim, dtype=dtyp, fill_value=fval) self._nc_open() for attr in nc.variables[self.dim_var[dim]].ncattrs(): if attr != '_FillValue': # /!\ The attribute _FillValue cannot be changed in # netCDF4 !!! self.netcdf.variables[self.dim_var[dim]]\ .__setattr__(attr, nc.variables[self.dim_var[dim]]\ .getncattr(attr)) self.netcdf.variables[self.dim_var[dim]].\ set_auto_maskandscale(False) self._nc_close()
[docs] def copy_coord(self, gdat, chk_in=None, chk_out=None): """ Copy all the coordinate variables from the instance gdat into the current instance Parameters ---------- gdata : geodata The geodata instance from which the coordinate variables are copied chk_in : chunk Define a subset of the input instance chk_out : chunk The corresponding subset in which the coordinate variables are copied """ for dim in self.dimensions: self.copy_dimvar(gdat, dim, chk_in=chk_in, chk_out=chk_out)
[docs] def copy_dimvar(self, gdat, dim, chk_in=None, chk_out=None): """ Copy the variables associated with the dimensions """ if not isinstance(dim, str): raise TypeError, 'dim input must be a string' if not isinstance(gdat, geodata): raise TypeError, 'gdat input must be a geodata' self._checkIO() if not self._check_dim(): if chk_out == None and self.chk == None: # Case : chk_out is not defined and there is no default # chunk in the instance self.copy_dim(gdat) # -> The dimension are copied from the gdat else: # Other case: call create_dimension function with chk set # to chk_out self.create_dim(chk=chk_out) self.copy_dimvarstruct(gdat, dim) chk_in = gdat.check_chunk(chk_in) chk_out = self.check_chunk(chk_out) nc = gdat.get_nc() self._nc_open() if self.latlon2D == True and (dim == 'Y' or dim == 'X'): # Special processing for 2D lat/lon fields self.netcdf.variables[self.dim_var[dim]]\ [chk_out.min('Y'):chk_out.max('Y'),\ chk_out.min('X'):chk_out.max('X')]\ = \ np.array(nc.variables[self.dim_var[dim]]\ [chk_in.min('Y'):chk_in.max('Y'),\ chk_in.min('X'):chk_in.max('X')]) else: self.netcdf.variables[self.dim_var[dim]]\ [chk_out.min(dim):chk_out.max(dim)]\ = \ np.array(nc.variables[self.dim_var[dim]]\ [chk_in.min(dim):chk_in.max(dim)]) self._nc_close()
def copy_time(self,gdat,chk_in=None,chk_out=None): self.copy_dimvar(gdat,'T',chk_in=chk_in,chk_out=chk_out) def copy_depth(self,gdat,chk_in=None,chk_out=None): self.copy_dimvar(gdat,'Z',chk_in=chk_in,chk_out=chk_out) def copy_lat(self,gdat,chk_in=None,chk_out=None): self.copy_dimvar(gdat,'Y',chk_in=chk_in,chk_out=chk_out) def copy_lon(self,gdat,chk_in=None,chk_out=None): self.copy_dimvar(gdat,'X',chk_in=chk_in,chk_out=chk_out) def copy_latlon(self,gdat,chk_in=None,chk_out=None): self.copy_lat(gdat,chk_in=chk_in,chk_out=chk_out) self.copy_lon(gdat,chk_in=chk_in,chk_out=chk_out) # 2 - Variables # ~~~~~~~~~~~~~~
[docs] def copy_varstruct(self, gdat, gvar_in, gvar_out=None): """ Copy the structure of the variable in the geodata instance geodat to a new variable in self """ # Check the arugments if isinstance(gvar_in, geovar): pass elif isinstance(gvar_in, str): gvar_in = geovar(gvar_in, dtype=gdat.get_var_dtype(gvar_in), fill_value=gdat.get_var_FillValue(gvar_in), axis = gdat.get_var_axis(gvar_in)) else: raise TypeError, "Parameter must be either string or geovar" if gvar_out == None: gvar_out = gvar_in elif isinstance(gvar_out, geovar): pass elif isinstance(gvar_out, str): gvar_out = geovar(gvar_out, dtype=gvar_in.dtype, fill_value=gvar_in.fill_value, axis = gvar_in.axis) else: raise TypeError, "Parameter must be either string or geovar" self._checkIO() self._check_datatype(gdat) nc = gdat.get_nc() for attr in nc.variables[gvar_in.name].ncattrs(): if not attr in gvar_out.attrs(): gvar_out.set_attr(attr, gdat.get_var_attr(gvar_in, attr)) self.create_var(gvar_out)
def copy_var(self, gdat, gvar_in, gvar_out=None, chk_in=None, chk_out=None): # Check the arugments if isinstance(gvar_in, geovar): pass elif isinstance(gvar_in, str): gvar_in = geovar(gvar_in, dtype=gdat.get_var_dtype(gvar_in), fill_value=gdat.get_var_FillValue(gvar_in), axis = gdat.get_var_axis(gvar_in)) else: raise TypeError, "Parameter must be either string or geovar" if gvar_out == None: gvar_out = gvar_in elif isinstance(gvar_out, geovar): pass elif isinstance(gvar_out, str): gvar_out = geovar(gvar_out, dtype=gvar_in.dtype, fill_value=gvar_in.fill_value, axis = gvar_in.axis) else: raise TypeError, "Parameter must be either string or geovar" self._checkIO() self._check_datatype(gdat) chk_in = gdat.check_chunk(chk_in) chk_out = self.check_chunk(chk_out) gdimensions = gdat.get_dimensions() # Copy the structure of the input variable into the output # variable if the variable is not already in the netCDF # file if not gvar_out.name in self.netcdf.variables: self.copy_varstruct(gdat, gvar_in, gvar_out=gvar_out) nc = gdat.get_nc() dim = self.get_var_axis(gvar_out.name) self._nc_open() # 1D case if len(dim) == 1: self.netcdf.variables[gvar_out.name]\ [chk_out.min(dim[0]):chk_out.max(dim[0])]\ = \ np.array(nc.variables[gvar_in.name]\ [chk_in.min(dim[0]):chk_in.max(dim[0])]) # 2D case elif len(dim) == 2: self.netcdf.variables[gvar_out.name]\ [chk_out.min(dim[0]):chk_out.max(dim[0]),\ chk_out.min(dim[1]):chk_out.max(dim[1])] \ = \ np.array(nc.variables[gvar_in.name]\ [chk_in.min(dim[0]):chk_in.max(dim[0]),\ chk_in.min(dim[1]):chk_in.max(dim[1])]) # 3D case elif len(dim) == 3: for i in range(chk_in.size_dim(dim[0])): self.netcdf.variables[gvar_out.name]\ [chk_out.min(dim[0])+i,\ chk_out.min(dim[1]):chk_out.max(dim[1]),\ chk_out.min(dim[2]):chk_out.max(dim[2])]\ = \ np.array(nc.variables[gvar_in.name]\ [chk_in.min(dim[0])+i,\ chk_in.min(dim[1]):chk_in.max(dim[1]),\ chk_in.min(dim[2]):chk_in.max(dim[2])]) # 4D case elif len(dim) == 4: for i in range(chk_in.size_dim(dim[0])): self.netcdf.variables[gvar_out.name]\ [chk_out.min(dim[0])+i,\ chk_out.min(dim[1]):chk_out.max(dim[1]),\ chk_out.min(dim[2]):chk_out.max(dim[2]),\ chk_out.min(dim[3]):chk_out.max(dim[3])]\ = \ np.array(nc.variables[gvar_in.name]\ [chk_in.min(dim[0])+i,\ chk_in.min(dim[1]):chk_in.max(dim[1]),\ chk_in.min(dim[2]):chk_in.max(dim[2]),\ chk_in.min(dim[3]):chk_in.max(dim[3])]) self._nc_close() #----------------------------------------------------------------------- # E - Extraction functions #-----------------------------------------------------------------------
[docs] def extract_struct(self, gdat, list_gvar_in, list_gvar_out=None, chk_in=None, chk_out=None): """ Extract the structure of a netCDF file. Parameters ---------- gdat : geodata Input data from which the extraction will be made list_gvar_in : list of string or geovar Names of the variables of the input netCDF file for which the structure will be copied list_gvar_out: list of string or geovar, optional Names of the variables of the output netCDF file in which the structure will be copied chk_in : chunk, optional Define the chunk to read in the input file for the extraction chk_out : chunk, optional Define the chunk to write the variable in the output file extraction fill_value : float, file The fill_value that will be used for the variables in the output netCDF file """ self.copy_coord(gdat, chk_in=chk_in, chk_out=chk_out) if list_gvar_out == None: list_gvar_out = list_gvar_in for (gvar_in, gvar_out) in zip(list_gvar_in, list_gvar_out): self.copy_varstruct(gdat, gvar_in, gvar_out=gvar_out)
[docs] def extract(self, gdat, list_gvar_in, list_gvar_out=None, chk_in=None, chk_out=None): """ Extract the structure and the variables of a netCDF file. Parameters ---------- gdat : geodata Input data from which the extraction will be made list_gvar_in : list of string or geovar Names of the variables to be copied list_gvar_out: list of string or geovar, optional Names of the variable that will be copied in the output file. The same names as the input variables are used by default. chk_in : chunk, optional Define the chunk to read in the input file for the extraction chk_out : chunk, optional Define the chunk to write the variable in the output file extraction fill_value : float, file The fill_value that will be used for the variables in the output netCDF file """ self.copy_coord(gdat, chk_in=chk_in, chk_out=chk_out) if list_gvar_out == None: list_gvar_out = list_gvar_in for (gvar_in, gvar_out) in zip(list_gvar_in, list_gvar_out): self.copy_var(gdat, gvar_in, gvar_out=gvar_out, chk_in=chk_in, chk_out=chk_out) #----------------------------------------------------------------------- # M - Miscellaenous functions #-----------------------------------------------------------------------
def get_size(self): chk = self.get_chunk() return chk.total_size() #----------------------------------------------------------------------- # R - Reading functions #----------------------------------------------------------------------- # 1 - Dimensions # ~~~~~~~~~~~~~~~
[docs] def get_dim(self, dim): """ Get the size of a dimension int the netCDF4 instance nc_in along one dimension (T, Z, Y or X) """ if not isinstance(dim, str): raise TypeError, 'dim input must be a string' self._nc_open() if dim in self.dimensions: n = len(self.netcdf.dimensions[self.dim_name[dim]]) else: n = 1 return n self._nc_close()
[docs] def get_dim_time(self): """ Get the dimension of the time axis Returns ------- n_t : int The dimension of the time axis """ n_t = self.get_dim('T') return n_t
[docs] def get_dim_depth(self): """ Return the dimension of the vertical axis Returns ------- n_z : int The dimension of the vertical axis """ n_z = self.get_dim('Z') return n_z
[docs] def get_dim_lat(self): """ Return the dimension of the latitudinal axis Returns ------- n_lat : int The dimension of the latitudinal axis """ n_lat = self.get_dim('Y') return n_lat
[docs] def get_dim_lon(self): """ Return the dimension of the longitudinal axis Returns ------- n_lon : int The dimension of the longitudinal axis """ n_lon = self.get_dim('X') return self.get_dim('X')
[docs] def get_dim_latlon(self): """ Return the dimension of the latitudinal and longitudinal axis Returns ------- n_lon : int The dimension of the longitudinal axis n_lat : int The dimension of the latitudinal axis """ n_lat, n_lon = self.get_dim_lat(),self.get_dim_lon() return n_lat, n_lon # 2 - Dimension variables # ~~~~~~~~~~~~~~~~~~~~~~~~
[docs] def get_dimvar(self, dim, chk=None): ''' Return the coordinates associated with the dimension dim from the netCDF4 instance nc_in or only a subset optionally defined by a chunk instance ''' if not isinstance(dim,str): raise TypeError, 'dim input must be a string' self._nc_open() chk = self.check_chunk(chk) if dim in self.dimensions: if self.latlon2D==True and (dim=='Y' or dim=='X'): # Special processing for 2D lat/lon fields dimvar = \ np.array(self.netcdf.variables[self.dim_var[dim]]\ [chk.min('Y'):chk.max('Y'),chk.min('X'):chk.max('X')]) else: dimvar = \ np.array(self.netcdf.variables[self.dim_var[dim]]\ [chk.min(dim):chk.max(dim)]) return np.squeeze(dimvar) else: raise ValueError, ('There is no dimension ' + dim + 'in the dimensions: ' + self.dimensions) self._nc_close()
[docs] def get_time(self, chk=None): """ Return the time record, optionally a subset defined by a chunk Parameters ---------- chk : chunk, optional Define the chunk to read in the NetCDF file Returns ------- time : 1darray The time 1d/2d array read in the NetCDF file """ time = self.get_dimvar('T', chk=chk) return time
[docs] def get_depth(self, chk=None): """ Return the vertical 1d array, optionally a subset defined by a chunk Parameters ---------- chk : chunk, optional Define the chunk to read in the NetCDF file Returns ------- depth : 1darray The depth 1d array read in the NetCDF file """ return self.get_dimvar('Z', chk=chk)
[docs] def get_lat(self, chk=None): """ Return the latitudinal 1d/2d array, optionally a subset defined by a chunk chk Parameters ---------- chk : chunk, optional Define the chunk to read in the NetCDF file Returns ------- lat : 1darray/2darray The latitude 1d/2d array read in the NetCDF file """ lat = self.get_dimvar('Y', chk=chk) return lat
[docs] def get_lon(self, chk=None): """ Return the longitudinal 1d/2d array, optionally a subset defined by a chunk Parameters ---------- chk : chunk, optional Define the chunk to read in the NetCDF file Returns ------- lon : 1darray/2darray The longitude 1d/2d array read in the NetCDF file """ lon = self.get_dimvar('X', chk=chk) return lon
[docs] def get_latlon(self, chk=None): """ Return the latitudinal and longitudinal 1d/2d array, optionally a subset defined by a chunk Parameters ---------- chk : chunk, optional Define the chunk to read in the NetCDF file Returns ------- lat : 1darray/2darray The latitude 1d/2d array read in the NetCDF file lon : 1darray/2darray The longitude 1d/2d array read in the NetCDF file """ lat, lon = self.get_lat(chk=chk), self.get_lon(chk=chk) return lat, lon # 3 - Variables # ~~~~~~~~~~~~~~
[docs] def get_var(self, gvar, chk=None, dtype='f8'): """ Read a variable in the netCDF file Parameters ---------- gvar : string or geovar Input variable can only be described by its name or by a a more complex instance geovar chk : chunk, optional Specify a subset to read, all the data is read by default dtype : data-type, optional The desired data-type of the numpy array in which the data is stored. Default is double precision Returns ------- out : ndarray A numpy array of the dimension of the variable, note that dimensions of length 1 are removed. """ if isinstance(gvar, geovar): var_name = gvar.name elif isinstance(gvar, str): var_name = gvar else: raise TypeError, 'var must be either a string or a geovar' dim = self.get_var_axis(var_name) self._nc_open() chk = self.check_chunk(chk) if len(dim)==1: out = \ np.array(self.netcdf.variables[var_name]\ [chk.min(dim[0]):chk.max(dim[0])],\ dtype=dtype) elif len(dim)==2: out = \ np.array(self.netcdf.variables[var_name]\ [chk.min(dim[0]):chk.max(dim[0]),\ chk.min(dim[1]):chk.max(dim[1])],\ dtype=dtype) elif len(dim)==3: out = \ np.array(self.netcdf.variables[var_name]\ [chk.min(dim[0]):chk.max(dim[0]),\ chk.min(dim[1]):chk.max(dim[1]),\ chk.min(dim[2]):chk.max(dim[2])],\ dtype=dtype) elif len(dim)==4: out = \ np.array(self.netcdf.variables[var_name]\ [chk.min(dim[0]):chk.max(dim[0]),\ chk.min(dim[1]):chk.max(dim[1]),\ chk.min(dim[2]):chk.max(dim[2]),\ chk.min(dim[3]):chk.max(dim[3])],\ dtype=dtype) self._nc_close() return np.squeeze(out)
[docs] def get_var_FillValue(self, gvar): """ Return the value of the FillValue attribute Parameters ---------- gvar : string or geovar Input variable can only be described by its name or by a a more complex instance geovar attr_name : string Name of the variable attribute in the NetCDF file Returns ------- out : unknown The value of the attribute """ if isinstance(gvar, geovar): var_name = gvar.name elif isinstance(gvar, str): var_name = gvar else: raise TypeError, "Parameter must be either string or geovar" self._nc_open() if hasattr(self.netcdf.variables[var_name], '_FillValue'): fval = self.netcdf.variables[var_name]._FillValue elif hasattr(self.netcdf.variables[var_name], 'missing_value'): fval = self.netcdf.variables[var_name].missing_value else: fval = None self._nc_close() return fval
[docs] def get_var_attr(self, gvar, attr_name): """ Return the value of the variable attribute Parameters ---------- gvar : string or geovar Input variable can only be described by its name or by a a more complex instance geovar attr_name : string Name of the variable attribute in the NetCDF file Returns ------- out : unknown The value of the attribute """ if isinstance(gvar, geovar): var_name = gvar.name elif isinstance(gvar, str): var_name = gvar else: raise TypeError, "Parameter must be either string or geovar" self._nc_open() if hasattr(self.netcdf.variables[var_name], attr_name): attr = self.netcdf.variables[var_name].getncattr(attr_name) else: attr = None self._nc_close() return attr
[docs] def get_var_dtype(self, gvar): """ Return the type of the variable in the netcdf file Parameters ---------- gvar : string or geovar Input variable can only be described by its name or by a a more complex instance geovar Returns ------- out : string The type of the variable in the standard convention (ex: 'f4') """ if isinstance(gvar, geovar): var_name = gvar.name elif isinstance(gvar, str): var_name = gvar else: raise TypeError, "Parameter must be either string or geovar" self._nc_open() out = self.netcdf.variables[var_name].dtype self._nc_close() return out
[docs] def get_var_names(self): """ Return the list of the variable names without the dimension variables Returns ------- out : list of string A list of string that contains the variable names """ self._nc_open() dict_var = self.netcdf.variables for dim in self.dimensions: del dict_var[self.dim_var[dim]] self._nc_close() # Force the string type because of a common use of unicode type out = [str(elem) for elem in dict_var.keys()] return out
[docs] def get_var_axis(self, gvar): """ Return the axis attribute of the variable in the NetCDF file Parameters ---------- gvar : string or geovar Input variable can only be described by its name or by a a more complex instance geovar Returns ------- axis : string The axis of the variable containing which is a combination of the available axis {'T', 'Z', 'Y', 'X'} """ if isinstance(gvar, geovar): var_name = gvar.name elif isinstance(gvar, str): var_name = gvar else: raise TypeError, 'var must be either a string or a geovar' axis = '' self._nc_open() for key in self.netcdf.variables[var_name].dimensions: if key in self.dim_name.values(): axis += \ self.dim_name.keys()[self.dim_name.values().index(key)] self._nc_close() return axis #----------------------------------------------------------------------- # W - Writing functions #----------------------------------------------------------------------- # 1 - Dimensions # ~~~~~~~~~~~~~~~
[docs] def create_dim(self, chk=None): """ Create the dimensions in the netCDF file from a chunk Arguments --------- chk : chunk, optional The size of the dimensions to create """ if chk == None and self.chk == None: raise RuntimeError, "The dimension cannot be defined" self._checkIO() self._nc_open() chk = self.check_chunk(chk) for dim in self.dimensions: if dim == 'T' and self.time_unlimited == True: self.netcdf.createDimension(self.dim_name[dim], None) else: self.netcdf.createDimension(self.dim_name[dim], chk.size_dim(dim)) self._nc_close()
[docs] def create_dimvar(self, dim, dtype='f4', fill_value=-9999.): """ Create the dimension variables in the netCDF file """ self._checkIO() self._nc_open() if self.latlon2D == True and (dim == 'Y' or dim == 'X'): # Special processing for 2D lat/lon fields self.netcdf.createVariable(self.dim_var[dim], dtype, (self.dim_name['Y'], self.dim_name['X'], ), fill_value=fill_value) else: self.netcdf.createVariable(self.dim_var[dim], dtype, (self.dim_name[dim], ), fill_value=fill_value) # Force auto_maskandscale to False self.netcdf.variables[self.dim_var[dim]]\ .set_auto_maskandscale(False) self._nc_close()
[docs] def create_coord(self, dtype='f4', fill_value=-9999.): """ Create all the dimension variables in the netCDF file """ for dim in self.dimensions: self.create_dimvar(dim, dtype=dtype, fill_value=fill_value)
[docs] def save_dimvar(self, dim, value, chk=None): """ Write the values of a dimension variable in the netCDF file Arguments --------- dim : {'T', 'Z', 'Y', 'X'} Key string of the dimensions values : ndarray The values of variable to save chk : chunk, optional The subset of the NetCDF file where the arrays will be saved """ self._checkIO() # Try to create the variable, if it already exists an # exception is raised and it goes directly to the next step try: self.create_dimvar(dim) except: pass self._nc_open() dimvar = self.netcdf.variables[self.dim_var[dim]] chk = self.check_chunk(chk) # 2D latitude/longitude case if self.latlon2D == True and (dim == 'Y' or dim == 'X'): dimvar[\ chk.min('Y'):chk.max('Y'),\ chk.min('X'):chk.max('X')]\ = value # 1D case else: dimvar[\ chk.min(dim):chk.max(dim)]\ = value # Force auto_maskandscale to False dimvar.set_auto_maskandscale(False) self._nc_close()
[docs] def save_time(self, time, chk=None): """ Write the values of the time variable in the netCDF file Arguments --------- time : 1darray Values of the time variable chk : chunk, optional The subset of the NetCDF file where the array will be saved """ self.save_dimvar('T', time, chk=chk)
[docs] def save_depth(self, depth, chk=None): """ Write the values of the vertical variable in the netCDF file Arguments --------- depth : 1darray Values of the vertical variable chk : chunk, optional The subset of the NetCDF file where the array will be saved """ self.save_dimvar('Z', depth, chk=chk)
[docs] def save_lat(self, lat, chk=None): """ Write the values of the latitudinal variable in the netCDF file Arguments --------- lat : 1darray/2darray Values of the latitudinal variable chk : chunk, optional The subset of the NetCDF file where the array will be saved """ self.save_dimvar('Y', lat, chk=chk)
[docs] def save_lon(self, lon, chk=None): """ Write the values of the longitudinal variable in the netCDF file Arguments --------- lon : 1darray/2darray Values of the longitudinal variable chk : chunk, optional The subset of the NetCDF file where the array will be saved """ self.save_dimvar('X', lon, chk=chk)
[docs] def save_latlon(self, lat, lon, chk=None): """ Write the values of latidudinal and longitudinal variable in the netCDF file Arguments --------- lat : 1darray/2darray Values of the latitudinal variable lon : 1darray/2darray Values of the longitudinal variable chk : chunk, optional The subset of the NetCDF file where the arrays will be saved """ self.save_dimvar('Y', lat, chk=chk) self.save_dimvar('X', lon, chk=chk) # 3 - Variables # ~~~~~~~~~~~~~~
[docs] def create_var(self, gvar): """ Create the variable with its attributes in the netCDF file Parameters ---------- gvar : geovar or string The variable to create in the netCDF file defined by a string or a complex object geovar """ if isinstance(gvar, geovar): pass elif isinstance(gvar, str): gvar = geovar(gvar) else: raise TypeError, 'var must be either a string or a geovar' self._checkIO() # Check write access # Check if the dimensions have been created if not self._check_dim(): self.create_dim() self._nc_open() var_name = gvar.name fval = gvar.fill_value dtype = gvar.dtype # Check if the variable already exists in the NetCDF file if var_name in self.netcdf.variables: self._nc_close() return # If gvar has the attribute 'axis', create the variable # according to those dimensions, otherwise create the variable # according to the dimensions of the NetCDF file if 'axis' in gvar.attrs(): dim = gvar.get_attr('axis') else: dim = self.dimensions # 1D case if len(dim) == 1: self.netcdf.createVariable(var_name, dtype, (self.dim_name[dim[0]],), fill_value=fval) # 2D case elif len(dim) == 2: self.netcdf.createVariable(var_name, dtype, (self.dim_name[dim[0]], self.dim_name[dim[1]],), fill_value=fval) # 3D case elif len(dim) == 3: self.netcdf.createVariable(var_name, dtype, (self.dim_name[dim[0]], self.dim_name[dim[1]], self.dim_name[dim[2]],), fill_value=fval) # 4D case elif len(dim) == 4: self.netcdf.createVariable(var_name, dtype, (self.dim_name[dim[0]], self.dim_name[dim[1]], self.dim_name[dim[2]], self.dim_name[dim[3]],), fill_value=fval) # Copy the attributes of gvar in the corresponding variables in # the NetCDF file # /!\ The attribute _FillValue can only be set at the creation # of the variable for attr in gvar.attrs(): if not attr == '_FillValue': self.netcdf.variables[var_name].__setattr__\ (attr, gvar.get_attr(attr)) # Force auto_maskandscale to False self.netcdf.variables[var_name].set_auto_maskandscale(False) self._nc_close()
[docs] def save_var(self, gvar, values, chk=None): """ Write the variable values in the corresponding variable of the netCDF file Arguments --------- gvar : geovar or str The geovar or name of the variable in the NetCDF file values : ndarray The values of variable to save chk : chunk, optional The subset of the NetCDF file where the arrays will be saved """ # Check arguments if isinstance(gvar, geovar): var_name = gvar.name elif isinstance(gvar, str): var_name = gvar else: raise TypeError, 'var must be either a string or a geovar' chk = self.check_chunk(chk) self._checkIO() # Try to create the variable, if it already exists an # exception is raised and it goes directly to the next step try: self.create_var(gvar) except: pass dim = self.get_var_axis(var_name) self._nc_open() var_nc = self.netcdf.variables[var_name] # 1D case if len(dim) == 1: var_nc[chk.min(dim[0]):chk.max(dim[0])] = values # 2D case elif len(dim) == 2: var_nc[chk.min(dim[0]):chk.max(dim[0]), chk.min(dim[1]):chk.max(dim[1])] = values # 3D case elif len(dim) == 3: var_nc[chk.min(dim[0]):chk.max(dim[0]), chk.min(dim[1]):chk.max(dim[1]), chk.min(dim[2]):chk.max(dim[2])] = values # 4D case elif len(dim) == 4: var_nc[chk.min(dim[0]):chk.max(dim[0]), chk.min(dim[1]):chk.max(dim[1]), chk.min(dim[2]):chk.max(dim[2]), chk.min(dim[3]):chk.max(dim[3])] = values # Force auto_maskandscale to False var_nc.set_auto_maskandscale(False) self._nc_close() #======================================================================= # Class calculation #=======================================================================
[docs]class calculation: """ Attributes ---------- data_in : geodata Input geophysical data data_out : geodata Output geophysiscal data nb_tempfiles : Number of temporary files - parallelized <'boolean'>: Define if the calculation is parallelized or not - nb_proc <'int'>: total number of processors - proc_indx <'in'>: index of current processor - overwrite <'boolean'>: overwrite or not temporary and output files - chk_xtrac <'chunk'>: chunk defining the structure to extract from the input file to generate the output file - fcounter <'string'>: path of temporary counter file - chunking <'dictionnary['in', <'chunk'>, 'temp_in', <'chunk'>, 'temp_out', <'chunk'>, 'out',<'chunk'>]'> Constructor: - data_in <'geodata'>: characteristics of input data - data_out <'geodata'>: characteristics of output data - chk <'chunk'>: chunk defining the structure to extract from the input file to generate the output file - parallelized <'boolean'>: Define if the calculation is parallelized or not - overwrite <'boolean'>: overwrite or not temporary files """ def __init__(self, gdat_in, gdat_out, list_gvar_in, list_gvar_out=None, chk_in=None, chk_out=None, parallelized=False, overwrite=False): """ Arguments --------- gdat_in : geodata Input data gdat_out : geodata Output data list_gvar_in : List of input variables list_gvar_out : optional List of output variables chk_in : chunk, optional chk_out : chunk, optional parallelized : bool, optional Set to True to use MPI parallelization overwrite : bool, optional Define existing files must be overwritten """ # Input geodata if isinstance(gdat_in, geodata): self.gdat_in = gdat_in else: raise TypeError, 'gdat_in must be a geodata type' # Output geodata if isinstance(gdat_out, geodata): self.gdat_out = gdat_out else: raise TypeError, 'gdat_out must be a geodata type' # List of input/output variables self.list_gvar_in = list_gvar_in if list_gvar_out == None: self.list_gvar_out = list_gvar_in else: self.list_gvar_out = list_gvar_out # Input/output chunks if chk_in == None: self.chk_in = self.gdat_in.get_chunk() else: self.chk_in = gdat_in.check_chunk(chk_in) if chk_out == None: self.chk_out = self.chk_in.copy() else: self.chk_out = chk_out # Number of temporary files self.nb_tempfiles = 1 # Get the total number of cores and the current core ID # if parallelized compuation self.parallelized = parallelized if parallelized == True: from mpi4py import MPI #Parallelisation initialization self.comm = MPI.COMM_WORLD self.nb_proc = self.comm.Get_size() self.proc_indx = self.comm.Get_rank() else: self.nb_proc = 1 self.proc_indx = 0 # Overwrite property self.overwrite = overwrite # File counter self.fcounter = (gdat_out.filename[:-3] + '_temp#' + str(self.proc_indx)+'.p') if not os.path.isfile(self.fcounter) or overwrite: # Initialize a backup file for indexing current operating # file self.write_counter(0,0) # Temporary input/output self.gdat_temp_in = gdat_in self.gdat_temp_out = gdat_out self.chunking = {'in':self.chk_in, 'temp':self.chk_out, 'out':self.chk_out}
[docs] def optimize_1D(self, nb_var, dim='lon', free_mem=None): """ Optimize the computation by defining slices along the dimension specified """ if free_mem == None: import subprocess # Get the available RAM free = subprocess.Popen(("free -t | grep buffers/cache | " "awk '{ print $NF }'"), shell=True, stdout=subprocess.PIPE) free_mem = int(free.communicate()[0]) else: free_mem = int(free_mem) self.disp("Memory available: %.0f Mo " % (free_mem / 1000)) # Compute the memory occupied by 64-bit variable var_mem = self.chk_in.total_size() * 8 / 1000 # Determine the number and sizes of slices for processing # the dataset according to the number of variables used in the # computation nb_slic_lon = (nb_var * var_mem) / free_mem + 1 # Copy chk_in = self.chk_in.copy() chk_out = self.chk_out.copy() chk_temp = chk_out.copy() self.chunking = [] n_lat, n_lon = chk_in.size_latlon() if dim == 'lon': nb_slic_lat = self.nb_proc # Get the minimum and maximum longitudal index i_min = self.chk_in['lon_min'] i_max = self.chk_in['lon_max'] j_min = self.chk_in['lat_min'] j_max = self.chk_in['lat_max'] n_step_lon = n_lon / nb_slic_lon n_step_lat = n_lat / nb_slic_lat self.nb_tempfiles = nb_slic_lon * nb_slic_lat for i in range(nb_slic_lon): chk_in['lon_min'] = i_min + i * n_step_lon chk_out['lon_min'] = i * n_step_lon chk_temp['lon_min'] = 0 if not i == (nb_slic_lon - 1): chk_in['lon_max'] = i_min + (i + 1) * n_step_lon chk_out['lon_max'] = (i + 1) * n_step_lon chk_temp['lon_max'] = n_step_lon else: chk_in['lon_max'] = i_max chk_out['lon_max'] = n_lon chk_temp['lon_max'] = n_lon - i * n_step_lon for j in range(nb_slic_lat): chk_in['lat_min'] = j_min + j * n_step_lat chk_out['lat_min'] = j * n_step_lat chk_temp['lat_min'] = 0 if not j == (nb_slic_lat - 1): chk_in['lat_max'] = j_min + (j + 1) * n_step_lat chk_out['lat_max'] = (j + 1) * n_step_lat chk_temp['lat_max'] = n_step_lat else: chk_in['lat_max'] = j_max chk_out['lat_max'] = n_lat chk_temp['lat_max'] = (n_lat - j * n_step_lat) self.chunking.append({'in':chk_in.copy(), 'temp':chk_temp.copy(), 'out':chk_out.copy()}) elif dim == 'time': nb_slic = chk_in.size_time() for k in range(nb_slic): self.nb_tempfiles = nb_slic chk_in['t_min'] = k chk_in['t_max'] = k + 1 chk_out['t_min'] = k chk_out['t_max'] = k + 1 chk_temp['t_min'] = 0 chk_temp['t_max'] = 1 self.chunking.append({'in':chk_in.copy(), 'temp':chk_temp.copy(), 'out':chk_out.copy()})
[docs] def start(self): """ """ self.disp("Generation of " + str(self.nb_tempfiles) + " temporary files...") self.generate_temp_in() self.generate_temp_out() self.wait() self.disp("Temporary files have been correctly generated")
[docs] def stop(self): """ """ self.wait() #Delete temporary NetCDF files if self.proc_indx == 0: for i in range(self.nb_tempfiles): ftemp_out = (self.gdat_out.filename[:-3] + '_temp_out#' + str(i) + '.nc') os.remove(ftemp_out) #Delete os.remove(self.fcounter)
[docs] def wait(self): """ """ if self.parallelized==True: #Wait for all processus to finish self.comm.Barrier()
[docs] def disp(self, txt): """ """ if self.proc_indx == 0: print txt
[docs] def delete(self, filename): """ """ if self.proc_indx == 0: os.remove(filename)
[docs] def generate_file_out(self,mode='same'): """ Generate the output files """ if self.proc_indx == 0: if (not os.path.isfile(self.gdat_out.get_filename()) or self.overwrite): self.gdat_out.nc_create() if mode == 'same': self.gdat_out.extract_struct(self.gdat_in, \ self.list_gvar_in, list_gvar_out=self.list_gvar_out, \ chk_in=self.chk_in, chk_out=self.chk_out) elif mode == 'new': self.gdat_out.create_dim(self.chk_out) self.gdat_out.create_coord() for namevar in self.list_gvar_out: self.gdat_out.create_var(namevar) self.wait() self.disp("Ouput file has been correctly generated")
[docs] def generate_temp_out(self): """ """ self.gdat_temp_out=[] for k in range(self.nb_tempfiles): ftemp = self.gdat_out.filename[:-3]+'_temp_out#'+str(k)+'.nc' gdat_temp = geodata(ftemp,\ iotype = 'w',\ dtype = self.gdat_out.get_dtype(),\ dimensions = self.gdat_out.get_dimensions(),\ chk = self.chunking[k]['temp'],init=False) self.gdat_temp_out.append(gdat_temp) if k % self.nb_proc == self.proc_indx: if not os.path.isfile(ftemp) or self.overwrite: gdat_temp.nc_create() # Write or overwrite existing temporary # files gdat_temp.create_dim() for namevar in self.list_gvar_out: gdat_temp.create_var(namevar)
[docs] def generate_temp_in(self): """ Generate the temporary geodata instances """ self.gdat_temp_in = [] for k in range(self.nb_tempfiles): gdat_temp = geodata(self.gdat_in.get_filename(),\ iotype = 'w',\ dtype = self.gdat_in.get_dtype(),\ dimensions = self.gdat_in.get_dimensions(),\ chk = self.chunking[k]['in']) self.gdat_temp_in.append(gdat_temp)
[docs] def concatenate(self, dim='lon'): """ Concatenate the temporary files along the prescribed dimension Parameters: ----------- dim : optional, {'lon', 'time'} Dimension along which the concatenation is made """ self.wait() if self.proc_indx == 0: print "Files concatenation..." n_t = self.chk_out.size_time() if dim == 'lon': n_lat, n_lon = self.gdat_out.get_dim_latlon() chk = self.gdat_out.get_chunk() for namevar in self.list_gvar_out: axis = self.gdat_out.get_var_axis(namevar) if 'T' in axis: axis = axis.replace('T', '') if len(axis) == 0: break for i in range(n_t): htuple = () vtuple = () for k in range(self.nb_tempfiles): chk_temp = self.chunking[k]['temp'] chk_temp['t_min'] = i chk_temp['t_max'] = i + 1 var_temp = \ self.gdat_temp_out[k].get_var(namevar, chk_temp) vtuple += (var_temp,) if (k + 1) % self.nb_proc == 0: htuple += (np.vstack(vtuple),) vtuple = () var = np.hstack(htuple) chk['t_min'] = i chk['t_max'] = i + 1 self.gdat_out.save_var(namevar, var, chk) elif dim == 'time': for namevar in self.list_gvar_out: for k in range(self.nb_tempfiles): chk_temp = self.chunking[k]['temp'] var = self.gdat_temp_out[k].get_var(namevar, chk_temp) chk_out = self.chunking[k]['out'] self.gdat_out.save_var(namevar, var, chk=chk_out) print "Concatenation done" self.wait()
[docs] def get_temp_data(self, indx): """ """ return self.gdat_temp_in[indx], self.gdat_temp_out[indx]
[docs] def write_counter(self, var_counter, slice_counter): """ """ file_counter = open(self.fcounter,'w') pickle.dump((var_counter, slice_counter), file_counter) file_counter.close()
[docs] def read_counter(self): """ """ file_counter = open(self.fcounter,'r') var_counter, slice_counter = pickle.load(file_counter) file_counter.close() return var_counter, slice_counter #======================================================================= # Independant functions derived from geodata #=======================================================================
def copy_dimvarstruct(gdat_in, gdat_out, dim): """ Create and copy the structure of a dimension variable of the input NetCDF in the output NetCDF Arguments --------- gdat_in : geodata Input data from which the dimension will be read gdat_out : geodata Output data in which the dimension will be created dim : {'T', 'Z', 'Y', 'X'} Key of the dimension dim : {} Examples -------- """ try: copy_dimvarstruct = gdat_out.copy_dimvarstruct except AttributeError: pass return copy_dimvarstruct(gdat_in, dim) def extract(gdat_in, gdat_out, list_gvar_in, list_gvar_out=None, chk_in=None, chk_out=None): """ Extract the structure and the variables from a NetCDF file to another one Parameters ---------- gdat_in : geodata Input data from which the extraction will be made gdat_out : geodata Output data in which the extraction will be made list_gvar_out: list of string or geovar, optional Names of the variable that will be copied in the output file. The same names as the input variables are used by default. chk_in : chunk, optional Define the chunk to read in the input file for the extraction chk_out : chunk, optional Define the chunk to write the variable in the output file extraction fill_value : float, file The fill_value that will be used for the variables in the output netCDF file """ try: extract = gdat_out.extract except AttributeError: pass return extract(gdat_in, list_gvar_in, list_gvar_out=list_gvar_out, chk_in=chk_in, chk_out=chk_out) def extract_struct(gdat_in, gdat_out, list_gvar_in, list_gvar_out=None, chk_in=None, chk_out=None): """ Extract the structure from a NetCDF file to another one Parameters ---------- gdat_in : geodata Input data from which the extraction will be made gdat_out : geodata Output data in which the extraction will be made list_gvar_in : list of string or geovar Names of the variables of the input netCDF file for which the structure will be copied list_gvar_out: list of string or geovar, optional Names of the variables of the output netCDF file in which the structure will be copied chk_in : chunk, optional Define the chunk to read in the input file for the extraction chk_out : chunk, optional Define the chunk to write the variable in the output file extraction fill_value : float, file The fill_value that will be used for the variables in the output netCDF file """ try: extract_struct = gdat_out.extract_struct except AttributeError: pass extract_struct(gdat_in, list_gvar_in, list_gvar_out=list_gvar_out, chk_in=chk_in, chk_out=chk_out) #======================================================================= # Generation of examples #======================================================================= def example_geodata(dtype='example2d', format='NETCDF3_64BIT', dimensions='TZYX', time_unlimited=True): if os.path.isfile('../examples/example.nc'): os.remove('../examples/example.nc') lon1d = np.arange(-180, 181, 5) n_lon = np.size(lon1d) lat1d = np.arange(-90, 91, 5) n_lat = np.size(lat1d) lon2d, lat2d = np.meshgrid(lon1d, lat1d) depth = np.arange(0, 4001, 500) n_z = np.size(depth) yrtosec = 3600 * 24 * 365.25 timecounter = np.arange(0, 30 * yrtosec, yrtosec) n_t = np.size(timecounter) chk = chunk(t_min=0, t_max=n_t, z_min=0, z_max=n_z, lat_min=0, lat_max=n_lat, lon_min=0, lon_max=n_lon) gdat = geodata('../examples/example.nc', iotype='w', dtype=dtype, dimensions=dimensions, time_unlimited=time_unlimited, chk=chk, format=format) gdat.create_dim() gdat.create_coord() gdat.save_time(timecounter) gdat.save_depth(depth) gdat.save_lat(lat2d) gdat.save_lon(lon2d) return gdat def example_geovar(): gvar = geovar('exvar') gvar.long_name = 'Example of a variable' return gvar if __name__ == '__main__': gdata_example = example_geodata() print gdata_example import doctest doctest.testmod() os.system('rm ../examples/example*.nc')