#!/usr/bin/env python
########################################################################
# NetCDF I/O and parallelization library #
# #
# 2013, G. SERAZIN, PhD student at MEOM/LGGE Grenoble #
########################################################################
"""
The package ncTools
"""
#=======================================================================
# Module importation
#=======================================================================
import pickle
import copy
import sys
import os
try:
import netCDF4_utils
from netCDF4 import Dataset
except:
print 'Package netCDF4 is not available...'
exit()
try:
import numpy as np
except:
print 'Package numpy is not available...'
exit()
#=======================================================================
# Dictionnary of supported data types
#=======================================================================
geodtype = dict()
#-----------------------------------------------------------------------
# Orca
#-----------------------------------------------------------------------
dim_name = {'T':'time_counter',
'Z':'deptht',
'Y':'y',
'X':'x'}
dim_var = {'T':'time_counter',
'Z':'deptht',
'Y':'nav_lat',
'X':'nav_lon'}
latlon2D = True
geodtype['orca'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time',
'Z':'z',
'Y':'y',
'X':'x'}
dim_var = {'T':'time',
'Z':'nav_lev',
'Y':'nav_lat',
'X':'nav_lon'}
latlon2D = True
geodtype['orcat'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time_counter',
'Z':'deptht',
'Y':'y',
'X':'x'}
dim_var = {'T':'time_counter',
'Z':'deptht',
'Y':'nav_lat',
'X':'nav_lon'}
latlon2D = True
geodtype['orcau'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time_counter',
'Z':'depthu',
'Y':'y',
'X':'x'}
dim_var = {'T':'time_counter',
'Z':'depthu',
'Y':'nav_lat',
'X':'nav_lon'}
latlon2D = True
geodtype['orcav'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time_counter',
'Z':'depthv',
'Y':'y',
'X':'x'}
dim_var = {'T':'time_counter',
'Z':'depthv',
'Y':'nav_lat',
'X':'nav_lon'}
latlon2D = True
geodtype['maskorca025'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'t',
'Z':'z',
'Y':'y',
'X':'x'}
dim_var = {'T':'time_counter',
'Z':'nav_lev',
'Y':'nav_lat',
'X':'nav_lon'}
latlon2D = True
geodtype['maskorca12'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time_counter',
'Z':'evn',
'Y':'y',
'X':'x'}
dim_var = {'T':'time_counter',
'Z':'evn',
'Y':'lat',
'X':'lon'}
latlon2D = False
geodtype['eof1d_orca'] = (dim_name, dim_var, latlon2D)
dim_name = {'T':'time',
'Z':'evn',
'Y':'lat',
'X':'lon'}
dim_var = {'T':'time',
'Z':'evn',
'Y':'lat',
'X':'lon'}
latlon2D = False
geodtype['eof1d_aviso'] = (dim_name, dim_var, latlon2D)
#-----------------------------------------------------------------------
# Aviso
#-----------------------------------------------------------------------
dim_name = {'T':'time', 'Y':'lat', 'X':'lon'}
dim_var = {'T':'time', 'Y':'lat', 'X':'lon'}
latlon2D = False
geodtype['aviso'] = (dim_name, dim_var, latlon2D)
#-----------------------------------------------------------------------
# Spectrum
#-----------------------------------------------------------------------
dim_name = {'T':'omega', 'Z':'kappa', 'Y':'l', 'X':'k'}
dim_var = {'T':'omega', 'Z':'kappa', 'Y':'l', 'X':'k'}
latlon2D = False
geodtype['spectrum'] = (dim_name, dim_var, latlon2D)
del dim_name, dim_var, latlon2D
#-----------------------------------------------------------------------
# Examples
#-----------------------------------------------------------------------
dim_name = {'T':'t', 'Z':'z', 'Y':'y', 'X':'x'}
dim_var = {'T':'extime', 'Z':'exdepth', 'Y':'exlat', 'X':'exlon'}
latlon2D = True
geodtype['example2d'] = (dim_name, dim_var, latlon2D)
del dim_name, dim_var, latlon2D
#=======================================================================
# Class chunk
#=======================================================================
[docs]class chunk(dict):
"""
This class is used to specify a chunk in a NetCDF file that
corresponds to geophysical data. The keyword used are:
Attributes
----------
t_min : int
The minimum index along the time dimension
t_max : int
The maximum index along the time dimension
z_min : int
The minimum index along the vertical dimension
z_max : int
The maximum index along the vertical dimension
lat_min : int
The minimum index along the latitudinal dimension
lat_max : int
The maximum index along the latitudinal dimension
lon_min : int
The minimum index along the longitudinal dimension
lon_max : int
The maximum index along the longitudinal dimension
The chunk class is derived from the main python dictionnary class.
The values associated with each keywords are set to None by default.
"""
def __init__(self, t_min=None, t_max=None,
z_min=None, z_max=None,
lat_min=None, lat_max=None,
lon_min=None, lon_max=None):
"""
Arguments
---------
t_min : int, optional
The minimum index along the time dimension
t_max : int, optional
The maximum index along the time dimension
z_min : int, optional
The minimum index along the vertical dimension
z_max : int, optional
The maximum index along the vertical dimension
lat_min : int, optional
The minimum index along the latitudinal dimension
lat_max : int, optional
The maximum index along the latitudinal dimension
lon_min : int, optional
The minimum index along the longitudinal dimension
lon_max : int, optional
The maximum index along the longitudinal dimension
"""
dict.__init__(self, [('t_min',t_min), ('t_max',t_max),
('z_min',z_min), ('z_max',z_max),
('lat_min',lat_min), ('lat_max',lat_max),
('lon_min',lon_min), ('lon_max',lon_max)])
def __getitem__(self, key):
val = dict.__getitem__(self, key)
return val
def __setitem__(self, key, val):
dict.__setitem__(self, key, val)
def __str__(self):
string = ('---------------------\n'
'| t_min | t_max |\n| %7s | %7s |\n'
'---------------------\n'
'| z_min | z_max |\n| %7s | %7s |\n'
'---------------------\n'
'| lat_min | lat_max |\n| %7s | %7s |\n'
'---------------------\n'
'| lon_min | lon_max |\n| %7s | %7s |\n'
'---------------------'
% (self['t_min'], self['t_max'],
self['z_min'], self['z_max'],
self['lat_min'], self['lat_max'],
self['lon_min'], self['lon_max'])
)
return string
[docs] def size_dim(self, dim):
"""
Get the size of the chunk along one dimension
Parameters
----------
dim : {'T', 'Z', 'Y', 'X'}
The key of the dimension
Returns
-------
n : int
The size of the chunk along the corresponding dimension
Examples
--------
>>> chk = chunk(t_min=0, t_max=25, z_min=4, z_max=7,
... lat_min=23, lat_max=125, lon_min=65, lon_max=243)
>>> chk.size_dim('T')
25
>>> chk.size_dim('Z')
3
>>> chk.size_dim('Y')
102
>>> chk.size_dim('X')
178
"""
if not isinstance(dim,str):
raise TypeError, 'dim input must be a string'
if dim == 'T':
if (self['t_min'] is not None and
self['t_max'] is not None):
n_t = self['t_max']-self['t_min']
else:
n_t = 1
return n_t
elif dim == 'Z':
if (self['z_min'] is not None and
self['z_max'] is not None):
n_z = self['z_max']-self['z_min']
else:
n_z = 1
return n_z
elif dim == 'Y':
if (self['lat_min'] is not None and
self['lat_max'] is not None):
n_lat = self['lat_max']-self['lat_min']
else:
n_lat = 1
return n_lat
elif dim == 'X':
if (self['lon_min'] is not None and
self['lon_max'] is not None):
n_lon = self['lon_max']-self['lon_min']
else:
n_lon = 1
return n_lon
else:
raise ValueError, ("Not a valid dimension ('T', 'Z', 'Y' or"
" 'X')")
[docs] def size_time(self):
"""
Get the size of the chunk along the time dimension
Returns
-------
n_t : int
The size of the chunk along the time dimension
Examples
--------
>>> chk=chunk(t_min=0, t_max=25)
>>> chk.size_time()
25
>>> chk=chunk(t_max=25)
>>> chk.size_time()
1
"""
n_t = self.size_dim('T')
return n_t
[docs] def size_depth(self):
"""
Get the size of the chunk along the depth dimension
Returns
-------
n_z : int
The size of the chunk along the depth dimension
Examples
--------
>>> chk = chunk(z_min=4, z_max=7)
>>> chk.size_depth()
3
>>> chk = chunk(z_min=4)
>>> chk.size_depth()
1
"""
n_z = self.size_dim('Z')
return n_z
[docs] def size_latlon(self):
"""
Get the size of the chunk along the latitudinal and
longitudinal dimensions
Returns
-------
n_lat : int
The size of the chunk along the latitudinal dimension
n_lon : int
The size of the chunk along the longitudinal dimension
Examples
--------
>>> chk = chunk(lat_min=23, lat_max=125, lon_min=65,
... lon_max=243)
>>> chk.size_latlon()
(102, 178)
"""
n_lat = self.size_dim('Y')
n_lon = self.size_dim('X')
return n_lat, n_lon
[docs] def size_lat(self):
"""
Get the size of the chunk along the latitudinal dimension
Returns
-------
n_lat : int
The size of the chunk along the latitudinal dimension
Examples
--------
>>> chk = chunk(lat_min=23, lat_max=125)
>>> chk.size_lat()
102
>>> chk = chunk(lat_max=125)
>>> chk.size_lat()
1
"""
n_lat = self.size_dim('Y')
return n_lat
[docs] def size_lon(self):
"""
Get the size of the chunk along the longitudinal dimension
Returns
-------
n_lon : int
The size of the chunk along the longitudinal
dimension
Examples
--------
>>> chk = chunk(lon_min=65, lon_max=243)
>>> chk.size_lon()
178
>>> chk = chunk(lon_min=65)
>>> chk.size_lon()
1
"""
n_lon = self.size_dim('X')
return n_lon
[docs] def total_size(self):
"""
Get the total size of the chunk
Returns
-------
n : int
The total size of the chunk dimension
"""
n_t = self.size_dim('T')
n_z = self.size_dim('Z')
n_lat = self.size_dim('Y')
n_lon = self.size_dim('X')
return n_t*n_lat*n_lon*n_z
[docs] def min(self, dim):
"""
Return the minimum of the chunk value along one dimension
Parameters
----------
dim : {'T', 'Z', 'Y', 'X'}
Key of the dimension
Return
------
n : int
Minimum of the chunk along the dimension
Examples
--------
>>> chk = chunk(t_min=0, t_max=25, lat_min=23, lat_max=125,
... lon_min=65, lon_max=243)
>>> chk.min('T')
0
>>> chk.min('Z')
>>> chk.min('Y')
23
>>> chk.min('X')
65
"""
if not isinstance(dim, str):
raise TypeError, 'dim input must be a string'
if dim == 'T':
n = self['t_min']
elif dim == 'Z':
n = self['z_min']
elif dim == 'Y':
n = self['lat_min']
elif dim == 'X':
n = self['lon_min']
else:
raise ValueError, dim + (" is not a valid dimension ('T', "
"'Z', 'Y' or 'X')")
return n
[docs] def max(self, dim):
"""
Return the maximum of the chunk value along one dimension
Parameters
----------
dim : {'T', 'Z', 'Y', 'X'}
Key of the dimension
Return
------
n : int
Maximum of the chunk along the dimension
Examples
--------
>>> chk = chunk(t_min=0, t_max=25, lat_min=23, lat_max=125,
... lon_min=65, lon_max=243)
>>> chk.max('T')
25
>>> chk.max('Z')
>>> chk.max('Y')
125
>>> chk.max('X')
243
"""
if not isinstance(dim, str):
raise TypeError, 'dim input must be a string'
if dim == 'T':
n = self['t_max']
elif dim == 'Z':
n = self['z_max']
elif dim == 'Y':
n = self['lat_max']
elif dim == 'X':
n = self['lon_max']
else:
raise ValueError, dim + (" is not a valid dimension ('T', "
"'Z', 'Y' or 'X')")
return n
[docs] def copy(self):
"""
Copy a chunk
Return
------
chk : chunk
A copy of the original instance
"""
return copy.copy(self)
[docs] def isempty(self):
"""
Test if the chunk is empty
Return
------
val : bool
True if the chunk is empty, False otherwise
Example
-------
>>> chk = chunk(t_min=0, t_max=25)
>>> chk.isempty()
False
>>> chk = chunk()
>>> chk.isempty()
True
"""
val = self.values()
return val==[None]*len(val)
#=======================================================================
# Class geovar
#=======================================================================
[docs]class geovar:
"""
The class geovar is similar to the netCDF class Variables but it
does not include the value of the variable, it is only used to
described the name of the variable and the associated metadata.
"""
def __init__(self, varname, dtype='f4', fill_value=-99999.,
**kwargs):
"""
Arguments
---------
name : str
The variable name
dtype : {'f4', ...}
The type of the variable
fill_value : double
The value of the attribute fill_value
Example
-------
>>> gvar = geovar('sla', dtype='f4', fill_value=-99999,
... axis='TYX', long_name='Sea Level Anomaly')
>>> print gvar
sla (f4):
fill_value: -99999
long_name: Sea Level Anomaly
axis: TYX
"""
if isinstance(varname, str):
self.name = varname
self.dtype = dtype
self.fill_value = fill_value
else:
raise TypeError, 'arguments must be string type'
for key, value in kwargs.iteritems():
setattr(self, key, value)
#-----------------------------------------------------------------------
# SPECIAL METHODS
#-----------------------------------------------------------------------
def __call__(self):
return self.name
def __repr__(self):
return self.name
def __str__(self):
title = self.name + ' (' + str(self.dtype) + '):' + ' \n'
list_attr = ' fill_value: %s' %self.fill_value
for key, val in zip(self.attrs().keys(),
self.attrs().values()):
list_attr += ('\n ' + key + ': %s') %val
return title + list_attr + '\n'
#-----------------------------------------------------------------------
# PUBLIC METHODS
#-----------------------------------------------------------------------
def attrs(self):
dict_attr = copy.copy(self.__dict__)
del dict_attr['name']
del dict_attr['dtype']
del dict_attr['fill_value']
return dict_attr
def get_attr(self, name):
return getattr(self, name)
def set_attr(self, name, value):
setattr(self, name, value)
#=======================================================================
# Class geodata
#=======================================================================
[docs]class geodata:
"""
The class geodate is designed to work with geophysical data stored
in NetCDF file having the possible dimensions:
'T' : the time counter
'Z' : the vertical dimension
'Y' : the latitudinal dimension
'X' : the longitudinal dimension
Dimension names and dimension variables names are stored for a lot
of models and dataset. The class is able to recognize the
appropriate type to use. It allows as easiest access to NetCDF file
for which the dimension may be unkown and some functions are
designed to rapidly create, duplicate and modify the NetCDF. The
additional use of chunks may reveal very powerful when using it with
the calculation class.
Attributes
----------
filename : str
The name of the netcdf file.
iotype : {'r', 'w'}
Read or Write mode.
dtype : str
The type of the geophysical data
dimensions : str
A series of dimension keys ('T', 'Z', 'Y', 'X')
time_unlimited : bool
A boolean that tells if the record dimension is unlimited or not
chk : chunk
The default chunk on which perform operations
format : str
The netCDF format ('NETCDF3', 'NETCDF3_64BIT', 'NETCDF4',
'NETCDF4_CLASSIC')
"""
def __init__(self, filename, iotype='r', dtype='',
dimensions='', time_unlimited=True, chk=None,
format='NETCDF3_64BIT', init=True):
"""
Parameters
----------
filename : string
The name of the input file.
iotype : {'r', 'w'}, optional
Define if the file is opened in read or write mode. Default
is read only.
dtype : string, optional
Define the geodata type of the netCDF file. Supported types
are 'orca', 'aviso' and 'spectrum'. Note that in read mode, the
geodata type will be automatically recognized.
dimensions : string, optional
Define the dimensions of geodata. Dimensions can only contain
the values T, Z, Y and X. Note that in read mode, the
dimensions will be automatically recognized.
time_unlimited : bool, optional
If the netCDF does not exist, it will define the time dimension
as unlimited or not during its creation. Default is True.
chk : chunk, optional
Define a chunk by default. Operations will only be made on this
chunk of the netCDF. If the netCDF does not exist, the chunk
will be used to define the size of the dimensions to be
created. Default is None, which means no chunk by default.
format : {'NETCDF3', 'NETCDF3_64BIT', 'NETCDF4',
'NETCDF4_CLASSIC'}, optional
If the netCDF does not exist, it specifies the format to use.
Default is 'NETCDF3_64BIT'.
"""
# Filename
if isinstance(filename, str):
self.filename = filename
else:
raise TypeError, 'filename must be a string'
# I/O type
if iotype in ['r', 'w']:
self.iotype = iotype
else:
raise ValueError, 'iotype must be either "r" or "w"'
# Datatype
if isinstance(dtype, str):
self.dtype = dtype
else:
raise TypeError, 'dtype must be a string'
# Dimensions
if isinstance(dimensions, str):
self.dimensions = dimensions
else:
raise TypeError, 'dimensions must be a string'
# Time dimension unlimited or not
if isinstance(time_unlimited, bool):
self.time_unlimited = time_unlimited
else:
raise TypeError, 'time_unlimited must be a boolean'
# Chunk
if chk is None or isinstance(chk, chunk):
self.chk = chk
else:
raise TypeError, 'chk must be a chunk'
# Format
if format in ['NETCDF3_CLASSIC', 'NETCDF3_64BIT',
'NETCDF4_CLASSIC', 'NETCDF4']:
self.format = format
else:
raise ValueError, "Not a correct format"
self.opened = False
if not os.path.isfile(filename):
if not iotype == 'r':
if init:
self.nc_create()
self._nc_close()
if self.dtype == '':
self.dtype = 'orca'
self.dim_name = geodtype[self.dtype][0]
self.dim_var = geodtype[self.dtype][1]
self.latlon2D = geodtype[self.dtype][2]
if self.dimensions == '':
self.dimensions = 'TZYX'
else:
raise IOError, 'The file in read mode does not exist'
else:
if dtype == '':
self._build_dtype()
self.dim_name = geodtype[self.dtype][0]
self.dim_var = geodtype[self.dtype][1]
self.latlon2D = geodtype[self.dtype][2]
if self.dimensions == '':
self._build_dimensions()
#-----------------------------------------------------------------------
# SPECIAL METHODS -> /!\ To improve
#-----------------------------------------------------------------------
def __getitem__(self, varname):
self.get_var(varname)
def __setitem__(self, varname, values):
self.save_var(varname, values)
def __str__(self):
"""
Nice print for the geodata instance. It is similart to the
famous ncdump -h command.
"""
bound = ('====================================================='
+ '===========\n')
#Main attributes
header = ('Filename : %s \n'
'Format : %s \n'
'I/O access : %s \n'
'Data type : %s \n'
'Dimensions : %s \n'
'Unlimited time dimension : %s \n'
% (self.filename, self.format, self.iotype,
self.dtype, self.dimensions, self.time_unlimited)
)
#Chunk
chunk = ('Default chunk: \n' +
'-------------- \n' +
self.chk.__str__() + '\n\n')
#Dimensions
dimensions = ('Dimensions: \n' +
'----------- \n')
if self._check_dim():
for dim in self.dimensions:
dimensions += (dim + ' - ' + self.dim_name[dim] + ': '
+ str(self.get_dim(dim)) + '\n')
dimensions += '\n'
#Variables
variables = ('Variables: \n' +
'---------- \n')
self._nc_open()
for varname in self.netcdf.variables:
gvar = self.build_gvar(str(varname))
variables += (gvar.__str__() + '\n')
self._nc_close()
return (bound + header + bound + chunk +
dimensions + variables + bound)
#def __add__(self, gdat):
#add(self, gdat, list_gvar)
#def __sub__(self, gdat):
#def __mul__(self, gdat):
#def __div__(self, gdat):
#def __pow__(self, power):
def __del__(self):
self._nc_close()
#-----------------------------------------------------------------------
# PRIVATE METHODS
#-----------------------------------------------------------------------
def _build_dimensions(self):
dimensions=''
self._nc_open()
for key in self.netcdf.dimensions.keys():
if key in self.dim_name.values():
dimensions += self.dim_name.keys()\
[self.dim_name.values().index(key)]
if 'T' in dimensions:
self.dimensions+='T'
if 'Z' in dimensions:
self.dimensions+='Z'
if 'Y' in dimensions:
self.dimensions+='Y'
if 'X' in dimensions:
self.dimensions+='X'
self._nc_close()
def _build_dtype(self):
"""
Construct the corresponding dtype for geodata by reading the
dimension and the dimension variable names
"""
best_score = 0
self._nc_open()
for dtype in geodtype:
score = 0
dim_name = geodtype[dtype][0]
dim_var = geodtype[dtype][0]
for key in self.netcdf.dimensions.keys():
if key in dim_name.values():
score += 1
for var in self.netcdf.variables.keys():
if var in dim_var.values():
score +=1
if score > best_score:
best_score = copy.copy(score)
self.dtype = dtype
self.dim_name = geodtype[self.dtype][0]
self.dim_var = geodtype[self.dtype][1]
self.latlon2D = geodtype[self.dtype][2]
self._nc_close()
def _check_datatype(self, gdat):
"""
Check if the datatype of the current instance is consistent
with the input gdat
Parameters
----------
gdat : geodata
The input geodata instance to test with
Raises
------
TypeError
If the types do not match.
"""
if not self.dtype == gdat.get_dtype():
raise TypeError, ('gdat must the same type as self: ' +
self.dtype)
def _check_dim(self):
"""
Check if the dimensions already exists in the nectCDF file
Returns
-------
out : bool
True if the dimensions already exists
"""
self._nc_open()
dim_namelist = self.netcdf.dimensions.keys()
for dim in self.dimensions:
if not self.dim_name[dim] in dim_namelist:
return False
self._nc_close()
return True
def _checkIO(self):
"""
Check if it is allowed to write in the netCDF file.
Raises
------
IOError
If it is allowed to write in the file.
"""
if self.iotype == 'r':
raise IOError, "Not allowed to write in the file "
def _nc_close(self):
"""
Close the netCDF file
"""
if self.opened == True:
self.netcdf.close()
self.opened = False
def _nc_open(self):
"""
Open the netCDF file
"""
if self.opened == False:
if self.iotype == 'r':
self.netcdf=Dataset(self.filename, 'r')
elif self.iotype == 'w':
self.netcdf = Dataset(self.filename, 'a')
self.opened = True
#-----------------------------------------------------------------------
# PUBLIC METHODS
#-----------------------------------------------------------------------
[docs] def build_gvar(self, varname):
"""
Construct a geovar instance from the corresponding variable in
the netCDF file
Parameters
----------
varname : str
Name of the variable in the netCDF file
Returns
-------
gvar : geovar
The geovar instance with all the attributes of the variable
Examples
--------
>>> gdat = example_geodata()
>>> gvar = gdat.build_gvar('exlon')
>>> print gvar
exlon (float32):
fill_value: -9999.0
axis: YX
"""
gvar = geovar(varname, dtype=self.get_var_dtype(varname),
fill_value=self.get_var_FillValue(varname),
axis=self.get_var_axis(varname))
self._nc_open()
for attr in self.netcdf.variables[varname].ncattrs():
if not attr == '_FillValue':
gvar.set_attr(attr, self.get_var_attr(varname, attr))
self._nc_close()
return gvar
[docs] def changeIO(self, iotype):
"""
Change the read/write mode of the netCDF file.
Parameters
----------
iotype : {'r', 'w'}
Read or write mode.
Examples
--------
>>> gdat = example_geodata()
>>> print gdat.iotype
w
>>> gdat.changeIO('r')
>>> print gdat.iotype
r
"""
if iotype in ['r', 'w']:
self.iotype = iotype
else:
raise ValueError, 'iotype must be either "r" or "w"'
self._nc_close()
self.iotype = iotype
[docs] def check_chunk(self, chk):
"""
Check if the chunk is consistent with the dimensions of the
netCDF file otherwise reset the value to the min/max.
Parameters
----------
chk : chunk
The chunk to check
Examples
--------
"""
if chk == None:
if not self.chk == None:
chk = self.chk.copy()
else:
chk = chunk()
if chk['t_min'] == None:
chk['t_min'] = 0
if chk['t_max'] == None and (not self.time_unlimited
or self.iotype=='r'):
chk['t_max'] = self.get_dim_time()
if chk['z_min'] == None:
chk['z_min'] = 0
if chk['z_max'] == None:
chk['z_max'] = self.get_dim_depth()
if chk['lat_min'] == None:
chk['lat_min'] = 0
if chk['lat_max'] == None:
chk['lat_max'] = self.get_dim_lat()
if chk['lon_min'] == None:
chk['lon_min'] = 0
if chk['lon_max'] == None:
chk['lon_max'] = self.get_dim_lon()
return chk
[docs] def get_nc(self):
"""
Return the associated netCDF4 instance
"""
self._nc_open()
return self.netcdf
[docs] def get_dtype(self):
"""
Return the associated type of geodata
"""
return self.dtype
[docs] def get_dimensions(self):
"""
Return the dimensions
"""
return self.dimensions
[docs] def get_filename(self):
"""
Return the filename
"""
return self.filename
#-----------------------------------------------------------------------
# I - I/O functions
#-----------------------------------------------------------------------
[docs] def nc_create(self):
"""
Create the netCDF file
"""
self._checkIO()
self.netcdf = Dataset(self.filename, 'w', format=self.format)
self.opened = True
#-----------------------------------------------------------------------
# C1 - Chunk functions
#-----------------------------------------------------------------------
def get_chunk(self):
chk = self.check_chunk(self.chk)
return chk.copy()
def set_chunk(self, chk):
self.chk = chk
#-----------------------------------------------------------------------
# C2 - Copying functions
#-----------------------------------------------------------------------
# 1 - Dimensions
# ~~~~~~~~~~~~~~~
[docs] def copy_dim(self, gdat):
"""
Copy the dimensions from another geodata instance
Parameters
----------
gdat : geodata
The input instance from which the dimensions are copied
Examples
--------
>>> gdat_in = example_geodata()
>>> gdat_out = geodata('../examples/example_copy.nc',
... iotype='w', dtype='example2d')
>>> print gdat_out
================================================================
Filename : ../examples/example_copy.nc
Format : NETCDF3_64BIT
I/O access : w
Data type : example2d
Dimensions : TZYX
Unlimited time dimension : True
================================================================
Default chunk:
--------------
None
<BLANKLINE>
Dimensions:
-----------
<BLANKLINE>
Variables:
----------
================================================================
<BLANKLINE>
>>> gdat_out.copy_dim(gdat_in)
>>> print gdat_out
================================================================
Filename : ../examples/example_copy.nc
Format : NETCDF3_64BIT
I/O access : w
Data type : example2d
Dimensions : TZYX
Unlimited time dimension : True
================================================================
Default chunk:
--------------
None
<BLANKLINE>
Dimensions:
-----------
T - t: 0
Z - z: 9
Y - y: 37
X - x: 73
<BLANKLINE>
Variables:
----------
================================================================
<BLANKLINE>
"""
if not isinstance(gdat, geodata):
raise TypeError, 'gdat input must be a geodata'
self._checkIO()
self._nc_open()
self._check_datatype(gdat)
# Check if gdat is compatible with the current instance
gdimensions = gdat.get_dimensions()
for dim in 'TZYX':
if dim in self.dimensions and dim in gdimensions:
if dim=='T'and self.time_unlimited==True:
# Particular processing for forcing the time record to
# be unlimited
self.netcdf.createDimension(self.dim_name['T'],None)
else:
self.netcdf.createDimension(self.dim_name[dim],
gdat.get_dim(dim))
self._nc_close()
# 2 - Dimension variables
# ~~~~~~~~~~~~~~~~~~~~~~~~
[docs] def copy_dimvarstruct(self, gdat, dim):
"""
Create and copy the structure of a dimension variable.
Arguments
---------
gdat : geodata
Input data from which the dimension will be read
dim : {'T', 'Z', 'Y', 'X'}
Key of the dimension dim : {}
"""
self._checkIO()
self._check_datatype(gdat)
nc = gdat.get_nc()
if hasattr(nc.variables[self.dim_var[dim]], '_FillValue'):
fval = nc.variables[self.dim_var[dim]]._FillValue
else:
fval = 0.
dtyp = nc.variables[self.dim_var[dim]].dtype
self.create_dimvar(dim, dtype=dtyp, fill_value=fval)
self._nc_open()
for attr in nc.variables[self.dim_var[dim]].ncattrs():
if attr != '_FillValue':
# /!\ The attribute _FillValue cannot be changed in
# netCDF4 !!!
self.netcdf.variables[self.dim_var[dim]]\
.__setattr__(attr, nc.variables[self.dim_var[dim]]\
.getncattr(attr))
self.netcdf.variables[self.dim_var[dim]].\
set_auto_maskandscale(False)
self._nc_close()
[docs] def copy_coord(self, gdat, chk_in=None, chk_out=None):
"""
Copy all the coordinate variables from the instance gdat into
the current instance
Parameters
----------
gdata : geodata
The geodata instance from which the coordinate variables
are copied
chk_in : chunk
Define a subset of the input instance
chk_out : chunk
The corresponding subset in which the coordinate variables
are copied
"""
for dim in self.dimensions:
self.copy_dimvar(gdat, dim, chk_in=chk_in, chk_out=chk_out)
[docs] def copy_dimvar(self, gdat, dim, chk_in=None, chk_out=None):
"""
Copy the variables associated with the dimensions
"""
if not isinstance(dim, str):
raise TypeError, 'dim input must be a string'
if not isinstance(gdat, geodata):
raise TypeError, 'gdat input must be a geodata'
self._checkIO()
if not self._check_dim():
if chk_out == None and self.chk == None:
# Case : chk_out is not defined and there is no default
# chunk in the instance
self.copy_dim(gdat)
# -> The dimension are copied from the gdat
else:
# Other case: call create_dimension function with chk set
# to chk_out
self.create_dim(chk=chk_out)
self.copy_dimvarstruct(gdat, dim)
chk_in = gdat.check_chunk(chk_in)
chk_out = self.check_chunk(chk_out)
nc = gdat.get_nc()
self._nc_open()
if self.latlon2D == True and (dim == 'Y' or dim == 'X'):
# Special processing for 2D lat/lon fields
self.netcdf.variables[self.dim_var[dim]]\
[chk_out.min('Y'):chk_out.max('Y'),\
chk_out.min('X'):chk_out.max('X')]\
= \
np.array(nc.variables[self.dim_var[dim]]\
[chk_in.min('Y'):chk_in.max('Y'),\
chk_in.min('X'):chk_in.max('X')])
else:
self.netcdf.variables[self.dim_var[dim]]\
[chk_out.min(dim):chk_out.max(dim)]\
= \
np.array(nc.variables[self.dim_var[dim]]\
[chk_in.min(dim):chk_in.max(dim)])
self._nc_close()
def copy_time(self,gdat,chk_in=None,chk_out=None):
self.copy_dimvar(gdat,'T',chk_in=chk_in,chk_out=chk_out)
def copy_depth(self,gdat,chk_in=None,chk_out=None):
self.copy_dimvar(gdat,'Z',chk_in=chk_in,chk_out=chk_out)
def copy_lat(self,gdat,chk_in=None,chk_out=None):
self.copy_dimvar(gdat,'Y',chk_in=chk_in,chk_out=chk_out)
def copy_lon(self,gdat,chk_in=None,chk_out=None):
self.copy_dimvar(gdat,'X',chk_in=chk_in,chk_out=chk_out)
def copy_latlon(self,gdat,chk_in=None,chk_out=None):
self.copy_lat(gdat,chk_in=chk_in,chk_out=chk_out)
self.copy_lon(gdat,chk_in=chk_in,chk_out=chk_out)
# 2 - Variables
# ~~~~~~~~~~~~~~
[docs] def copy_varstruct(self, gdat, gvar_in, gvar_out=None):
"""
Copy the structure of the variable in the geodata
instance geodat to a new variable in self
"""
# Check the arugments
if isinstance(gvar_in, geovar):
pass
elif isinstance(gvar_in, str):
gvar_in = geovar(gvar_in, dtype=gdat.get_var_dtype(gvar_in),
fill_value=gdat.get_var_FillValue(gvar_in),
axis = gdat.get_var_axis(gvar_in))
else:
raise TypeError, "Parameter must be either string or geovar"
if gvar_out == None:
gvar_out = gvar_in
elif isinstance(gvar_out, geovar):
pass
elif isinstance(gvar_out, str):
gvar_out = geovar(gvar_out, dtype=gvar_in.dtype,
fill_value=gvar_in.fill_value,
axis = gvar_in.axis)
else:
raise TypeError, "Parameter must be either string or geovar"
self._checkIO()
self._check_datatype(gdat)
nc = gdat.get_nc()
for attr in nc.variables[gvar_in.name].ncattrs():
if not attr in gvar_out.attrs():
gvar_out.set_attr(attr, gdat.get_var_attr(gvar_in,
attr))
self.create_var(gvar_out)
def copy_var(self, gdat, gvar_in, gvar_out=None, chk_in=None,
chk_out=None):
# Check the arugments
if isinstance(gvar_in, geovar):
pass
elif isinstance(gvar_in, str):
gvar_in = geovar(gvar_in, dtype=gdat.get_var_dtype(gvar_in),
fill_value=gdat.get_var_FillValue(gvar_in),
axis = gdat.get_var_axis(gvar_in))
else:
raise TypeError, "Parameter must be either string or geovar"
if gvar_out == None:
gvar_out = gvar_in
elif isinstance(gvar_out, geovar):
pass
elif isinstance(gvar_out, str):
gvar_out = geovar(gvar_out, dtype=gvar_in.dtype,
fill_value=gvar_in.fill_value,
axis = gvar_in.axis)
else:
raise TypeError, "Parameter must be either string or geovar"
self._checkIO()
self._check_datatype(gdat)
chk_in = gdat.check_chunk(chk_in)
chk_out = self.check_chunk(chk_out)
gdimensions = gdat.get_dimensions()
# Copy the structure of the input variable into the output
# variable if the variable is not already in the netCDF
# file
if not gvar_out.name in self.netcdf.variables:
self.copy_varstruct(gdat, gvar_in, gvar_out=gvar_out)
nc = gdat.get_nc()
dim = self.get_var_axis(gvar_out.name)
self._nc_open()
# 1D case
if len(dim) == 1:
self.netcdf.variables[gvar_out.name]\
[chk_out.min(dim[0]):chk_out.max(dim[0])]\
= \
np.array(nc.variables[gvar_in.name]\
[chk_in.min(dim[0]):chk_in.max(dim[0])])
# 2D case
elif len(dim) == 2:
self.netcdf.variables[gvar_out.name]\
[chk_out.min(dim[0]):chk_out.max(dim[0]),\
chk_out.min(dim[1]):chk_out.max(dim[1])] \
= \
np.array(nc.variables[gvar_in.name]\
[chk_in.min(dim[0]):chk_in.max(dim[0]),\
chk_in.min(dim[1]):chk_in.max(dim[1])])
# 3D case
elif len(dim) == 3:
for i in range(chk_in.size_dim(dim[0])):
self.netcdf.variables[gvar_out.name]\
[chk_out.min(dim[0])+i,\
chk_out.min(dim[1]):chk_out.max(dim[1]),\
chk_out.min(dim[2]):chk_out.max(dim[2])]\
= \
np.array(nc.variables[gvar_in.name]\
[chk_in.min(dim[0])+i,\
chk_in.min(dim[1]):chk_in.max(dim[1]),\
chk_in.min(dim[2]):chk_in.max(dim[2])])
# 4D case
elif len(dim) == 4:
for i in range(chk_in.size_dim(dim[0])):
self.netcdf.variables[gvar_out.name]\
[chk_out.min(dim[0])+i,\
chk_out.min(dim[1]):chk_out.max(dim[1]),\
chk_out.min(dim[2]):chk_out.max(dim[2]),\
chk_out.min(dim[3]):chk_out.max(dim[3])]\
= \
np.array(nc.variables[gvar_in.name]\
[chk_in.min(dim[0])+i,\
chk_in.min(dim[1]):chk_in.max(dim[1]),\
chk_in.min(dim[2]):chk_in.max(dim[2]),\
chk_in.min(dim[3]):chk_in.max(dim[3])])
self._nc_close()
#-----------------------------------------------------------------------
# E - Extraction functions
#-----------------------------------------------------------------------
def get_size(self):
chk = self.get_chunk()
return chk.total_size()
#-----------------------------------------------------------------------
# R - Reading functions
#-----------------------------------------------------------------------
# 1 - Dimensions
# ~~~~~~~~~~~~~~~
[docs] def get_dim(self, dim):
"""
Get the size of a dimension int the netCDF4 instance nc_in
along one dimension (T, Z, Y or X)
"""
if not isinstance(dim, str):
raise TypeError, 'dim input must be a string'
self._nc_open()
if dim in self.dimensions:
n = len(self.netcdf.dimensions[self.dim_name[dim]])
else:
n = 1
return n
self._nc_close()
[docs] def get_dim_time(self):
"""
Get the dimension of the time axis
Returns
-------
n_t : int
The dimension of the time axis
"""
n_t = self.get_dim('T')
return n_t
[docs] def get_dim_depth(self):
"""
Return the dimension of the vertical axis
Returns
-------
n_z : int
The dimension of the vertical axis
"""
n_z = self.get_dim('Z')
return n_z
[docs] def get_dim_lat(self):
"""
Return the dimension of the latitudinal axis
Returns
-------
n_lat : int
The dimension of the latitudinal axis
"""
n_lat = self.get_dim('Y')
return n_lat
[docs] def get_dim_lon(self):
"""
Return the dimension of the longitudinal axis
Returns
-------
n_lon : int
The dimension of the longitudinal axis
"""
n_lon = self.get_dim('X')
return self.get_dim('X')
[docs] def get_dim_latlon(self):
"""
Return the dimension of the latitudinal and longitudinal axis
Returns
-------
n_lon : int
The dimension of the longitudinal axis
n_lat : int
The dimension of the latitudinal axis
"""
n_lat, n_lon = self.get_dim_lat(),self.get_dim_lon()
return n_lat, n_lon
# 2 - Dimension variables
# ~~~~~~~~~~~~~~~~~~~~~~~~
[docs] def get_dimvar(self, dim, chk=None):
'''
Return the coordinates associated with the dimension dim from
the netCDF4 instance nc_in or only a subset optionally
defined by a chunk instance
'''
if not isinstance(dim,str):
raise TypeError, 'dim input must be a string'
self._nc_open()
chk = self.check_chunk(chk)
if dim in self.dimensions:
if self.latlon2D==True and (dim=='Y' or dim=='X'):
# Special processing for 2D lat/lon fields
dimvar = \
np.array(self.netcdf.variables[self.dim_var[dim]]\
[chk.min('Y'):chk.max('Y'),chk.min('X'):chk.max('X')])
else:
dimvar = \
np.array(self.netcdf.variables[self.dim_var[dim]]\
[chk.min(dim):chk.max(dim)])
return np.squeeze(dimvar)
else:
raise ValueError, ('There is no dimension ' + dim +
'in the dimensions: ' + self.dimensions)
self._nc_close()
[docs] def get_time(self, chk=None):
"""
Return the time record, optionally a subset defined by a
chunk
Parameters
----------
chk : chunk, optional
Define the chunk to read in the NetCDF file
Returns
-------
time : 1darray
The time 1d/2d array read in the NetCDF file
"""
time = self.get_dimvar('T', chk=chk)
return time
[docs] def get_depth(self, chk=None):
"""
Return the vertical 1d array, optionally a subset defined by a
chunk
Parameters
----------
chk : chunk, optional
Define the chunk to read in the NetCDF file
Returns
-------
depth : 1darray
The depth 1d array read in the NetCDF file
"""
return self.get_dimvar('Z', chk=chk)
[docs] def get_lat(self, chk=None):
"""
Return the latitudinal 1d/2d array, optionally a subset
defined by a chunk chk
Parameters
----------
chk : chunk, optional
Define the chunk to read in the NetCDF file
Returns
-------
lat : 1darray/2darray
The latitude 1d/2d array read in the NetCDF file
"""
lat = self.get_dimvar('Y', chk=chk)
return lat
[docs] def get_lon(self, chk=None):
"""
Return the longitudinal 1d/2d array, optionally a subset
defined by a chunk
Parameters
----------
chk : chunk, optional
Define the chunk to read in the NetCDF file
Returns
-------
lon : 1darray/2darray
The longitude 1d/2d array read in the NetCDF file
"""
lon = self.get_dimvar('X', chk=chk)
return lon
[docs] def get_latlon(self, chk=None):
"""
Return the latitudinal and longitudinal 1d/2d array,
optionally a subset defined by a chunk
Parameters
----------
chk : chunk, optional
Define the chunk to read in the NetCDF file
Returns
-------
lat : 1darray/2darray
The latitude 1d/2d array read in the NetCDF file
lon : 1darray/2darray
The longitude 1d/2d array read in the NetCDF file
"""
lat, lon = self.get_lat(chk=chk), self.get_lon(chk=chk)
return lat, lon
# 3 - Variables
# ~~~~~~~~~~~~~~
[docs] def get_var(self, gvar, chk=None, dtype='f8'):
"""
Read a variable in the netCDF file
Parameters
----------
gvar : string or geovar
Input variable can only be described by its name or by a
a more complex instance geovar
chk : chunk, optional
Specify a subset to read, all the data is read by default
dtype : data-type, optional
The desired data-type of the numpy array in which the data
is stored. Default is double precision
Returns
-------
out : ndarray
A numpy array of the dimension of the variable, note that
dimensions of length 1 are removed.
"""
if isinstance(gvar, geovar):
var_name = gvar.name
elif isinstance(gvar, str):
var_name = gvar
else:
raise TypeError, 'var must be either a string or a geovar'
dim = self.get_var_axis(var_name)
self._nc_open()
chk = self.check_chunk(chk)
if len(dim)==1:
out = \
np.array(self.netcdf.variables[var_name]\
[chk.min(dim[0]):chk.max(dim[0])],\
dtype=dtype)
elif len(dim)==2:
out = \
np.array(self.netcdf.variables[var_name]\
[chk.min(dim[0]):chk.max(dim[0]),\
chk.min(dim[1]):chk.max(dim[1])],\
dtype=dtype)
elif len(dim)==3:
out = \
np.array(self.netcdf.variables[var_name]\
[chk.min(dim[0]):chk.max(dim[0]),\
chk.min(dim[1]):chk.max(dim[1]),\
chk.min(dim[2]):chk.max(dim[2])],\
dtype=dtype)
elif len(dim)==4:
out = \
np.array(self.netcdf.variables[var_name]\
[chk.min(dim[0]):chk.max(dim[0]),\
chk.min(dim[1]):chk.max(dim[1]),\
chk.min(dim[2]):chk.max(dim[2]),\
chk.min(dim[3]):chk.max(dim[3])],\
dtype=dtype)
self._nc_close()
return np.squeeze(out)
[docs] def get_var_FillValue(self, gvar):
"""
Return the value of the FillValue attribute
Parameters
----------
gvar : string or geovar
Input variable can only be described by its name or by a
a more complex instance geovar
attr_name : string
Name of the variable attribute in the NetCDF file
Returns
-------
out : unknown
The value of the attribute
"""
if isinstance(gvar, geovar):
var_name = gvar.name
elif isinstance(gvar, str):
var_name = gvar
else:
raise TypeError, "Parameter must be either string or geovar"
self._nc_open()
if hasattr(self.netcdf.variables[var_name], '_FillValue'):
fval = self.netcdf.variables[var_name]._FillValue
elif hasattr(self.netcdf.variables[var_name], 'missing_value'):
fval = self.netcdf.variables[var_name].missing_value
else:
fval = None
self._nc_close()
return fval
[docs] def get_var_attr(self, gvar, attr_name):
"""
Return the value of the variable attribute
Parameters
----------
gvar : string or geovar
Input variable can only be described by its name or by a
a more complex instance geovar
attr_name : string
Name of the variable attribute in the NetCDF file
Returns
-------
out : unknown
The value of the attribute
"""
if isinstance(gvar, geovar):
var_name = gvar.name
elif isinstance(gvar, str):
var_name = gvar
else:
raise TypeError, "Parameter must be either string or geovar"
self._nc_open()
if hasattr(self.netcdf.variables[var_name], attr_name):
attr = self.netcdf.variables[var_name].getncattr(attr_name)
else:
attr = None
self._nc_close()
return attr
[docs] def get_var_dtype(self, gvar):
"""
Return the type of the variable in the netcdf file
Parameters
----------
gvar : string or geovar
Input variable can only be described by its name or by a
a more complex instance geovar
Returns
-------
out : string
The type of the variable in the standard convention (ex:
'f4')
"""
if isinstance(gvar, geovar):
var_name = gvar.name
elif isinstance(gvar, str):
var_name = gvar
else:
raise TypeError, "Parameter must be either string or geovar"
self._nc_open()
out = self.netcdf.variables[var_name].dtype
self._nc_close()
return out
[docs] def get_var_names(self):
"""
Return the list of the variable names without the dimension
variables
Returns
-------
out : list of string
A list of string that contains the variable names
"""
self._nc_open()
dict_var = self.netcdf.variables
for dim in self.dimensions:
del dict_var[self.dim_var[dim]]
self._nc_close()
# Force the string type because of a common use of unicode type
out = [str(elem) for elem in dict_var.keys()]
return out
[docs] def get_var_axis(self, gvar):
"""
Return the axis attribute of the variable in the NetCDF file
Parameters
----------
gvar : string or geovar
Input variable can only be described by its name or by a
a more complex instance geovar
Returns
-------
axis : string
The axis of the variable containing which is a combination
of the available axis {'T', 'Z', 'Y', 'X'}
"""
if isinstance(gvar, geovar):
var_name = gvar.name
elif isinstance(gvar, str):
var_name = gvar
else:
raise TypeError, 'var must be either a string or a geovar'
axis = ''
self._nc_open()
for key in self.netcdf.variables[var_name].dimensions:
if key in self.dim_name.values():
axis += \
self.dim_name.keys()[self.dim_name.values().index(key)]
self._nc_close()
return axis
#-----------------------------------------------------------------------
# W - Writing functions
#-----------------------------------------------------------------------
# 1 - Dimensions
# ~~~~~~~~~~~~~~~
[docs] def create_dim(self, chk=None):
"""
Create the dimensions in the netCDF file from a chunk
Arguments
---------
chk : chunk, optional
The size of the dimensions to create
"""
if chk == None and self.chk == None:
raise RuntimeError, "The dimension cannot be defined"
self._checkIO()
self._nc_open()
chk = self.check_chunk(chk)
for dim in self.dimensions:
if dim == 'T' and self.time_unlimited == True:
self.netcdf.createDimension(self.dim_name[dim], None)
else:
self.netcdf.createDimension(self.dim_name[dim],
chk.size_dim(dim))
self._nc_close()
[docs] def create_dimvar(self, dim, dtype='f4', fill_value=-9999.):
"""
Create the dimension variables in the netCDF file
"""
self._checkIO()
self._nc_open()
if self.latlon2D == True and (dim == 'Y' or dim == 'X'):
# Special processing for 2D lat/lon fields
self.netcdf.createVariable(self.dim_var[dim],
dtype,
(self.dim_name['Y'],
self.dim_name['X'], ),
fill_value=fill_value)
else:
self.netcdf.createVariable(self.dim_var[dim],
dtype, (self.dim_name[dim], ),
fill_value=fill_value)
# Force auto_maskandscale to False
self.netcdf.variables[self.dim_var[dim]]\
.set_auto_maskandscale(False)
self._nc_close()
[docs] def create_coord(self, dtype='f4', fill_value=-9999.):
"""
Create all the dimension variables in the netCDF file
"""
for dim in self.dimensions:
self.create_dimvar(dim, dtype=dtype, fill_value=fill_value)
[docs] def save_dimvar(self, dim, value, chk=None):
"""
Write the values of a dimension variable in the netCDF file
Arguments
---------
dim : {'T', 'Z', 'Y', 'X'}
Key string of the dimensions
values : ndarray
The values of variable to save
chk : chunk, optional
The subset of the NetCDF file where the arrays will be
saved
"""
self._checkIO()
# Try to create the variable, if it already exists an
# exception is raised and it goes directly to the next step
try:
self.create_dimvar(dim)
except:
pass
self._nc_open()
dimvar = self.netcdf.variables[self.dim_var[dim]]
chk = self.check_chunk(chk)
# 2D latitude/longitude case
if self.latlon2D == True and (dim == 'Y' or dim == 'X'):
dimvar[\
chk.min('Y'):chk.max('Y'),\
chk.min('X'):chk.max('X')]\
= value
# 1D case
else:
dimvar[\
chk.min(dim):chk.max(dim)]\
= value
# Force auto_maskandscale to False
dimvar.set_auto_maskandscale(False)
self._nc_close()
[docs] def save_time(self, time, chk=None):
"""
Write the values of the time variable in the netCDF file
Arguments
---------
time : 1darray
Values of the time variable
chk : chunk, optional
The subset of the NetCDF file where the array will be
saved
"""
self.save_dimvar('T', time, chk=chk)
[docs] def save_depth(self, depth, chk=None):
"""
Write the values of the vertical variable in the netCDF file
Arguments
---------
depth : 1darray
Values of the vertical variable
chk : chunk, optional
The subset of the NetCDF file where the array will be
saved
"""
self.save_dimvar('Z', depth, chk=chk)
[docs] def save_lat(self, lat, chk=None):
"""
Write the values of the latitudinal variable in the netCDF file
Arguments
---------
lat : 1darray/2darray
Values of the latitudinal variable
chk : chunk, optional
The subset of the NetCDF file where the array will be
saved
"""
self.save_dimvar('Y', lat, chk=chk)
[docs] def save_lon(self, lon, chk=None):
"""
Write the values of the longitudinal variable in the netCDF file
Arguments
---------
lon : 1darray/2darray
Values of the longitudinal variable
chk : chunk, optional
The subset of the NetCDF file where the array will be
saved
"""
self.save_dimvar('X', lon, chk=chk)
[docs] def save_latlon(self, lat, lon, chk=None):
"""
Write the values of latidudinal and longitudinal variable in
the netCDF file
Arguments
---------
lat : 1darray/2darray
Values of the latitudinal variable
lon : 1darray/2darray
Values of the longitudinal variable
chk : chunk, optional
The subset of the NetCDF file where the arrays will be
saved
"""
self.save_dimvar('Y', lat, chk=chk)
self.save_dimvar('X', lon, chk=chk)
# 3 - Variables
# ~~~~~~~~~~~~~~
[docs] def create_var(self, gvar):
"""
Create the variable with its attributes in the netCDF file
Parameters
----------
gvar : geovar or string
The variable to create in the netCDF file defined by a
string or a complex object geovar
"""
if isinstance(gvar, geovar):
pass
elif isinstance(gvar, str):
gvar = geovar(gvar)
else:
raise TypeError, 'var must be either a string or a geovar'
self._checkIO() # Check write access
# Check if the dimensions have been created
if not self._check_dim():
self.create_dim()
self._nc_open()
var_name = gvar.name
fval = gvar.fill_value
dtype = gvar.dtype
# Check if the variable already exists in the NetCDF file
if var_name in self.netcdf.variables:
self._nc_close()
return
# If gvar has the attribute 'axis', create the variable
# according to those dimensions, otherwise create the variable
# according to the dimensions of the NetCDF file
if 'axis' in gvar.attrs():
dim = gvar.get_attr('axis')
else:
dim = self.dimensions
# 1D case
if len(dim) == 1:
self.netcdf.createVariable(var_name, dtype,
(self.dim_name[dim[0]],),
fill_value=fval)
# 2D case
elif len(dim) == 2:
self.netcdf.createVariable(var_name, dtype,
(self.dim_name[dim[0]],
self.dim_name[dim[1]],),
fill_value=fval)
# 3D case
elif len(dim) == 3:
self.netcdf.createVariable(var_name, dtype,
(self.dim_name[dim[0]],
self.dim_name[dim[1]],
self.dim_name[dim[2]],),
fill_value=fval)
# 4D case
elif len(dim) == 4:
self.netcdf.createVariable(var_name, dtype,
(self.dim_name[dim[0]],
self.dim_name[dim[1]],
self.dim_name[dim[2]],
self.dim_name[dim[3]],),
fill_value=fval)
# Copy the attributes of gvar in the corresponding variables in
# the NetCDF file
# /!\ The attribute _FillValue can only be set at the creation
# of the variable
for attr in gvar.attrs():
if not attr == '_FillValue':
self.netcdf.variables[var_name].__setattr__\
(attr, gvar.get_attr(attr))
# Force auto_maskandscale to False
self.netcdf.variables[var_name].set_auto_maskandscale(False)
self._nc_close()
[docs] def save_var(self, gvar, values, chk=None):
"""
Write the variable values in the corresponding variable of
the netCDF file
Arguments
---------
gvar : geovar or str
The geovar or name of the variable in the NetCDF file
values : ndarray
The values of variable to save
chk : chunk, optional
The subset of the NetCDF file where the arrays will be
saved
"""
# Check arguments
if isinstance(gvar, geovar):
var_name = gvar.name
elif isinstance(gvar, str):
var_name = gvar
else:
raise TypeError, 'var must be either a string or a geovar'
chk = self.check_chunk(chk)
self._checkIO()
# Try to create the variable, if it already exists an
# exception is raised and it goes directly to the next step
try:
self.create_var(gvar)
except:
pass
dim = self.get_var_axis(var_name)
self._nc_open()
var_nc = self.netcdf.variables[var_name]
# 1D case
if len(dim) == 1:
var_nc[chk.min(dim[0]):chk.max(dim[0])] = values
# 2D case
elif len(dim) == 2:
var_nc[chk.min(dim[0]):chk.max(dim[0]),
chk.min(dim[1]):chk.max(dim[1])] = values
# 3D case
elif len(dim) == 3:
var_nc[chk.min(dim[0]):chk.max(dim[0]),
chk.min(dim[1]):chk.max(dim[1]),
chk.min(dim[2]):chk.max(dim[2])] = values
# 4D case
elif len(dim) == 4:
var_nc[chk.min(dim[0]):chk.max(dim[0]),
chk.min(dim[1]):chk.max(dim[1]),
chk.min(dim[2]):chk.max(dim[2]),
chk.min(dim[3]):chk.max(dim[3])] = values
# Force auto_maskandscale to False
var_nc.set_auto_maskandscale(False)
self._nc_close()
#=======================================================================
# Class calculation
#=======================================================================
[docs]class calculation:
"""
Attributes
----------
data_in : geodata
Input geophysical data
data_out : geodata
Output geophysiscal data
nb_tempfiles :
Number of temporary files
- parallelized <'boolean'>: Define if the calculation
is parallelized or not
- nb_proc <'int'>: total number of processors
- proc_indx <'in'>: index of current processor
- overwrite <'boolean'>: overwrite or not temporary and output files
- chk_xtrac <'chunk'>: chunk defining the structure to extract from
the input file to generate the output file
- fcounter <'string'>: path of temporary counter file
- chunking <'dictionnary['in', <'chunk'>, 'temp_in', <'chunk'>,
'temp_out', <'chunk'>, 'out',<'chunk'>]'>
Constructor:
- data_in <'geodata'>: characteristics of input data
- data_out <'geodata'>: characteristics of output data
- chk <'chunk'>: chunk defining the structure to extract from the
input file to generate the output file
- parallelized <'boolean'>: Define if the calculation is
parallelized or not
- overwrite <'boolean'>: overwrite or not temporary files
"""
def __init__(self, gdat_in, gdat_out, list_gvar_in,
list_gvar_out=None, chk_in=None, chk_out=None,
parallelized=False, overwrite=False):
"""
Arguments
---------
gdat_in : geodata
Input data
gdat_out : geodata
Output data
list_gvar_in :
List of input variables
list_gvar_out : optional
List of output variables
chk_in : chunk, optional
chk_out : chunk, optional
parallelized : bool, optional
Set to True to use MPI parallelization
overwrite : bool, optional
Define existing files must be overwritten
"""
# Input geodata
if isinstance(gdat_in, geodata):
self.gdat_in = gdat_in
else:
raise TypeError, 'gdat_in must be a geodata type'
# Output geodata
if isinstance(gdat_out, geodata):
self.gdat_out = gdat_out
else:
raise TypeError, 'gdat_out must be a geodata type'
# List of input/output variables
self.list_gvar_in = list_gvar_in
if list_gvar_out == None:
self.list_gvar_out = list_gvar_in
else:
self.list_gvar_out = list_gvar_out
# Input/output chunks
if chk_in == None:
self.chk_in = self.gdat_in.get_chunk()
else:
self.chk_in = gdat_in.check_chunk(chk_in)
if chk_out == None:
self.chk_out = self.chk_in.copy()
else:
self.chk_out = chk_out
# Number of temporary files
self.nb_tempfiles = 1
# Get the total number of cores and the current core ID
# if parallelized compuation
self.parallelized = parallelized
if parallelized == True:
from mpi4py import MPI
#Parallelisation initialization
self.comm = MPI.COMM_WORLD
self.nb_proc = self.comm.Get_size()
self.proc_indx = self.comm.Get_rank()
else:
self.nb_proc = 1
self.proc_indx = 0
# Overwrite property
self.overwrite = overwrite
# File counter
self.fcounter = (gdat_out.filename[:-3] + '_temp#' +
str(self.proc_indx)+'.p')
if not os.path.isfile(self.fcounter) or overwrite:
# Initialize a backup file for indexing current operating
# file
self.write_counter(0,0)
# Temporary input/output
self.gdat_temp_in = gdat_in
self.gdat_temp_out = gdat_out
self.chunking = {'in':self.chk_in, 'temp':self.chk_out,
'out':self.chk_out}
[docs] def optimize_1D(self, nb_var, dim='lon', free_mem=None):
"""
Optimize the computation by defining slices along the dimension
specified
"""
if free_mem == None:
import subprocess
# Get the available RAM
free = subprocess.Popen(("free -t | grep buffers/cache | "
"awk '{ print $NF }'"),
shell=True, stdout=subprocess.PIPE)
free_mem = int(free.communicate()[0])
else:
free_mem = int(free_mem)
self.disp("Memory available: %.0f Mo " % (free_mem / 1000))
# Compute the memory occupied by 64-bit variable
var_mem = self.chk_in.total_size() * 8 / 1000
# Determine the number and sizes of slices for processing
# the dataset according to the number of variables used in the
# computation
nb_slic_lon = (nb_var * var_mem) / free_mem + 1
# Copy
chk_in = self.chk_in.copy()
chk_out = self.chk_out.copy()
chk_temp = chk_out.copy()
self.chunking = []
n_lat, n_lon = chk_in.size_latlon()
if dim == 'lon':
nb_slic_lat = self.nb_proc
# Get the minimum and maximum longitudal index
i_min = self.chk_in['lon_min']
i_max = self.chk_in['lon_max']
j_min = self.chk_in['lat_min']
j_max = self.chk_in['lat_max']
n_step_lon = n_lon / nb_slic_lon
n_step_lat = n_lat / nb_slic_lat
self.nb_tempfiles = nb_slic_lon * nb_slic_lat
for i in range(nb_slic_lon):
chk_in['lon_min'] = i_min + i * n_step_lon
chk_out['lon_min'] = i * n_step_lon
chk_temp['lon_min'] = 0
if not i == (nb_slic_lon - 1):
chk_in['lon_max'] = i_min + (i + 1) * n_step_lon
chk_out['lon_max'] = (i + 1) * n_step_lon
chk_temp['lon_max'] = n_step_lon
else:
chk_in['lon_max'] = i_max
chk_out['lon_max'] = n_lon
chk_temp['lon_max'] = n_lon - i * n_step_lon
for j in range(nb_slic_lat):
chk_in['lat_min'] = j_min + j * n_step_lat
chk_out['lat_min'] = j * n_step_lat
chk_temp['lat_min'] = 0
if not j == (nb_slic_lat - 1):
chk_in['lat_max'] = j_min + (j + 1) * n_step_lat
chk_out['lat_max'] = (j + 1) * n_step_lat
chk_temp['lat_max'] = n_step_lat
else:
chk_in['lat_max'] = j_max
chk_out['lat_max'] = n_lat
chk_temp['lat_max'] = (n_lat - j * n_step_lat)
self.chunking.append({'in':chk_in.copy(),
'temp':chk_temp.copy(),
'out':chk_out.copy()})
elif dim == 'time':
nb_slic = chk_in.size_time()
for k in range(nb_slic):
self.nb_tempfiles = nb_slic
chk_in['t_min'] = k
chk_in['t_max'] = k + 1
chk_out['t_min'] = k
chk_out['t_max'] = k + 1
chk_temp['t_min'] = 0
chk_temp['t_max'] = 1
self.chunking.append({'in':chk_in.copy(),
'temp':chk_temp.copy(),
'out':chk_out.copy()})
[docs] def start(self):
"""
"""
self.disp("Generation of " + str(self.nb_tempfiles) +
" temporary files...")
self.generate_temp_in()
self.generate_temp_out()
self.wait()
self.disp("Temporary files have been correctly generated")
[docs] def stop(self):
"""
"""
self.wait()
#Delete temporary NetCDF files
if self.proc_indx == 0:
for i in range(self.nb_tempfiles):
ftemp_out = (self.gdat_out.filename[:-3] + '_temp_out#'
+ str(i) + '.nc')
os.remove(ftemp_out)
#Delete
os.remove(self.fcounter)
[docs] def wait(self):
"""
"""
if self.parallelized==True:
#Wait for all processus to finish
self.comm.Barrier()
[docs] def disp(self, txt):
"""
"""
if self.proc_indx == 0:
print txt
[docs] def delete(self, filename):
"""
"""
if self.proc_indx == 0:
os.remove(filename)
[docs] def generate_file_out(self,mode='same'):
"""
Generate the output files
"""
if self.proc_indx == 0:
if (not os.path.isfile(self.gdat_out.get_filename()) or
self.overwrite):
self.gdat_out.nc_create()
if mode == 'same':
self.gdat_out.extract_struct(self.gdat_in, \
self.list_gvar_in, list_gvar_out=self.list_gvar_out, \
chk_in=self.chk_in, chk_out=self.chk_out)
elif mode == 'new':
self.gdat_out.create_dim(self.chk_out)
self.gdat_out.create_coord()
for namevar in self.list_gvar_out:
self.gdat_out.create_var(namevar)
self.wait()
self.disp("Ouput file has been correctly generated")
[docs] def generate_temp_out(self):
"""
"""
self.gdat_temp_out=[]
for k in range(self.nb_tempfiles):
ftemp = self.gdat_out.filename[:-3]+'_temp_out#'+str(k)+'.nc'
gdat_temp = geodata(ftemp,\
iotype = 'w',\
dtype = self.gdat_out.get_dtype(),\
dimensions = self.gdat_out.get_dimensions(),\
chk = self.chunking[k]['temp'],init=False)
self.gdat_temp_out.append(gdat_temp)
if k % self.nb_proc == self.proc_indx:
if not os.path.isfile(ftemp) or self.overwrite:
gdat_temp.nc_create()
# Write or overwrite existing temporary
# files
gdat_temp.create_dim()
for namevar in self.list_gvar_out:
gdat_temp.create_var(namevar)
[docs] def generate_temp_in(self):
"""
Generate the temporary geodata instances
"""
self.gdat_temp_in = []
for k in range(self.nb_tempfiles):
gdat_temp = geodata(self.gdat_in.get_filename(),\
iotype = 'w',\
dtype = self.gdat_in.get_dtype(),\
dimensions = self.gdat_in.get_dimensions(),\
chk = self.chunking[k]['in'])
self.gdat_temp_in.append(gdat_temp)
[docs] def concatenate(self, dim='lon'):
"""
Concatenate the temporary files along the prescribed dimension
Parameters:
-----------
dim : optional, {'lon', 'time'}
Dimension along which the concatenation is made
"""
self.wait()
if self.proc_indx == 0:
print "Files concatenation..."
n_t = self.chk_out.size_time()
if dim == 'lon':
n_lat, n_lon = self.gdat_out.get_dim_latlon()
chk = self.gdat_out.get_chunk()
for namevar in self.list_gvar_out:
axis = self.gdat_out.get_var_axis(namevar)
if 'T' in axis:
axis = axis.replace('T', '')
if len(axis) == 0:
break
for i in range(n_t):
htuple = ()
vtuple = ()
for k in range(self.nb_tempfiles):
chk_temp = self.chunking[k]['temp']
chk_temp['t_min'] = i
chk_temp['t_max'] = i + 1
var_temp = \
self.gdat_temp_out[k].get_var(namevar,
chk_temp)
vtuple += (var_temp,)
if (k + 1) % self.nb_proc == 0:
htuple += (np.vstack(vtuple),)
vtuple = ()
var = np.hstack(htuple)
chk['t_min'] = i
chk['t_max'] = i + 1
self.gdat_out.save_var(namevar, var, chk)
elif dim == 'time':
for namevar in self.list_gvar_out:
for k in range(self.nb_tempfiles):
chk_temp = self.chunking[k]['temp']
var = self.gdat_temp_out[k].get_var(namevar,
chk_temp)
chk_out = self.chunking[k]['out']
self.gdat_out.save_var(namevar, var,
chk=chk_out)
print "Concatenation done"
self.wait()
[docs] def get_temp_data(self, indx):
"""
"""
return self.gdat_temp_in[indx], self.gdat_temp_out[indx]
[docs] def write_counter(self, var_counter, slice_counter):
"""
"""
file_counter = open(self.fcounter,'w')
pickle.dump((var_counter, slice_counter), file_counter)
file_counter.close()
[docs] def read_counter(self):
"""
"""
file_counter = open(self.fcounter,'r')
var_counter, slice_counter = pickle.load(file_counter)
file_counter.close()
return var_counter, slice_counter
#=======================================================================
# Independant functions derived from geodata
#=======================================================================
def copy_dimvarstruct(gdat_in, gdat_out, dim):
"""
Create and copy the structure of a dimension variable of the
input NetCDF in the output NetCDF
Arguments
---------
gdat_in : geodata
Input data from which the dimension will be read
gdat_out : geodata
Output data in which the dimension will be created
dim : {'T', 'Z', 'Y', 'X'}
Key of the dimension dim : {}
Examples
--------
"""
try:
copy_dimvarstruct = gdat_out.copy_dimvarstruct
except AttributeError:
pass
return copy_dimvarstruct(gdat_in, dim)
def extract(gdat_in, gdat_out, list_gvar_in, list_gvar_out=None,
chk_in=None, chk_out=None):
"""
Extract the structure and the variables from a NetCDF file to
another one
Parameters
----------
gdat_in : geodata
Input data from which the extraction will be made
gdat_out : geodata
Output data in which the extraction will be made
list_gvar_out: list of string or geovar, optional
Names of the variable that will be copied in the output
file. The same names as the input variables are used by
default.
chk_in : chunk, optional
Define the chunk to read in the input file for the
extraction
chk_out : chunk, optional
Define the chunk to write the variable in the output file
extraction
fill_value : float, file
The fill_value that will be used for the variables in the
output netCDF file
"""
try:
extract = gdat_out.extract
except AttributeError:
pass
return extract(gdat_in, list_gvar_in, list_gvar_out=list_gvar_out,
chk_in=chk_in, chk_out=chk_out)
def extract_struct(gdat_in, gdat_out, list_gvar_in, list_gvar_out=None,
chk_in=None, chk_out=None):
"""
Extract the structure from a NetCDF file to another one
Parameters
----------
gdat_in : geodata
Input data from which the extraction will be made
gdat_out : geodata
Output data in which the extraction will be made
list_gvar_in : list of string or geovar
Names of the variables of the input netCDF file for which the
structure will be copied
list_gvar_out: list of string or geovar, optional
Names of the variables of the output netCDF file in which
the structure will be copied
chk_in : chunk, optional
Define the chunk to read in the input file for the
extraction
chk_out : chunk, optional
Define the chunk to write the variable in the output file
extraction
fill_value : float, file
The fill_value that will be used for the variables in the
output netCDF file
"""
try:
extract_struct = gdat_out.extract_struct
except AttributeError:
pass
extract_struct(gdat_in, list_gvar_in, list_gvar_out=list_gvar_out,
chk_in=chk_in, chk_out=chk_out)
#=======================================================================
# Generation of examples
#=======================================================================
def example_geodata(dtype='example2d', format='NETCDF3_64BIT',
dimensions='TZYX', time_unlimited=True):
if os.path.isfile('../examples/example.nc'):
os.remove('../examples/example.nc')
lon1d = np.arange(-180, 181, 5)
n_lon = np.size(lon1d)
lat1d = np.arange(-90, 91, 5)
n_lat = np.size(lat1d)
lon2d, lat2d = np.meshgrid(lon1d, lat1d)
depth = np.arange(0, 4001, 500)
n_z = np.size(depth)
yrtosec = 3600 * 24 * 365.25
timecounter = np.arange(0, 30 * yrtosec, yrtosec)
n_t = np.size(timecounter)
chk = chunk(t_min=0, t_max=n_t, z_min=0, z_max=n_z, lat_min=0,
lat_max=n_lat, lon_min=0, lon_max=n_lon)
gdat = geodata('../examples/example.nc', iotype='w', dtype=dtype,
dimensions=dimensions, time_unlimited=time_unlimited,
chk=chk, format=format)
gdat.create_dim()
gdat.create_coord()
gdat.save_time(timecounter)
gdat.save_depth(depth)
gdat.save_lat(lat2d)
gdat.save_lon(lon2d)
return gdat
def example_geovar():
gvar = geovar('exvar')
gvar.long_name = 'Example of a variable'
return gvar
if __name__ == '__main__':
gdata_example = example_geodata()
print gdata_example
import doctest
doctest.testmod()
os.system('rm ../examples/example*.nc')