Source code for cal_nc.nc_func

#! /usr/bin/env python3
# -*- coding: utf-8 -*-
r"""
Functions for reading, writing, and manipulating netCDF files.
"""


import datetime, pytz
import netCDF4
import os.path
import shutil

import pdb

import cal_proc
from cal_proc import *
from .nc_conf import *
from . import utils

# Public API of this module: the names exported by a star-import.
__all__ = ['read_nc','process_nc','run_ncgen']


# Directory where temporary files are stored by default.
# NOTE(review): none of the functions visible in this module reference this
# value; process_nc() writes its temporary copy alongside the master file.
default_tmp_dir = './tmp'


def read_nc(master, aux=None):
    """Function for reading netCDF calibration files into DataSets.

    .. Note::
        All nc files are left open so that the Datasets associated with
        each file can be operated on/with in the rest of the program. This
        is required whether or not the file was opened as read-only. Thus
        all Datasets should be explicitly closed when they are finished
        with.

    Args:
        master (:obj:`str` or :obj:`pathlib`): 'master' netCDF filename that
            is opened for read/write.
        aux (:obj:`list`, optional): List of any additional netCDF filenames
            that are to be added/concatenated with master nc file. Auxillary
            nc files are opened as read only. Default is None, ie no
            auxillary files.

    Returns:
        Tuple of dataset from master netCDF and list of any auxillary
        Datasets.

    Raises:
        Any exception raised by ``netCDF4.Dataset`` while opening a file.
        Datasets that were already opened by this call are closed before
        the exception is re-raised.
    """
    # A None default avoids sharing one list object between calls
    if aux is None:
        aux = []

    # Open master nc file for reading/writing
    master_ds = netCDF4.Dataset(master, mode='r+', format='NETCDF4')

    # Open any auxillary files as read only
    aux_ds = []
    try:
        for f_ in aux:
            aux_ds.append(netCDF4.Dataset(f_, mode='r', format='NETCDF4'))
    except Exception:
        # The caller never receives the handles opened so far, so they must
        # be released here or they would leak.
        for ds_ in [master_ds] + aux_ds:
            ds_.close()
        raise

    return master_ds, aux_ds
def _config_parse_targets(cfg_file):
    """Return the parse targets described by a configuration file.

    Args:
        cfg_file (:obj:`str`): Filename of configuration file. It is read
            with `read_config` and each section is split into variables and
            'special' entries with `extract_specials`.

    Returns:
        Tuple ``(p_files, var_dicts)`` of equal-length lists, one entry per
        config section. ``p_files`` holds the path of the file to parse, or
        None if the section has no '_parsefile' entry. ``var_dicts`` holds
        the section variables keyed by name prefixed with the group path.
    """
    cfg_dict = read_config(cfg_file)

    p_files = []
    var_dicts = []
    for section in cfg_dict.values():
        # Separate file to parse and associated variables
        v_dict, s_dict = extract_specials(section)

        try:
            p_file = s_dict.pop('_parsefile')
        except KeyError:
            p_files.append(None)
        else:
            # Attempt to find correct path
            p_files.append(utils.filepath(p_file, os.path.dirname(cfg_file)))

        try:
            grp = s_dict.pop('_group')
        except KeyError:
            grp = ''
        else:
            # If not given or given explicitly then is root group
            if grp in [None, '/']:
                grp = ''

        # 'Correct' variable names to include group path
        # NOTE(review): os.path.join uses the platform path separator
        # whereas netCDF group paths use '/'; identical on POSIX systems.
        var_dicts.append({os.path.join(grp, k_): v_
                          for (k_, v_) in v_dict.items()})

    return p_files, var_dicts


def _apply_attribute_updates(master, updates):
    """Apply those entries of `updates` that are existing nc attributes.

    Updates that do not correspond to an attribute that already exists,
    either in the root of the dataset or in the group/variable given by the
    path part of the key, are skipped.

    Args:
        master: Instrument processor object wrapping the master dataset.
        updates (:obj:`dict`): Mapping of attribute path to new value.
    """
    for attr, update in updates.items():
        grp_, attr_ = os.path.split(attr)
        if grp_ == '':
            # Root attribute. Only the root attribute list is consulted;
            # indexing the dataset with an empty group name is not valid.
            if attr not in master.ds.ncattrs():
                # Update not a root attribute so skip
                continue
        elif attr_ not in master.ds[grp_].ncattrs():
            # Update not a group attribute so skip
            continue

        master.update_attr(attr, update)


def _process_master(master_ds, aux_ds, aux_nc, anc_files, instr, updates):
    """Apply all concatenations and updates to the open master dataset.

    Args:
        master_ds: Open, writable dataset of the temporary master copy.
        aux_ds (:obj:`list`): List that is extended *in place* with every
            auxillary dataset that is opened, so that the caller can close
            them whatever the outcome.
        aux_nc (:obj:`list`): Filenames of auxillary netCDF files.
        anc_files (:obj:`list`): Filenames of ancillary files. Modified in
            place so must be private to the caller.
        instr (:obj:`str`): Instrument identifier or None.
        updates (:obj:`dict`): Updates to apply. Modified in place so must
            be private to the caller.

    Returns:
        Tuple ``(status, message)``; status is 0 on success and 1 if
        processing was abandoned.
    """
    # If instrument name has not explicitly been given then obtain intrument
    # from master dataset
    if instr is None:
        try:
            instr = master_ds.getncattr('instr')
        except AttributeError:
            print('No instrument name given in master file.')
            return 1, 'Use --update instr instrument argument.'

    # Obtain appropriate instrument processing class
    instr_class = cal_proc.proc_map(instr)

    try:
        # Initialise the nc object
        master = instr_class(master_ds)
    except Exception:
        print('Instrument processing class not found: {}\n'.format(instr))
        return 1, ''

    # Print out instrument processor help if required
    try:
        if ['help'] in updates.values():
            print(master)
            # TODO(gn): Change this to a Raise
            return 1, ''
    except TypeError:
        # eg if args['update_arg'] is None
        pass

    # Extract any ancillary files from updates with key '_parsefile'
    anc_files.extend(updates.pop('_parsefile', []))

    # Read in all additional nc files and add/append to master
    for aux in aux_nc:
        try:
            aux_ds.append(netCDF4.Dataset(aux, mode='r', format='NETCDF4'))
        except FileNotFoundError:
            # Missing auxillary files are skipped
            continue
        else:
            master.append_dataset(aux_ds[-1])

    # Read in any ancillary files
    for i, anc in enumerate(anc_files):
        if os.path.splitext(anc)[-1].lower() in ['.cfg', '.config']:
            # Read in any configuration files.
            # Currently this code assumes that all information is included
            # within the config file.
            p_files, var_dicts = _config_parse_targets(anc)
        else:
            # If ancillary files are not config's then need to be parsed
            # directly.
            # Any attributes that are included in updates are associated with
            # the ancillary file. Thus if there are more than one anc file
            # then there should be the same number of identical update
            # parameters (unless they are to be broadcast to all updates).

            # Create a dictionary of variables/attributes associated with
            # the ancillary file, anc.
            # This comprehension pseudo broadcasts the last value if not
            # enough have been given in updates. The test must be strictly
            # greater-than as v_[i] only exists if len(v_) > i.
            # Note that if too many attributes have been given (compared to
            # the number of anc files) then these shall be lost!
            var_dicts = [{k_: (v_[i] if len(v_) > i else v_[-1])
                          for (k_, v_) in updates.items()}]
            p_files = [anc]

        # The zip obj will be as short as the shortest input,
        # ie empty if var_dicts==[]
        for p_, v_ in zip(p_files, var_dicts):
            if p_ is None:
                master.append_dict(v_)
            else:
                master.update_bincal_from_file(p_, v_)

    # Append any updates that are attributes rather than variables.
    # Attributes are skipped in cal_proc.generic.append_dict()

    # Every update must include a username and a history that are appended
    # to the root attributes 'username' and 'history'. If these are not
    # given then creation is dealt with in cal_proc.generic()
    master.update_user(updates.pop('username', None))
    master.update_hist(updates.pop('history', None))

    _apply_attribute_updates(master, updates)

    # Add any version information that is missing from nc
    master.update_ver()

    return 0, ''


def process_nc(master_nc, aux_nc=None, anc_files=None,
               out_nc=None, instr=None, updates=None):
    """Processes all netCDF and auxillary files.

    The master netCDF is copied to a temporary file which is opened for read/
    write while any auxilary files are opened as read-only datasets.
    Modifications, concatenations, etc are done on the temporary dataset and
    once complete it is closed and moved to the output file which may be
    either the master file or `out_nc`. Updates are applied to the master
    dataset *after* any concatenation etc.

    None of the list/dict arguments are modified; private copies are taken
    before any entries are added or removed.

    Args:
        master_nc (:obj:`str`): Filename string of 'master' netCDF file.
        aux_nc (:obj:`list`, optional): List of any additional filename/s of
            netCDF files that are to be added/concatenated with master nc
            file. Default is None, ie no auxillary files.
        anc_files (:obj:`list`, optional): List of any ancillary files that
            are not netCDF and so need to be parsed before being injested
            into the dataset. Default is None, ie no ancillary files.
        out_nc (:obj:`str`, optional): Filename string of netCDF to be
            written. If None (default) or the same as `master_nc`,
            `master_nc` is overwritten.
        instr (:obj:`str`, optional): Identifying string of instrument which
            determines processor class. If `None` (default) then instrument
            is identified from `master_nc`.
        updates (:obj:`dict`, optional): All other updates to be applied to
            the final dataset. Default is None, ie no updates.

    Returns:
        Tuple ``(status, message)``. ``(0, '')`` on success. If processing is
        abandoned (no instrument name, no processor class, or help was
        requested) status is 1, the output file is not written and the
        temporary copy is left in place.
    """
    # Mutable defaults would be shared between calls and this function adds
    # to anc_files and pops from updates, so always work on private copies.
    aux_nc = [] if aux_nc is None else list(aux_nc)
    anc_files = [] if anc_files is None else list(anc_files)
    updates = {} if updates is None else dict(updates)

    # Create a temporary copy of the master
    # Note that all 'master' operations are done on this temporary copy
    tmp_nc = '{}_tmp.nc'.format(os.path.splitext(master_nc)[0])
    try:
        shutil.copy2(master_nc, tmp_nc)
    except Exception:
        # This always seems to give an error but does work.
        # Deliberately best-effort: a genuinely failed copy surfaces when
        # the temporary file is opened below.
        pass

    # Create a instrument processor from the master nc file. This file remains
    # open until explicitly closed.
    master_ds = netCDF4.Dataset(tmp_nc, mode='r+', format='NETCDF4',
                                diskless=True, persist=True)

    aux_ds = []
    try:
        status = _process_master(master_ds, aux_ds, aux_nc,
                                 anc_files, instr, updates)
    finally:
        # Close nc datasets whatever the outcome so no handles are leaked
        for ds_ in [master_ds] + aux_ds:
            if ds_.isopen():
                ds_.close()

    if status[0] != 0:
        return status

    # Write to requested output or back over existing master nc file
    shutil.move(tmp_nc, master_nc if out_nc is None else out_nc)

    return 0, ''
def run_ncgen(fin, fout, nc_fmt=3):
    """Create netCDF file from input cdl by calling external program, `ncgen`.

    Args:
        fin (:obj:`str` or :obj:`pathlib`): Filename of cdl file.
        fout (:obj:`str`): Filename of output netCDF file.
        nc_fmt (:obj:`int`): Integer specifying the format of the netCDF
            created, default is 3 for netCDF-4. Options are;

            1. netcdf classic file format, netcdf-3 type model
            2. netcdf 64 bit classic file format, netcdf-3 type model
            3. netcdf-4 file format, netcdf-4 type model
            4. netcdf-4 file format, netcdf-3 type model

            Note that using a netcdf-3 format will break group features and
            thus the entire `cal-nc` structure.

    Raises:
        SystemExit: With exit code 1 if `ncgen` cannot be found or returns
            a non-zero exit status.
        Exception: Any other error raised while building or running the
            command is reported and then re-raised unchanged.
    """
    import subprocess

    try:
        subprocess.check_call(['ncgen', '-b', '-k{:d}'.format(nc_fmt),
                               '-o', fout, fin])
    except FileNotFoundError as err:
        # With an argument list (no shell) a missing executable is reported
        # as FileNotFoundError, not as a return code of 127.
        print('\nCommand not found. Check that ncgen is installed.\n')
        raise SystemExit(1) from err
    except subprocess.CalledProcessError as err:
        if err.returncode == 127:
            print('\nCommand not found. Check that ncgen is installed.\n')
        elif err.returncode == 1:
            # Generally error in cdl
            print('\nGeneration of netCDF from cdl file failed.')
            print(' {}'.format(fin))
            print('Check input cdl syntax.\n')
        # Non-zero exit code so that calling shells/scripts see the failure
        raise SystemExit(1) from err
    except Exception as err:
        print('\nSomething went horribly wrong with the ncgen call\n')
        print('\n', err)
        # Re-raise instead of dropping into an interactive debugger, which
        # would hang or fail in any non-interactive run.
        raise

    return