Package netCDF4 :: Module utils

Source Code for Module netCDF4.utils

from __future__ import print_function

import sys
import numpy as np
from numpy import ma
import warnings
import getopt
import os

python3 = sys.version_info[0] > 2
if python3:
    # no unicode type in python 3, use bytes instead when testing
    # for a string-like object
    unicode = str
try:
    bytes
except NameError:
    # no bytes type in python < 2.6
    bytes = str

def _sortbylist(A, B):
    # sort one list (A) using the values from another list (B)
    return [A[i] for i in sorted(range(len(A)), key=B.__getitem__)]

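For illustration, a quick sketch of the behavior: the elements of A are reordered so that the corresponding values of B come out sorted.

    >>> _sortbylist(['a', 'b', 'c'], [3, 1, 2])
    ['b', 'c', 'a']
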
def _find_dim(grp, dimname):
    # find Dimension instance given group and name.
    # look in current group, and parents.
    group = grp
    dim = None
    while True:
        try:
            dim = group.dimensions[dimname]
            break
        except KeyError:
            try:
                group = group.parent
            except AttributeError:
                raise ValueError("cannot find dimension %s in this group or parent groups" % dimname)
    return dim

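A minimal sketch of the parent-group lookup, assuming a hypothetical scratch file tmp.nc:

    >>> from netCDF4 import Dataset
    >>> nc = Dataset('tmp.nc', 'w')              # hypothetical scratch file
    >>> d = nc.createDimension('time', None)
    >>> sub = nc.createGroup('sub')
    >>> _find_dim(sub, 'time').isunlimited()     # resolved by walking up to the root group
    True
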
def _walk_grps(topgrp):
    """Iterate through all (sub-) groups of topgrp, similar to os.walk.

    """
    grps = topgrp.groups.values()
    yield grps
    for grp in topgrp.groups.values():
        for children in _walk_grps(grp):
            yield children

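A sketch of walking a small group tree (group names hypothetical); each iteration yields the groups at one level:

    >>> nc = Dataset('tree.nc', 'w')             # hypothetical scratch file
    >>> g2 = nc.createGroup('g1').createGroup('g2')
    >>> for grps in _walk_grps(nc):
    ...     print([grp.name for grp in grps])    # assumes the Group.name property
    ['g1']
    ['g2']
    []
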
def _quantize(data, least_significant_digit):
    """
    quantize data to improve compression. data is quantized using
    around(scale*data)/scale, where scale is 2**bits, and bits is determined
    from the least_significant_digit. For example, if
    least_significant_digit=1, bits will be 4.
    """
    precision = pow(10., -least_significant_digit)
    exp = np.log10(precision)
    if exp < 0:
        exp = int(np.floor(exp))
    else:
        exp = int(np.ceil(exp))
    bits = np.ceil(np.log2(pow(10., -exp)))
    scale = pow(2., bits)
    datout = np.around(scale*data)/scale
    if ma.isMA(datout):
        datout.set_fill_value(data.fill_value)
    return datout

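Working through the docstring's example: least_significant_digit=1 gives precision 0.1, so exp = -1, bits = ceil(log2(10)) = 4 and scale = 16; values are rounded to the nearest 1/16. A sketch (array repr may differ by numpy version):

    >>> _quantize(np.array([1.234, 5.678]), 1)
    array([1.25  , 5.6875])
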
def _StartCountStride(elem, shape, dimensions=None, grp=None, datashape=None,
                      put=False):
    """Return start, count, stride and indices needed to store/extract data
    into/from a netCDF variable.

    This function is used to convert a slicing expression into a form that is
    compatible with the nc_get_vars function. Specifically, it needs
    to interpret integers, slices, ellipses, and 1-d sequences of integers
    and booleans.

    Numpy uses "broadcasting indexing" to handle array-valued indices.
    "Broadcasting indexing" (a.k.a. "fancy indexing") treats all multi-valued
    indices together to allow arbitrary points to be extracted. The index
    arrays can be multidimensional, and more than one can be specified in a
    slice, as long as they can be "broadcast" against each other.
    This style of indexing can be very powerful, but it is very hard
    to understand, explain, and implement (and can lead to hard-to-find bugs).
    Most other python packages and array processing
    languages (such as netcdf4-python, xray, biggus, Matlab and Fortran)
    use "orthogonal indexing", which only allows for 1-d index arrays and
    treats these arrays of indices independently along each dimension.

    The implementation of "orthogonal indexing" used here requires that
    index arrays be 1-d boolean or integer. If integer arrays are used,
    the index values must be sorted and contain no duplicates.

    In summary, slicing netcdf4-python variable objects with 1-d integer or
    boolean arrays is allowed, but may give a different result than slicing a
    numpy array.

    Numpy also supports slicing an array with a boolean array of the same
    shape. For example x[x>0] returns a 1-d array with all the positive values of x.
    This is also not supported in netcdf4-python, if x.ndim > 1.

    Orthogonal indexing can be used to select netcdf variable slices
    using the dimension variables. For example, you can use v[lat>60,lon<180]
    to fetch the elements of v obeying conditions on latitude and longitude.
    Allowing for this sort of simple variable subsetting is the reason we
    decided to deviate from numpy's slicing rules.

    This function is used both by the __setitem__ and __getitem__ method of
    the Variable class.

    Parameters
    ----------
    elem : tuple of integer, slice, ellipsis or 1-d boolean or integer
        sequences used to slice the netCDF Variable (Variable[elem]).
    shape : tuple containing the current shape of the netCDF variable.
    dimensions : sequence
        The name of the dimensions. This is only useful to find out
        whether or not some dimensions are unlimited. Only needed within
        __setitem__.
    grp : netCDF Group
        The netCDF group to which the variable being set belongs.
        Only needed within __setitem__.
    datashape : sequence
        The shape of the data that is being stored. Only needed by __setitem__.
    put : True|False (default False). If called from __setitem__, put is True.

    Returns
    -------
    start : ndarray (..., n)
        A starting indices array of dimension n+1. The first n
        dimensions identify different independent data chunks. The last dimension
        can be read as the starting indices.
    count : ndarray (..., n)
        An array of dimension (n+1) storing the number of elements to get.
    stride : ndarray (..., n)
        An array of dimension (n+1) storing the steps between each datum.
    indices : ndarray (..., n)
        An array storing the indices describing the location of the
        data chunk in the target/source array (__getitem__/__setitem__).

    Notes:

    netCDF data is accessed via the function:
       nc_get_vars(grpid, varid, start, count, stride, data)

    Assume that the variable has dimension n, then

    start is a n-tuple that contains the indices at the beginning of data chunk.
    count is a n-tuple that contains the number of elements to be accessed.
    stride is a n-tuple that contains the step length between each element.

    """
    # Adapted from pycdf (http://pysclint.sourceforge.net/pycdf)
    # by Andre Gosselin.
    # Modified by David Huard to handle efficiently fancy indexing with
    # sequences of integers or booleans.

    nDims = len(shape)
    if nDims == 0:
        nDims = 1
        shape = (1,)

    # When a single array or (non-tuple) sequence of integers is given
    # as a slice, assume it applies to the first dimension,
    # and use ellipsis for remaining dimensions.
    if np.iterable(elem):
        if type(elem) == np.ndarray or (type(elem) != tuple and
           np.array([_is_int(e) for e in elem]).all()):
            elem = [elem]
            for n in range(len(elem)+1, nDims+1):
                elem.append(slice(None, None, None))
    else:   # Convert single index to sequence
        elem = [elem]

    # replace sequence of integer indices with boolean arrays
    newElem = []
    IndexErrorMsg = \
        "only integers, slices (`:`), ellipsis (`...`), and 1-d integer or boolean arrays are valid indices"
    for i, e in enumerate(elem):
        # string-like object: try to cast to int.
        # needs to be done first, since strings are iterable and
        # hard to distinguish from something castable to an iterable numpy array.
        if type(e) in [str, bytes, unicode]:
            try:
                e = int(e)
            except ValueError:
                raise IndexError(IndexErrorMsg)
        ea = np.asarray(e)
        # Raise error if multidimensional indexing is used.
        if ea.ndim > 1:
            raise IndexError("Index cannot be multidimensional")
        # set unlim to True if dimension is unlimited and put==True
        # (called from __setitem__)
        if put and (dimensions is not None and grp is not None) and len(dimensions):
            try:
                dimname = dimensions[i]
                # is this dimension unlimited?
                # look in current group, and parents for dim.
                dim = _find_dim(grp, dimname)
                unlim = dim.isunlimited()
            except IndexError: # more slices than dimensions (issue 371)
                unlim = False
        else:
            unlim = False
        # an iterable (non-scalar) integer array.
        if np.iterable(ea) and ea.dtype.kind == 'i':
            # convert negative indices in 1d array to positive ones.
            ea = np.where(ea < 0, ea + shape[i], ea)
            if np.any(ea < 0):
                raise IndexError("integer index out of range")
            if not np.all(np.diff(ea) > 0): # same but cheaper than np.all(np.unique(ea) == ea)
                # raise an error when new indexing behavior is different
                # (i.e. when integer sequence not sorted, or there are
                # duplicate indices in the sequence)
                msg = "integer sequences in slices must be sorted and cannot have duplicates"
                raise IndexError(msg)
            # convert to boolean array.
            # if unlim, let boolean array be longer than current dimension
            # length.
            elen = shape[i]
            if unlim:
                elen = max(ea.max()+1, elen)
            else:
                if ea.max()+1 > elen:
                    msg = "integer index exceeds dimension size"
                    raise IndexError(msg)
            eb = np.zeros(elen, bool)
            eb[ea] = True
            newElem.append(eb)
        # an iterable (non-scalar) boolean array
        elif np.iterable(ea) and ea.dtype.kind == 'b':
            # check that boolean array is not too long
            if not unlim and shape[i] != len(ea):
                msg = """
Boolean array must have the same shape as the data along this dimension."""
                raise IndexError(msg)
            newElem.append(ea)
        # integer scalar
        elif ea.dtype.kind == 'i':
            newElem.append(e)
        # slice or ellipsis object
        elif type(e) == slice or type(e) == type(Ellipsis):
            newElem.append(e)
        else:  # castable to a scalar int, otherwise invalid
            try:
                e = int(e)
                newElem.append(e)
            except (TypeError, ValueError):
                raise IndexError(IndexErrorMsg)
    elem = newElem

    # replace Ellipsis and boolean arrays with slice objects, if possible.
    hasEllipsis = False
    newElem = []
    for e in elem:
        ea = np.asarray(e)
        # Replace ellipsis with slices.
        if type(e) == type(Ellipsis):
            if hasEllipsis:
                raise IndexError("At most one ellipsis allowed in a slicing expression")
            # The ellipsis stands for the missing dimensions.
            newElem.extend((slice(None, None, None),) * (nDims - len(elem) + 1))
            hasEllipsis = True
        # Replace boolean array with slice object if possible.
        elif ea.dtype.kind == 'b':
            if ea.any():
                indices = np.flatnonzero(ea)
                start = indices[0]
                stop = indices[-1] + 1
                if len(indices) >= 2:
                    step = indices[1] - indices[0]
                else:
                    step = None
                if np.array_equal(indices, np.arange(start, stop, step)):
                    newElem.append(slice(start, stop, step))
                else:
                    newElem.append(ea)
            else:
                newElem.append(slice(0, 0))
        else:
            newElem.append(e)
    elem = newElem

    # If slice doesn't cover all dims, assume ellipsis for rest of dims.
    if len(elem) < nDims:
        for n in range(len(elem)+1, nDims+1):
            elem.append(slice(None, None, None))

    # make sure there are not too many dimensions in slice.
    if len(elem) > nDims:
        raise ValueError("slicing expression exceeds the number of dimensions of the variable")

    # Compute the dimensions of the start, count, stride and indices arrays.
    # The number of elements in the first n dimensions corresponds to the
    # number of times the _get method will be called.
    sdim = []
    for i, e in enumerate(elem):
        # at this stage e is a slice, a scalar integer, or a 1d boolean array.
        # Booleans --- _get call for each True value
        if np.asarray(e).dtype.kind == 'b':
            sdim.append(e.sum())
        # Scalar int or slice, just a single _get call
        else:
            sdim.append(1)

    # Create the start, count, stride and indices arrays.

    sdim.append(max(nDims, 1))
    start = np.empty(sdim, dtype=int)
    count = np.empty(sdim, dtype=int)
    stride = np.empty(sdim, dtype=int)
    indices = np.empty(sdim, dtype=object)

    for i, e in enumerate(elem):

        ea = np.asarray(e)

        # set unlim to True if dimension is unlimited and put==True
        # (called from __setitem__). Note: grp and dimensions must be set.
        if put and (dimensions is not None and grp is not None) and len(dimensions):
            dimname = dimensions[i]
            # is this dimension unlimited?
            # look in current group, and parents for dim.
            dim = _find_dim(grp, dimname)
            unlim = dim.isunlimited()
        else:
            unlim = False

        #    SLICE    #
        if type(e) == slice:

            # determine length parameter for slice.indices.

            # shape[i] can be zero for unlim dim that hasn't been written to
            # yet.
            # length of slice may be longer than current shape
            # if dimension is unlimited (and we are writing, not reading).
            if unlim and e.stop is not None and e.stop > shape[i]:
                length = e.stop
            elif unlim and e.stop is None and datashape != ():
                if e.start is None:
                    length = datashape[i]
                else:
                    length = e.start + datashape[i]
            else:
                if unlim and datashape == () and len(dim) == 0:
                    # writing scalar along unlimited dimension using slicing
                    # syntax (var[:] = 1, when var.shape = ())
                    length = 1
                else:
                    length = shape[i]

            beg, end, inc = e.indices(length)
            n = len(range(beg, end, inc))

            start[..., i] = beg
            count[..., i] = n
            stride[..., i] = inc
            indices[..., i] = slice(None)

        #    BOOLEAN ITERABLE    #
        elif ea.dtype.kind == 'b':
            e = np.arange(len(e))[e]  # convert to integer array
            start[..., i] = np.apply_along_axis(lambda x: e*x, i, np.ones(sdim[:-1]))
            indices[..., i] = np.apply_along_axis(lambda x: np.arange(sdim[i])*x, i, np.ones(sdim[:-1], int))

            count[..., i] = 1
            stride[..., i] = 1

        # all that's left is SCALAR INTEGER #
        else:
            if e >= 0:
                start[..., i] = e
            elif e < 0 and (-e <= shape[i]):
                start[..., i] = e + shape[i]
            else:
                raise IndexError("Index out of range")

            count[..., i] = 1
            stride[..., i] = 1
            indices[..., i] = -1   # Use -1 instead of 0 to indicate that
                                   # this dimension shall be squeezed.

    return start, count, stride, indices

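Two hand-worked sketches of the conversion, derived from the logic above. A plain slice yields a single read; a non-contiguous boolean index yields one start entry per selected element:

    >>> start, count, stride, ind = _StartCountStride(slice(1, None, 2), (6,))
    >>> start[..., 0], count[..., 0], stride[..., 0]
    (array([1]), array([3]), array([2]))
    >>> eb = np.array([True, False, True, True, False, False])
    >>> start, count, stride, ind = _StartCountStride(eb, (6,))
    >>> start[..., 0]          # one nc_get_vars call per True element
    array([0, 2, 3])
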
def _out_array_shape(count):
    """Return the output array shape given the count array created by _StartCountStride"""

    s = list(count.shape[:-1])
    out = []

    for i, n in enumerate(s):
        if n == 1:
            c = count[..., i].ravel()[0] # All elements should be identical.
            out.append(c)
        else:
            out.append(n)
    return out

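Continuing the boolean sketch above: three independent single-element reads collapse to a 1-d output of length three:

    >>> _out_array_shape(count)    # count from the boolean example above
    [3]
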
def _is_container(a):
    # is object container-like? (can test for
    # membership with "in", but not a string)
    try:
        1 in a
    except TypeError:
        return False
    if type(a) in [str, bytes, unicode]:
        return False
    return True

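A quick behavior sketch:

    >>> _is_container([1, 2]), _is_container('abc'), _is_container(5)
    (True, False, False)
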
def _is_int(a):
    try:
        return int(a) == a
    except (TypeError, ValueError):
        return False

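Note that any value equal to its integer cast counts as an int, including whole floats:

    >>> _is_int(3), _is_int(3.0), _is_int(3.5), _is_int('a')
    (True, True, False, False)
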
def _tostr(s):
    # return str(s), falling back to s itself if it cannot be stringified.
    try:
        ss = str(s)
    except Exception:
        ss = s
    return ss

def _getgrp(g, p):
    # walk down to the group given by the posix-style path p,
    # starting from group g.
    grps = p.split("/")
    for gname in grps:
        if gname == "":
            continue
        g = g.groups[gname]
    return g

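A sketch of resolving a nested path (names hypothetical); this is equivalent to chained lookups in Group.groups:

    >>> g = _getgrp(f, 'model/run1')   # same as f.groups['model'].groups['run1']
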
def ncinfo():

    from netCDF4 import Dataset

    usage = """
 Print summary information about a netCDF file.

 usage: %s [-h] [-g grp or --group=grp] [-v var or --variable=var] [-d dim or --dimension=dim] filename

 -h -- Print usage message.
 -g <group name> or --group=<group name> -- Print info for this group
      (default is root group). Nested groups specified
      using posix paths ("group1/group2/group3").
 -v <variable name> or --variable=<variable name> -- Print info for this variable.
 -d <dimension name> or --dimension=<dimension name> -- Print info for this dimension.

 netcdf filename must be last argument.
\n""" % os.path.basename(sys.argv[0])

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'hv:g:d:',
                                    ['group=',
                                     'variable=',
                                     'dimension='])
    except getopt.GetoptError:
        (type, value, traceback) = sys.exc_info()
        sys.stdout.write("Error parsing the options. The error was: %s\n" % value)
        sys.stderr.write(usage)
        sys.exit(0)

    # Get the options
    group = None; var = None; dim = None
    for option in opts:
        if option[0] == '-h':
            sys.stderr.write(usage)
            sys.exit(0)
        elif option[0] == '--group' or option[0] == '-g':
            group = option[1]
        elif option[0] == '--variable' or option[0] == '-v':
            var = option[1]
        elif option[0] == '--dimension' or option[0] == '-d':
            dim = option[1]
        else:
            sys.stdout.write("%s: Unrecognized option\n" % option[0])
            sys.stderr.write(usage)
            sys.exit(0)

    # filename passed as last argument
    filename = pargs[-1]

    f = Dataset(filename)
    if group is None:
        if var is None and dim is None:
            print(f)
        else:
            if var is not None:
                print(f.variables[var])
            if dim is not None:
                print(f.dimensions[dim])
    else:
        if var is None and dim is None:
            print(_getgrp(f, group))
        else:
            g = _getgrp(f, group)
            if var is not None:
                print(g.variables[var])
            if dim is not None:
                print(g.dimensions[dim])
    f.close()

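For example, an invocation along these lines (filenames and names hypothetical) prints the metadata of one variable inside a nested group:

    ncinfo -g model/run1 -v temp sample_output.nc
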
def _nc4tonc3(filename4, filename3, clobber=False, nchunk=10, quiet=False, format='NETCDF3_64BIT'):
    """convert a netcdf 4 file (filename4) in NETCDF4_CLASSIC format
    to a netcdf 3 file (filename3) in the format given by the format
    keyword (NETCDF3_64BIT by default)."""

    from netCDF4 import Dataset

    ncfile4 = Dataset(filename4, 'r')
    if ncfile4.file_format != 'NETCDF4_CLASSIC':
        raise IOError('input file must be in NETCDF4_CLASSIC format')
    ncfile3 = Dataset(filename3, 'w', clobber=clobber, format=format)
    # create dimensions. Check for unlimited dim.
    unlimdimname = False
    unlimdim = None
    # create global attributes.
    if not quiet: sys.stdout.write('copying global attributes ..\n')
    ncfile3.setncatts(ncfile4.__dict__)
    if not quiet: sys.stdout.write('copying dimensions ..\n')
    for dimname, dim in ncfile4.dimensions.items():
        if dim.isunlimited():
            unlimdimname = dimname
            unlimdim = dim
            ncfile3.createDimension(dimname, None)
        else:
            ncfile3.createDimension(dimname, len(dim))
    # create variables.
    for varname, ncvar in ncfile4.variables.items():
        if not quiet:
            sys.stdout.write('copying variable %s\n' % varname)
        # is there an unlimited dimension?
        if unlimdimname and unlimdimname in ncvar.dimensions:
            hasunlimdim = True
        else:
            hasunlimdim = False
        if hasattr(ncvar, '_FillValue'):
            FillValue = ncvar._FillValue
        else:
            FillValue = None
        var = ncfile3.createVariable(varname, ncvar.dtype, ncvar.dimensions, fill_value=FillValue)
        # fill variable attributes.
        attdict = ncvar.__dict__
        if '_FillValue' in attdict:
            del attdict['_FillValue']
        var.setncatts(attdict)
        # fill variables with data.
        if hasunlimdim: # has an unlim dim, loop over unlim dim index.
            # range to copy
            if nchunk:
                start = 0; stop = len(unlimdim); step = nchunk
                if step < 1:
                    step = 1
                for n in range(start, stop, step):
                    nmax = n + nchunk
                    if nmax > len(unlimdim):
                        nmax = len(unlimdim)
                    var[n:nmax] = ncvar[n:nmax]
            else:
                var[0:len(unlimdim)] = ncvar[:]
        else: # no unlim dim or 1-d variable, just copy all data at once.
            var[:] = ncvar[:]
        ncfile3.sync() # flush data to disk
    # close files.
    ncfile3.close()
    ncfile4.close()

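A minimal usage sketch of the private helper (hypothetical filenames; the input must be NETCDF4_CLASSIC, as enforced above):

    >>> _nc4tonc3('data4.nc', 'data3.nc', clobber=True, quiet=True)
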
def nc4tonc3():
    usage = """
 Convert a netCDF 4 file (in NETCDF4_CLASSIC format) to netCDF 3 format.

 usage: %s [-h] [-o] [--format] [--quiet] [--chunk] netcdf4filename netcdf3filename
 -h -- Print usage message.
 -o -- Overwrite destination file (default is to raise an error if output file already exists).
 --quiet=(0|1) -- if 1, don't print diagnostic information.
 --format -- netcdf3 format to use (NETCDF3_64BIT by default, can be set to NETCDF3_CLASSIC)
 --chunk=(integer) -- number of records along unlimited dimension to
     write at once. Default 1000. Ignored if there is no unlimited
     dimension. chunk=0 means write all the data at once.
\n""" % os.path.basename(sys.argv[0])

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'ho',
                                    ['format=', 'chunk=', 'quiet='])
    except getopt.GetoptError:
        (type, value, traceback) = sys.exc_info()
        sys.stdout.write("Error parsing the options. The error was: %s\n" % value)
        sys.stderr.write(usage)
        sys.exit(0)

    # default options
    quiet = 0
    chunk = 1000
    format = 'NETCDF3_64BIT'
    overwritefile = 0

    # Get the options
    for option in opts:
        if option[0] == '-h':
            sys.stderr.write(usage)
            sys.exit(0)
        elif option[0] == '-o':
            overwritefile = 1
        elif option[0] == '--quiet':
            quiet = int(option[1])
        elif option[0] == '--format':
            format = option[1]
        elif option[0] == '--chunk':
            chunk = int(option[1])
        else:
            sys.stdout.write("%s: Unrecognized option\n" % option[0])
            sys.stderr.write(usage)
            sys.exit(0)

    # if we pass a number of files different from 2, abort
    if len(pargs) != 2:
        sys.stdout.write("You need to pass both source and destination!\n")
        sys.stderr.write(usage)
        sys.exit(0)

    # Catch the files passed as the last arguments
    filename4 = pargs[0]
    filename3 = pargs[1]

    # copy the data from filename4 to filename3.
    _nc4tonc3(filename4, filename3, clobber=overwritefile, nchunk=chunk,
              quiet=quiet, format=format)

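A typical command-line invocation might look like this (filenames hypothetical):

    nc4tonc3 -o --format=NETCDF3_CLASSIC data4.nc data3.nc
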
def _nc3tonc4(filename3, filename4, unpackshort=True,
              zlib=True, complevel=6, shuffle=True, fletcher32=False,
              clobber=False, lsd_dict=None, nchunk=10, quiet=False, classic=0,
              vars=None, istart=0, istop=-1):
    """convert a netcdf 3 file (filename3) to a netcdf 4 file.
    The default format is 'NETCDF4', but can be set
    to NETCDF4_CLASSIC if classic=1.
    If unpackshort=True, variables stored as short
    integers with a scale and offset are unpacked to floats
    in the netcdf 4 file. If lsd_dict is not None, the data of variables
    whose names are keys of the dict will be truncated to the decimal place
    specified by the values of the dict. This improves compression by
    making it 'lossy'.
    If vars is not None, only variable names in the list
    will be copied (plus all the dimension variables).
    The zlib, complevel and shuffle keywords control
    how the compression is done."""

    from netCDF4 import Dataset

    ncfile3 = Dataset(filename3, 'r')
    if classic:
        ncfile4 = Dataset(filename4, 'w', clobber=clobber, format='NETCDF4_CLASSIC')
    else:
        ncfile4 = Dataset(filename4, 'w', clobber=clobber, format='NETCDF4')
    mval = 1.e30 # missing value if unpackshort=True
    # create dimensions. Check for unlimited dim.
    unlimdimname = False
    unlimdim = None
    # create global attributes.
    if not quiet: sys.stdout.write('copying global attributes ..\n')
    ncfile4.setncatts(ncfile3.__dict__)
    if not quiet: sys.stdout.write('copying dimensions ..\n')
    for dimname, dim in ncfile3.dimensions.items():
        if dim.isunlimited():
            unlimdimname = dimname
            unlimdim = dim
            ncfile4.createDimension(dimname, None)
            if istop == -1: istop = len(unlimdim)
        else:
            ncfile4.createDimension(dimname, len(dim))
    # create variables.
    if vars is None:
        varnames = ncfile3.variables.keys()
    else:
        # variables to copy specified
        varnames = vars
        # add dimension variables
        for dimname in ncfile3.dimensions.keys():
            if dimname in ncfile3.variables.keys() and \
               dimname not in varnames:
                varnames.append(dimname)
    for varname in varnames:
        ncvar = ncfile3.variables[varname]
        if not quiet: sys.stdout.write('copying variable %s\n' % varname)
        # quantize data?
        if lsd_dict is not None and varname in lsd_dict:
            lsd = lsd_dict[varname]
            if not quiet: sys.stdout.write('truncating to least_significant_digit = %d\n' % lsd)
        else:
            lsd = None # no quantization.
        # unpack short integers to floats?
        if unpackshort and hasattr(ncvar, 'scale_factor') and hasattr(ncvar, 'add_offset'):
            dounpackshort = True
            datatype = 'f4'
        else:
            dounpackshort = False
            datatype = ncvar.dtype
        # is there an unlimited dimension?
        if unlimdimname and unlimdimname in ncvar.dimensions:
            hasunlimdim = True
        else:
            hasunlimdim = False
        if dounpackshort:
            if not quiet: sys.stdout.write('unpacking short integers to floats ...\n')
        if hasattr(ncvar, '_FillValue'):
            FillValue = ncvar._FillValue
        else:
            FillValue = None
        var = ncfile4.createVariable(varname, datatype, ncvar.dimensions,
                                     fill_value=FillValue,
                                     least_significant_digit=lsd,
                                     zlib=zlib, complevel=complevel,
                                     shuffle=shuffle, fletcher32=fletcher32)
        # fill variable attributes.
        attdict = ncvar.__dict__
        if '_FillValue' in attdict: del attdict['_FillValue']
        if dounpackshort and 'add_offset' in attdict:
            del attdict['add_offset']
        if dounpackshort and 'scale_factor' in attdict:
            del attdict['scale_factor']
        if dounpackshort and 'missing_value' in attdict:
            attdict['missing_value'] = mval
        var.setncatts(attdict)
        # fill variables with data.
        if hasunlimdim: # has an unlim dim, loop over unlim dim index.
            # range to copy
            if nchunk:
                start = istart; stop = istop; step = nchunk
                if step < 1: step = 1
                for n in range(start, stop, step):
                    nmax = n + nchunk
                    if nmax > istop: nmax = istop
                    idata = ncvar[n:nmax]
                    if dounpackshort:
                        tmpdata = (ncvar.scale_factor*idata.astype('f') + ncvar.add_offset).astype('f')
                        if hasattr(ncvar, 'missing_value'):
                            tmpdata = np.where(idata == ncvar.missing_value, mval, tmpdata)
                    else:
                        tmpdata = idata
                    var[n-istart:nmax-istart] = tmpdata
            else:
                idata = ncvar[:]
                if dounpackshort:
                    tmpdata = (ncvar.scale_factor*idata.astype('f') + ncvar.add_offset).astype('f')
                    if hasattr(ncvar, 'missing_value'):
                        tmpdata = np.where(idata == ncvar.missing_value, mval, tmpdata)
                else:
                    tmpdata = idata
                var[0:len(unlimdim)] = tmpdata
        else: # no unlim dim or 1-d variable, just copy all data at once.
            idata = ncvar[:]
            if dounpackshort:
                tmpdata = (ncvar.scale_factor*idata.astype('f') + ncvar.add_offset).astype('f')
                if hasattr(ncvar, 'missing_value'):
                    tmpdata = np.where(idata == ncvar.missing_value, mval, tmpdata)
            else:
                tmpdata = idata
            var[:] = tmpdata
        ncfile4.sync() # flush data to disk
    # close files.
    ncfile3.close()
    ncfile4.close()

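A minimal usage sketch (hypothetical filenames; the lsd_dict entry quantizes the data of variable 'temp' to a precision of 0.01 while copying):

    >>> _nc3tonc4('data3.nc', 'data4.nc', lsd_dict={'temp': 2}, clobber=True, quiet=True)
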
def nc3tonc4():
    usage = """
 Convert a netCDF 3 file to netCDF 4 format, optionally
 unpacking variables packed as short integers (with scale_factor and add_offset)
 to floats, and adding zlib compression (with the HDF5 shuffle filter and fletcher32 checksum).
 Data may also be quantized (truncated) to a specified precision to improve compression.

 usage: %s [-h] [-o] [--vars=var1,var2,..] [--zlib=(0|1)] [--complevel=(1-9)] [--shuffle=(0|1)] [--fletcher32=(0|1)] [--unpackshort=(0|1)] [--quantize=var1=n1,var2=n2,..] netcdf3filename netcdf4filename
 -h -- Print usage message.
 -o -- Overwrite destination file (default is to raise an error if output file already exists).
 --vars -- comma separated list of variable names to copy (default is to copy
     all variables)
 --classic=(0|1) -- use NETCDF4_CLASSIC format instead of NETCDF4 (default 1)
 --zlib=(0|1) -- Activate (or disable) zlib compression (default is activate).
 --complevel=(1-9) -- Set zlib compression level (6 is default).
 --shuffle=(0|1) -- Activate (or disable) the shuffle filter (active by default).
 --fletcher32=(0|1) -- Activate (or disable) the fletcher32 checksum (not
     active by default).
 --unpackshort=(0|1) -- Unpack short integer variables to float variables
     using scale_factor and add_offset netCDF variable attributes (active by default).
 --quantize=(comma separated list of "variable name=integer" pairs) --
     Truncate the data in the specified variables to a given decimal precision.
     For example, 'speed=2,height=-2,temp=0' will cause the variable
     'speed' to be truncated to a precision of 0.01, 'height' to a precision of 100
     and 'temp' to 1. This can significantly improve compression. The default
     is not to quantize any of the variables.
 --quiet=(0|1) -- if 1, don't print diagnostic information.
 --chunk=(integer) -- number of records along unlimited dimension to
     write at once. Default 1000. Ignored if there is no unlimited
     dimension. chunk=0 means write all the data at once.
 --istart=(integer) -- number of record to start at along unlimited dimension.
     Default 0. Ignored if there is no unlimited dimension.
 --istop=(integer) -- number of record to stop at along unlimited dimension.
     Default -1. Ignored if there is no unlimited dimension.
\n""" % os.path.basename(sys.argv[0])

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'ho',
                                    ['classic=',
                                     'vars=',
                                     'zlib=',
                                     'quiet=',
                                     'complevel=',
                                     'shuffle=',
                                     'fletcher32=',
                                     'unpackshort=',
                                     'quantize=',
                                     'chunk=',
                                     'istart=',
                                     'istop='])
    except getopt.GetoptError:
        (type, value, traceback) = sys.exc_info()
        sys.stdout.write("Error parsing the options. The error was: %s\n" % value)
        sys.stderr.write(usage)
        sys.exit(0)

    # default options
    overwritefile = 0
    complevel = 6
    classic = 1
    zlib = 1
    shuffle = 1
    fletcher32 = 0
    unpackshort = 1
    vars = None
    quantize = None
    quiet = 0
    chunk = 1000
    istart = 0
    istop = -1

    # Get the options
    for option in opts:
        if option[0] == '-h':
            sys.stderr.write(usage)
            sys.exit(0)
        elif option[0] == '-o':
            overwritefile = 1
        elif option[0] == '--classic':
            classic = int(option[1])
        elif option[0] == '--zlib':
            zlib = int(option[1])
        elif option[0] == '--quiet':
            quiet = int(option[1])
        elif option[0] == '--complevel':
            complevel = int(option[1])
        elif option[0] == '--shuffle':
            shuffle = int(option[1])
        elif option[0] == '--fletcher32':
            fletcher32 = int(option[1])
        elif option[0] == '--unpackshort':
            unpackshort = int(option[1])
        elif option[0] == '--chunk':
            chunk = int(option[1])
        elif option[0] == '--vars':
            vars = option[1]
        elif option[0] == '--quantize':
            quantize = option[1]
        elif option[0] == '--istart':
            istart = int(option[1])
        elif option[0] == '--istop':
            istop = int(option[1])
        else:
            sys.stdout.write("%s: Unrecognized option\n" % option[0])
            sys.stderr.write(usage)
            sys.exit(0)

    # if we pass a number of files different from 2, abort
    if len(pargs) != 2:
        sys.stdout.write("You need to pass both source and destination!\n")
        sys.stderr.write(usage)
        sys.exit(0)

    # Catch the files passed as the last arguments
    filename3 = pargs[0]
    filename4 = pargs[1]

    # Parse the quantize option, create a dictionary from key/value pairs.
    if quantize is not None:
        lsd_dict = {}
        for p in quantize.split(','):
            kv = p.split('=')
            lsd_dict[kv[0]] = int(kv[1])
    else:
        lsd_dict = None

    # Parse the vars option, create a list of variable names.
    if vars is not None:
        vars = vars.split(',')

    # copy the data from filename3 to filename4.
    _nc3tonc4(filename3, filename4, unpackshort=unpackshort,
              zlib=zlib, complevel=complevel, shuffle=shuffle,
              fletcher32=fletcher32, clobber=overwritefile, lsd_dict=lsd_dict,
              nchunk=chunk, quiet=quiet, vars=vars, classic=classic,
              istart=istart, istop=istop)

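A typical command-line invocation might be (filenames hypothetical):

    nc3tonc4 -o --quantize=speed=2,temp=0 --complevel=9 data3.nc data4.nc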