NetCDF
4.3.2
|
00001 /********************************************************************* 00002 * Copyright 2010, University Corporation for Atmospheric Research 00003 * See netcdf/README file for copying and redistribution conditions. 00004 * Thanks to Philippe Poilbarbe and Antonio S. CofiƱo for 00005 * compression additions. 00006 * $Id: nccopy.c 400 2010-08-27 21:02:52Z russ $ 00007 *********************************************************************/ 00008 00009 #include "config.h" /* for USE_NETCDF4 macro */ 00010 #include <stdlib.h> 00011 #ifdef HAVE_GETOPT_H 00012 #include <getopt.h> 00013 #endif 00014 #ifdef HAVE_UNISTD_H 00015 #include <unistd.h> 00016 #endif 00017 #include <string.h> 00018 #include <netcdf.h> 00019 #include "nciter.h" 00020 #include "utils.h" 00021 #include "chunkspec.h" 00022 #include "dimmap.h" 00023 #include "nccomps.h" 00024 00025 #ifdef _MSC_VER 00026 #include "XGetopt.h" 00027 #define snprintf _snprintf 00028 int opterr; 00029 int optind; 00030 #endif 00031 00032 /* default bytes of memory we are willing to allocate for variable 00033 * values during copy */ 00034 #define COPY_BUFFER_SIZE (5000000) 00035 #define COPY_CHUNKCACHE_PREEMPTION (1.0f) /* for copying, can eject fully read chunks */ 00036 #define SAME_AS_INPUT (-1) /* default, if kind not specified */ 00037 #define CHUNK_THRESHOLD (8192) /* non-record variables with fewer bytes don't get chunked */ 00038 00039 #ifndef USE_NETCDF4 00040 #define NC_CLASSIC_MODEL 0x0100 /* Enforce classic model if netCDF-4 not available. */ 00041 #endif 00042 00043 /* Global variables for command-line requests */ 00044 char *progname; /* for error messages */ 00045 static int option_kind = SAME_AS_INPUT; 00046 static int option_deflate_level = -1; /* default, compress output only if input compressed */ 00047 static int option_shuffle_vars = NC_NOSHUFFLE; /* default, no shuffling on compression */ 00048 static int option_fix_unlimdims = 0; /* default, preserve unlimited dimensions */ 00049 static char* option_chunkspec = 0; /* default, no chunk specification */ 00050 static size_t option_copy_buffer_size = COPY_BUFFER_SIZE; 00051 static size_t option_chunk_cache_size = CHUNK_CACHE_SIZE; /* default from config.h */ 00052 static size_t option_chunk_cache_nelems = CHUNK_CACHE_NELEMS; /* default from config.h */ 00053 static int option_read_diskless = 0; /* default, don't read input into memory on open */ 00054 static int option_write_diskless = 0; /* default, don't write output to diskless file */ 00055 static int option_min_chunk_bytes = CHUNK_THRESHOLD; /* default, don't chunk variable if prod of 00056 * chunksizes of its dimensions is smaller 00057 * than this */ 00058 static int option_nlgrps = 0; /* Number of groups specified with -g 00059 * option on command line */ 00060 static char** option_lgrps = 0; /* list of group names specified with -g 00061 * option on command line */ 00062 static idnode_t* option_grpids = 0; /* list of grpids matching list specified with -g option */ 00063 static bool_t option_grpstruct = false; /* if -g set, copy structure for non-selected groups */ 00064 static int option_nlvars = 0; /* Number of variables specified with -v * option on command line */ 00065 static char** option_lvars = 0; /* list of variable names specified with -v 00066 * option on command line */ 00067 static bool_t option_varstruct = false; /* if -v set, copy structure for non-selected vars */ 00068 static int option_compute_chunkcaches = 0; /* default, don't try still flaky estimate of 00069 * chunk cache for each variable */ 00070 00071 /* get group id in output corresponding to group igrp in input, 00072 * given parent group id (or root group id) parid in output. */ 00073 static int 00074 get_grpid(int igrp, int parid, int *ogrpp) { 00075 int stat = NC_NOERR; 00076 int ogid = parid; /* like igrp but in output file */ 00077 #ifdef USE_NETCDF4 00078 int inparid; 00079 00080 /* if not root group, get corresponding output groupid from group name */ 00081 stat = nc_inq_grp_parent(igrp, &inparid); 00082 if(stat == NC_NOERR) { /* not root group */ 00083 char grpname[NC_MAX_NAME + 1]; 00084 NC_CHECK(nc_inq_grpname(igrp, grpname)); 00085 NC_CHECK(nc_inq_grp_ncid(parid, grpname, &ogid)); 00086 } else if(stat == NC_ENOGRP) { /* root group */ 00087 stat = NC_NOERR; 00088 } else { 00089 NC_CHECK(stat); 00090 } 00091 #endif /* USE_NETCDF4 */ 00092 *ogrpp = ogid; 00093 return stat; 00094 } 00095 00096 /* Return size in bytes of a variable value */ 00097 static size_t 00098 val_size(int grpid, int varid) { 00099 nc_type vartype; 00100 size_t value_size; 00101 NC_CHECK(nc_inq_vartype(grpid, varid, &vartype)); 00102 NC_CHECK(nc_inq_type(grpid, vartype, NULL, &value_size)); 00103 return value_size; 00104 } 00105 00106 #ifdef USE_NETCDF4 00107 /* Get parent id needed to define a new group from its full name in an 00108 * open file identified by ncid. Assumes all intermediate groups are 00109 * already defined. */ 00110 static int 00111 nc_inq_parid(int ncid, const char *fullname, int *locidp) { 00112 char *parent = strdup(fullname); 00113 char *slash = "/"; /* groupname separator */ 00114 char *last_slash; 00115 if(parent == NULL) { 00116 return NC_ENOMEM; /* exits */ 00117 } 00118 last_slash = strrchr(parent, '/'); 00119 if(last_slash == parent || last_slash == NULL) { /* parent is root */ 00120 free(parent); 00121 parent = strdup(slash); 00122 } else { 00123 *last_slash = '\0'; /* truncate to get parent name */ 00124 } 00125 NC_CHECK(nc_inq_grp_full_ncid(ncid, parent, locidp)); 00126 free(parent); 00127 return NC_NOERR; 00128 } 00129 00130 /* Return size of chunk in bytes for a variable varid in a group igrp, or 0 if 00131 * layout is contiguous */ 00132 static int 00133 inq_var_chunksize(int igrp, int varid, size_t* chunksizep) { 00134 int stat = NC_NOERR; 00135 int ndims; 00136 size_t *chunksizes; 00137 int dim; 00138 int contig = 1; 00139 nc_type vartype; 00140 size_t value_size; 00141 size_t prod; 00142 00143 NC_CHECK(nc_inq_vartype(igrp, varid, &vartype)); 00144 /* from type, get size in memory needed for each value */ 00145 NC_CHECK(nc_inq_type(igrp, vartype, NULL, &value_size)); 00146 prod = value_size; 00147 NC_CHECK(nc_inq_varndims(igrp, varid, &ndims)); 00148 chunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t)); 00149 if(ndims > 0) { 00150 NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, NULL)); 00151 } 00152 if(contig == 1) { 00153 *chunksizep = 0; 00154 } else { 00155 NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, chunksizes)); 00156 for(dim = 0; dim < ndims; dim++) { 00157 prod *= chunksizes[dim]; 00158 } 00159 *chunksizep = prod; 00160 } 00161 free(chunksizes); 00162 return stat; 00163 } 00164 00165 /* Return estimated number of elems required in chunk cache and 00166 * estimated size of chunk cache adequate to efficiently copy input 00167 * variable ivarid to output variable ovarid, which may have different 00168 * chunk size and shape */ 00169 static int 00170 inq_var_chunking_params(int igrp, int ivarid, int ogrp, int ovarid, 00171 size_t* chunkcache_sizep, 00172 size_t *chunkcache_nelemsp, 00173 float * chunkcache_preemptionp) 00174 { 00175 int stat = NC_NOERR; 00176 int ndims; 00177 size_t *ichunksizes, *ochunksizes; 00178 int dim; 00179 int icontig = 1, ocontig = 1; 00180 nc_type vartype; 00181 size_t value_size; 00182 size_t prod, iprod, oprod; 00183 size_t nelems; 00184 *chunkcache_nelemsp = CHUNK_CACHE_NELEMS; 00185 *chunkcache_sizep = CHUNK_CACHE_SIZE; 00186 *chunkcache_preemptionp = COPY_CHUNKCACHE_PREEMPTION; 00187 00188 NC_CHECK(nc_inq_varndims(igrp, ivarid, &ndims)); 00189 if(ndims > 0) { 00190 NC_CHECK(nc_inq_var_chunking(igrp, ivarid, &icontig, NULL)); 00191 NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &ocontig, NULL)); 00192 } 00193 if(icontig == 1 && ocontig == 1) { /* no chunking in input or output */ 00194 *chunkcache_nelemsp = 0; 00195 *chunkcache_sizep = 0; 00196 *chunkcache_preemptionp = 0; 00197 return stat; 00198 } 00199 00200 NC_CHECK(nc_inq_vartype(igrp, ivarid, &vartype)); 00201 NC_CHECK(nc_inq_type(igrp, vartype, NULL, &value_size)); 00202 iprod = value_size; 00203 00204 if(icontig == 0 && ocontig == 1) { /* chunking only in input */ 00205 *chunkcache_nelemsp = 1; /* read one input chunk at a time */ 00206 *chunkcache_sizep = iprod; 00207 *chunkcache_preemptionp = 1.0f; 00208 return stat; 00209 } 00210 00211 ichunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t)); 00212 if(icontig == 1) { /* if input contiguous, treat as if chunked on 00213 * first dimension */ 00214 ichunksizes[0] = 1; 00215 for(dim = 1; dim < ndims; dim++) { 00216 ichunksizes[dim] = dim; 00217 } 00218 } else { 00219 NC_CHECK(nc_inq_var_chunking(igrp, ivarid, &icontig, ichunksizes)); 00220 } 00221 00222 /* now can assume chunking in both input and output */ 00223 ochunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t)); 00224 NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &ocontig, ochunksizes)); 00225 00226 nelems = 1; 00227 oprod = value_size; 00228 for(dim = 0; dim < ndims; dim++) { 00229 nelems += 1 + (ichunksizes[dim] - 1) / ochunksizes[dim]; 00230 iprod *= ichunksizes[dim]; 00231 oprod *= ochunksizes[dim]; 00232 } 00233 prod = iprod + oprod * (nelems - 1); 00234 *chunkcache_nelemsp = nelems; 00235 *chunkcache_sizep = prod; 00236 free(ichunksizes); 00237 free(ochunksizes); 00238 return stat; 00239 } 00240 00241 /* Forward declaration, because copy_type, copy_vlen_type call each other */ 00242 static int copy_type(int igrp, nc_type typeid, int ogrp); 00243 00244 /* 00245 * copy a user-defined variable length type in the group igrp to the 00246 * group ogrp 00247 */ 00248 static int 00249 copy_vlen_type(int igrp, nc_type itype, int ogrp) 00250 { 00251 int stat = NC_NOERR; 00252 nc_type ibasetype; 00253 nc_type obasetype; /* base type in target group */ 00254 char name[NC_MAX_NAME]; 00255 size_t size; 00256 char basename[NC_MAX_NAME]; 00257 size_t basesize; 00258 nc_type vlen_type; 00259 00260 NC_CHECK(nc_inq_vlen(igrp, itype, name, &size, &ibasetype)); 00261 /* to get base type id in target group, use name of base type in 00262 * source group */ 00263 NC_CHECK(nc_inq_type(igrp, ibasetype, basename, &basesize)); 00264 stat = nc_inq_typeid(ogrp, basename, &obasetype); 00265 /* if no such type, create it now */ 00266 if(stat == NC_EBADTYPE) { 00267 NC_CHECK(copy_type(igrp, ibasetype, ogrp)); 00268 stat = nc_inq_typeid(ogrp, basename, &obasetype); 00269 } 00270 NC_CHECK(stat); 00271 00272 /* Now we know base type exists in output and we know its type id */ 00273 NC_CHECK(nc_def_vlen(ogrp, name, obasetype, &vlen_type)); 00274 00275 return stat; 00276 } 00277 00278 /* 00279 * copy a user-defined opaque type in the group igrp to the group ogrp 00280 */ 00281 static int 00282 copy_opaque_type(int igrp, nc_type itype, int ogrp) 00283 { 00284 int stat = NC_NOERR; 00285 nc_type otype; 00286 char name[NC_MAX_NAME]; 00287 size_t size; 00288 00289 NC_CHECK(nc_inq_opaque(igrp, itype, name, &size)); 00290 NC_CHECK(nc_def_opaque(ogrp, size, name, &otype)); 00291 00292 return stat; 00293 } 00294 00295 /* 00296 * copy a user-defined enum type in the group igrp to the group ogrp 00297 */ 00298 static int 00299 copy_enum_type(int igrp, nc_type itype, int ogrp) 00300 { 00301 int stat = NC_NOERR; 00302 nc_type otype; 00303 nc_type basetype; 00304 size_t basesize; 00305 size_t nmembers; 00306 char name[NC_MAX_NAME]; 00307 int i; 00308 00309 NC_CHECK(nc_inq_enum(igrp, itype, name, &basetype, &basesize, &nmembers)); 00310 NC_CHECK(nc_def_enum(ogrp, basetype, name, &otype)); 00311 for(i = 0; i < nmembers; i++) { /* insert enum members */ 00312 char ename[NC_MAX_NAME]; 00313 long long val; /* large enough to hold any integer type */ 00314 NC_CHECK(nc_inq_enum_member(igrp, itype, i, ename, &val)); 00315 NC_CHECK(nc_insert_enum(ogrp, otype, ename, &val)); 00316 } 00317 return stat; 00318 } 00319 00320 /* 00321 * copy a user-defined compound type in the group igrp to the group ogrp 00322 */ 00323 static int 00324 copy_compound_type(int igrp, nc_type itype, int ogrp) 00325 { 00326 int stat = NC_NOERR; 00327 char name[NC_MAX_NAME]; 00328 size_t size; 00329 size_t nfields; 00330 nc_type otype; 00331 int fid; 00332 00333 NC_CHECK(nc_inq_compound(igrp, itype, name, &size, &nfields)); 00334 NC_CHECK(nc_def_compound(ogrp, size, name, &otype)); 00335 00336 for (fid = 0; fid < nfields; fid++) { 00337 char fname[NC_MAX_NAME]; 00338 char ftypename[NC_MAX_NAME]; 00339 size_t foff; 00340 nc_type iftype, oftype; 00341 int fndims; 00342 00343 NC_CHECK(nc_inq_compound_field(igrp, itype, fid, fname, &foff, &iftype, &fndims, NULL)); 00344 /* type ids in source don't necessarily correspond to same 00345 * typeids in destination, so look up destination typeid by using 00346 * field type name */ 00347 NC_CHECK(nc_inq_type(igrp, iftype, ftypename, NULL)); 00348 NC_CHECK(nc_inq_typeid(ogrp, ftypename, &oftype)); 00349 if(fndims == 0) { 00350 NC_CHECK(nc_insert_compound(ogrp, otype, fname, foff, oftype)); 00351 } else { /* field is array type */ 00352 int *fdimsizes; 00353 fdimsizes = (int *) emalloc((fndims + 1) * sizeof(int)); 00354 stat = nc_inq_compound_field(igrp, itype, fid, NULL, NULL, NULL, 00355 NULL, fdimsizes); 00356 NC_CHECK(nc_insert_array_compound(ogrp, otype, fname, foff, oftype, fndims, fdimsizes)); 00357 free(fdimsizes); 00358 } 00359 } 00360 return stat; 00361 } 00362 00363 00364 /* 00365 * copy a user-defined type in the group igrp to the group ogrp 00366 */ 00367 static int 00368 copy_type(int igrp, nc_type typeid, int ogrp) 00369 { 00370 int stat = NC_NOERR; 00371 nc_type type_class; 00372 00373 NC_CHECK(nc_inq_user_type(igrp, typeid, NULL, NULL, NULL, NULL, &type_class)); 00374 00375 switch(type_class) { 00376 case NC_VLEN: 00377 NC_CHECK(copy_vlen_type(igrp, typeid, ogrp)); 00378 break; 00379 case NC_OPAQUE: 00380 NC_CHECK(copy_opaque_type(igrp, typeid, ogrp)); 00381 break; 00382 case NC_ENUM: 00383 NC_CHECK(copy_enum_type(igrp, typeid, ogrp)); 00384 break; 00385 case NC_COMPOUND: 00386 NC_CHECK(copy_compound_type(igrp, typeid, ogrp)); 00387 break; 00388 default: 00389 NC_CHECK(NC_EBADTYPE); 00390 } 00391 return stat; 00392 } 00393 00394 /* Copy a group and all its subgroups, recursively, from iroot to 00395 * oroot, the ncids of input file and output file. This just creates 00396 * all the groups in the destination, but doesn't copy anything that's 00397 * in the groups yet. */ 00398 static int 00399 copy_groups(int iroot, int oroot) 00400 { 00401 int stat = NC_NOERR; 00402 int numgrps; 00403 int *grpids; 00404 int i; 00405 00406 /* get total number of groups and their ids, including all descendants */ 00407 NC_CHECK(nc_inq_grps_full(iroot, &numgrps, NULL)); 00408 if(numgrps > 1) { /* there's always 1 root group */ 00409 grpids = emalloc(numgrps * sizeof(int)); 00410 NC_CHECK(nc_inq_grps_full(iroot, NULL, grpids)); 00411 /* create corresponding new groups in ogrp, except for root group */ 00412 for(i = 1; i < numgrps; i++) { 00413 char *grpname_full; 00414 char grpname[NC_MAX_NAME]; 00415 size_t len_name; 00416 int ogid = 0, oparid = 0, iparid = 0; 00417 /* get full group name of input group */ 00418 NC_CHECK(nc_inq_grpname(grpids[i], grpname)); 00419 if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) { 00420 NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, NULL)); 00421 grpname_full = emalloc(len_name + 1); 00422 NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, grpname_full)); 00423 /* Make sure, the parent group is also wanted (root group is always wanted) */ 00424 NC_CHECK(nc_inq_parid(iroot, grpname_full, &iparid)); 00425 if (!option_grpstruct && !group_wanted(iparid, option_nlgrps, option_grpids) 00426 && iparid != iroot) { 00427 error("ERROR: trying to copy a group but not the parent: %s", grpname_full); 00428 } 00429 /* get id of parent group of corresponding group in output. 00430 * Note that this exists, because nc_inq_groups returned 00431 * grpids in preorder, so parents are always copied before 00432 * their subgroups */ 00433 NC_CHECK(nc_inq_parid(oroot, grpname_full, &oparid)); 00434 NC_CHECK(nc_inq_grpname(grpids[i], grpname)); 00435 /* define corresponding group in output */ 00436 NC_CHECK(nc_def_grp(oparid, grpname, &ogid)); 00437 free(grpname_full); 00438 } 00439 } 00440 free(grpids); 00441 } 00442 return stat; 00443 } 00444 00445 /* 00446 * Copy the user-defined types in this group (igrp) and all its 00447 * subgroups, recursively, to corresponding group in output (ogrp) 00448 */ 00449 static int 00450 copy_types(int igrp, int ogrp) 00451 { 00452 int stat = NC_NOERR; 00453 int ntypes; 00454 nc_type *types = NULL; 00455 int numgrps; 00456 int *grpids = NULL; 00457 int i; 00458 00459 NC_CHECK(nc_inq_typeids(igrp, &ntypes, NULL)); 00460 00461 if(ntypes > 0) { 00462 types = (nc_type *) emalloc(ntypes * sizeof(nc_type)); 00463 NC_CHECK(nc_inq_typeids(igrp, &ntypes, types)); 00464 for (i = 0; i < ntypes; i++) { 00465 NC_CHECK(copy_type(igrp, types[i], ogrp)); 00466 } 00467 free(types); 00468 } 00469 00470 /* Copy types from subgroups */ 00471 NC_CHECK(nc_inq_grps(igrp, &numgrps, NULL)); 00472 if(numgrps > 0) { 00473 grpids = (int *)emalloc(sizeof(int) * numgrps); 00474 NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids)); 00475 for(i = 0; i < numgrps; i++) { 00476 if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) { 00477 int ogid; 00478 /* get groupid in output corresponding to grpids[i] in 00479 * input, given parent group (or root group) ogrp in 00480 * output */ 00481 NC_CHECK(get_grpid(grpids[i], ogrp, &ogid)); 00482 NC_CHECK(copy_types(grpids[i], ogid)); 00483 } 00484 } 00485 free(grpids); 00486 } 00487 return stat; 00488 } 00489 00490 /* Copy all netCDF-4 specific variable properties such as chunking, 00491 * endianness, deflation, checksumming, fill, etc. */ 00492 static int 00493 copy_var_specials(int igrp, int varid, int ogrp, int o_varid) 00494 { 00495 int stat = NC_NOERR; 00496 { /* handle chunking parameters */ 00497 int ndims; 00498 NC_CHECK(nc_inq_varndims(igrp, varid, &ndims)); 00499 if (ndims > 0) { /* no chunking for scalar variables */ 00500 int contig = 0; 00501 size_t *chunkp = (size_t *) emalloc(ndims * sizeof(size_t)); 00502 int *dimids = (int *) emalloc(ndims * sizeof(int)); 00503 int idim; 00504 /* size of a chunk: product of dimension chunksizes and size of value */ 00505 size_t csprod = val_size(ogrp, o_varid); 00506 int is_unlimited = 0; 00507 NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, chunkp)); 00508 NC_CHECK(nc_inq_vardimid(igrp, varid, dimids)); 00509 00510 for(idim = 0; idim < ndims; idim++) { 00511 int idimid = dimids[idim]; 00512 int odimid = dimmap_odimid(idimid); 00513 size_t chunksize = chunkspec_size(idimid); 00514 if(chunksize > 0) { /* found in chunkspec */ 00515 chunkp[idim] = chunksize; 00516 } 00517 csprod *= chunkp[idim]; 00518 if(dimmap_ounlim(odimid)) 00519 is_unlimited = 1; 00520 } 00521 /* Explicitly set chunking, even if default */ 00522 /* If product of chunksizes is too small and no unlimited 00523 * dimensions used, don't chunk. Also if chunking 00524 * explicitly turned off with chunk spec, don't chunk. */ 00525 if ((csprod < option_min_chunk_bytes && !is_unlimited) || contig == 1 00526 || chunkspec_omit() == true) { 00527 NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CONTIGUOUS, NULL)); 00528 } else { 00529 NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CHUNKED, chunkp)); 00530 } 00531 free(dimids); 00532 free(chunkp); 00533 } 00534 } 00535 { /* handle compression parameters, copying from input, overriding 00536 * with command-line options */ 00537 int shuffle_in=0, deflate_in=0, deflate_level_in=0; 00538 int shuffle_out=0, deflate_out=0, deflate_level_out=0; 00539 if(option_deflate_level != 0) { 00540 NC_CHECK(nc_inq_var_deflate(igrp, varid, &shuffle_in, &deflate_in, &deflate_level_in)); 00541 if(option_deflate_level == -1) { /* not specified, copy input compression and shuffling */ 00542 shuffle_out = shuffle_in; 00543 deflate_out = deflate_in; 00544 deflate_level_out = deflate_level_in; 00545 } else if(option_deflate_level > 0) { /* change to specified compression, shuffling */ 00546 shuffle_out = option_shuffle_vars; 00547 deflate_out=1; 00548 deflate_level_out = option_deflate_level; 00549 } 00550 NC_CHECK(nc_def_var_deflate(ogrp, o_varid, shuffle_out, deflate_out, deflate_level_out)); 00551 } 00552 } 00553 { /* handle checksum parameters */ 00554 int fletcher32 = 0; 00555 NC_CHECK(nc_inq_var_fletcher32(igrp, varid, &fletcher32)); 00556 if(fletcher32 != 0) { 00557 NC_CHECK(nc_def_var_fletcher32(ogrp, o_varid, fletcher32)); 00558 } 00559 } 00560 { /* handle endianness */ 00561 int endianness = 0; 00562 NC_CHECK(nc_inq_var_endian(igrp, varid, &endianness)); 00563 if(endianness != NC_ENDIAN_NATIVE) { /* native is the default */ 00564 NC_CHECK(nc_def_var_endian(ogrp, o_varid, endianness)); 00565 } 00566 } 00567 return stat; 00568 } 00569 00570 /* Set output variable o_varid (in group ogrp) to use chunking 00571 * specified on command line, only called for classic format input and 00572 * netCDF-4 format output, so no existing chunk lengths to override. */ 00573 static int 00574 set_var_chunked(int ogrp, int o_varid) 00575 { 00576 int stat = NC_NOERR; 00577 int ndims; 00578 int odim; 00579 size_t chunk_threshold = CHUNK_THRESHOLD; 00580 00581 if(chunkspec_ndims() == 0) /* no chunking specified on command line */ 00582 return stat; 00583 NC_CHECK(nc_inq_varndims(ogrp, o_varid, &ndims)); 00584 00585 if (ndims > 0) { /* no chunking for scalar variables */ 00586 int chunked = 0; 00587 int *dimids = (int *) emalloc(ndims * sizeof(int)); 00588 size_t varsize; 00589 nc_type vartype; 00590 size_t value_size; 00591 int is_unlimited = 0; 00592 00593 NC_CHECK(nc_inq_vardimid (ogrp, o_varid, dimids)); 00594 NC_CHECK(nc_inq_vartype(ogrp, o_varid, &vartype)); 00595 /* from type, get size in memory needed for each value */ 00596 NC_CHECK(nc_inq_type(ogrp, vartype, NULL, &value_size)); 00597 varsize = value_size; 00598 00599 /* Determine if this variable should be chunked. A variable 00600 * should be chunked if any of its dims are in command-line 00601 * chunk spec. It will also be chunked if any of its 00602 * dims are unlimited. */ 00603 for(odim = 0; odim < ndims; odim++) { 00604 int odimid = dimids[odim]; 00605 int idimid = dimmap_idimid(odimid); /* corresponding dimid in input file */ 00606 if(dimmap_ounlim(odimid)) 00607 is_unlimited = 1; 00608 if(idimid != -1) { 00609 size_t chunksize = chunkspec_size(idimid); /* from chunkspec */ 00610 size_t dimlen; 00611 NC_CHECK(nc_inq_dimlen(ogrp, odimid, &dimlen)); 00612 if( (chunksize > 0) || dimmap_ounlim(odimid)) { 00613 chunked = 1; 00614 } 00615 varsize *= dimlen; 00616 } 00617 } 00618 /* Don't chunk small variables that don't use an unlimited 00619 * dimension. */ 00620 if(varsize < chunk_threshold && !is_unlimited) 00621 chunked = 0; 00622 00623 if(chunked) { 00624 /* Allocate chunksizes and set defaults to dimsize for any 00625 * dimensions not mentioned in chunkspec. */ 00626 size_t *chunkp = (size_t *) emalloc(ndims * sizeof(size_t)); 00627 for(odim = 0; odim < ndims; odim++) { 00628 int odimid = dimids[odim]; 00629 int idimid = dimmap_idimid(odimid); 00630 size_t chunksize = chunkspec_size(idimid); 00631 if(chunksize > 0) { 00632 chunkp[odim] = chunksize; 00633 } else { 00634 NC_CHECK(nc_inq_dimlen(ogrp, odimid, &chunkp[odim])); 00635 } 00636 } 00637 NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CHUNKED, chunkp)); 00638 free(chunkp); 00639 } 00640 free(dimids); 00641 } 00642 return stat; 00643 } 00644 00645 /* Set variable to compression specified on command line */ 00646 static int 00647 set_var_compressed(int ogrp, int o_varid) 00648 { 00649 int stat = NC_NOERR; 00650 if (option_deflate_level > 0) { 00651 int deflate = 1; 00652 NC_CHECK(nc_def_var_deflate(ogrp, o_varid, option_shuffle_vars, deflate, option_deflate_level)); 00653 } 00654 return stat; 00655 } 00656 00657 /* Release the variable chunk cache allocated for variable varid in 00658 * group grp. This is not necessary, but will save some memory when 00659 * processing one variable at a time. */ 00660 #ifdef UNUSED 00661 static int 00662 free_var_chunk_cache(int grp, int varid) 00663 { 00664 int stat = NC_NOERR; 00665 size_t chunk_cache_size = 1; 00666 size_t cache_nelems = 1; 00667 float cache_preemp = 0; 00668 int kind; 00669 NC_CHECK(nc_inq_format(grp, &kind)); 00670 if(kind == NC_FORMAT_NETCDF4 || kind == NC_FORMAT_NETCDF4_CLASSIC) { 00671 int contig = 1; 00672 NC_CHECK(nc_inq_var_chunking(grp, varid, &contig, NULL)); 00673 if(contig == 0) { /* chunked */ 00674 NC_CHECK(nc_set_var_chunk_cache(grp, varid, chunk_cache_size, cache_nelems, cache_preemp)); 00675 } 00676 } 00677 return stat; 00678 } 00679 #endif 00680 00681 #endif /* USE_NETCDF4 */ 00682 00683 /* Copy dimensions from group igrp to group ogrp, also associate input 00684 * dimids with output dimids (they need not match, because the input 00685 * dimensions may have been defined in a different order than we define 00686 * the output dimensions here. */ 00687 static int 00688 copy_dims(int igrp, int ogrp) 00689 { 00690 int stat = NC_NOERR; 00691 int ndims; 00692 int dgrp; 00693 #ifdef USE_NETCDF4 00694 int nunlims; 00695 int *dimids; 00696 int *unlimids; 00697 #else 00698 int unlimid; 00699 #endif /* USE_NETCDF4 */ 00700 00701 NC_CHECK(nc_inq_ndims(igrp, &ndims)); 00702 00703 #ifdef USE_NETCDF4 00704 /* In netCDF-4 files, dimids may not be sequential because they 00705 * may be defined in various groups, and we are only looking at one 00706 * group at a time. */ 00707 /* Find the dimension ids in this group, don't include parents. */ 00708 dimids = (int *) emalloc((ndims + 1) * sizeof(int)); 00709 NC_CHECK(nc_inq_dimids(igrp, NULL, dimids, 0)); 00710 /* Find the number of unlimited dimensions and get their IDs */ 00711 NC_CHECK(nc_inq_unlimdims(igrp, &nunlims, NULL)); 00712 unlimids = (int *) emalloc((nunlims + 1) * sizeof(int)); 00713 NC_CHECK(nc_inq_unlimdims(igrp, NULL, unlimids)); 00714 #else 00715 NC_CHECK(nc_inq_unlimdim(igrp, &unlimid)); 00716 #endif /* USE_NETCDF4 */ 00717 00718 /* Copy each dimension to output, including unlimited dimension(s) */ 00719 for (dgrp = 0; dgrp < ndims; dgrp++) { 00720 char name[NC_MAX_NAME]; 00721 size_t length; 00722 int i_is_unlim; 00723 int o_is_unlim; 00724 int idimid, odimid; 00725 #ifdef USE_NETCDF4 00726 int uld; 00727 #endif 00728 00729 i_is_unlim = 0; 00730 #ifdef USE_NETCDF4 00731 idimid = dimids[dgrp]; 00732 for (uld = 0; uld < nunlims; uld++) { 00733 if(idimid == unlimids[uld]) { 00734 i_is_unlim = 1; 00735 break; 00736 } 00737 } 00738 #else 00739 idimid = dgrp; 00740 if(unlimid != -1 && (idimid == unlimid)) { 00741 i_is_unlim = 1; 00742 } 00743 #endif /* USE_NETCDF4 */ 00744 00745 stat = nc_inq_dim(igrp, idimid, name, &length); 00746 if (stat == NC_EDIMSIZE && sizeof(size_t) < 8) { 00747 error("dimension \"%s\" requires 64-bit platform", name); 00748 } 00749 NC_CHECK(stat); 00750 o_is_unlim = i_is_unlim; 00751 if(i_is_unlim && !option_fix_unlimdims) { 00752 NC_CHECK(nc_def_dim(ogrp, name, NC_UNLIMITED, &odimid)); 00753 } else { 00754 NC_CHECK(nc_def_dim(ogrp, name, length, &odimid)); 00755 o_is_unlim = 0; 00756 } 00757 /* Store (idimid, odimid) mapping for later use, also whether unlimited */ 00758 dimmap_store(idimid, odimid, i_is_unlim, o_is_unlim); 00759 } 00760 #ifdef USE_NETCDF4 00761 free(dimids); 00762 free(unlimids); 00763 #endif /* USE_NETCDF4 */ 00764 return stat; 00765 } 00766 00767 /* Copy the attributes for variable ivar in group igrp to variable 00768 * ovar in group ogrp. Global (group) attributes are specified by 00769 * using the varid NC_GLOBAL */ 00770 static int 00771 copy_atts(int igrp, int ivar, int ogrp, int ovar) 00772 { 00773 int natts; 00774 int iatt; 00775 int stat = NC_NOERR; 00776 00777 NC_CHECK(nc_inq_varnatts(igrp, ivar, &natts)); 00778 00779 for(iatt = 0; iatt < natts; iatt++) { 00780 char name[NC_MAX_NAME]; 00781 NC_CHECK(nc_inq_attname(igrp, ivar, iatt, name)); 00782 NC_CHECK(nc_copy_att(igrp, ivar, name, ogrp, ovar)); 00783 } 00784 return stat; 00785 } 00786 00787 /* copy the schema for a single variable in group igrp to group ogrp */ 00788 static int 00789 copy_var(int igrp, int varid, int ogrp) 00790 { 00791 int stat = NC_NOERR; 00792 int ndims; 00793 int *idimids; /* ids of dims for input variable */ 00794 int *odimids; /* ids of dims for output variable */ 00795 char name[NC_MAX_NAME]; 00796 nc_type typeid, o_typeid; 00797 int natts; 00798 int i; 00799 int o_varid; 00800 00801 NC_CHECK(nc_inq_varndims(igrp, varid, &ndims)); 00802 idimids = (int *) emalloc((ndims + 1) * sizeof(int)); 00803 NC_CHECK(nc_inq_var(igrp, varid, name, &typeid, NULL, idimids, &natts)); 00804 o_typeid = typeid; 00805 #ifdef USE_NETCDF4 00806 if (typeid > NC_STRING) { /* user-defined type */ 00807 /* type ids in source don't necessarily correspond to same 00808 * typeids in destination, so look up destination typeid by 00809 * using type name */ 00810 char type_name[NC_MAX_NAME]; 00811 NC_CHECK(nc_inq_type(igrp, typeid, type_name, NULL)); 00812 NC_CHECK(nc_inq_typeid(ogrp, type_name, &o_typeid)); 00813 } 00814 #endif /* USE_NETCDF4 */ 00815 00816 /* get the corresponding dimids in the output file */ 00817 odimids = (int *) emalloc((ndims + 1) * sizeof(int)); 00818 for(i = 0; i < ndims; i++) { 00819 odimids[i] = dimmap_odimid(idimids[i]); 00820 if(odimids[i] == -1) { 00821 error("Oops, no dimension in output associated with input dimid %d", idimids[i]); 00822 } 00823 } 00824 00825 /* define the output variable */ 00826 NC_CHECK(nc_def_var(ogrp, name, o_typeid, ndims, odimids, &o_varid)); 00827 /* attach the variable attributes to the output variable */ 00828 NC_CHECK(copy_atts(igrp, varid, ogrp, o_varid)); 00829 #ifdef USE_NETCDF4 00830 { 00831 int inkind; 00832 int outkind; 00833 NC_CHECK(nc_inq_format(igrp, &inkind)); 00834 NC_CHECK(nc_inq_format(ogrp, &outkind)); 00835 if(outkind == NC_FORMAT_NETCDF4 || outkind == NC_FORMAT_NETCDF4_CLASSIC) { 00836 if((inkind == NC_FORMAT_NETCDF4 || inkind == NC_FORMAT_NETCDF4_CLASSIC)) { 00837 /* Copy all netCDF-4 specific variable properties such as 00838 * chunking, endianness, deflation, checksumming, fill, etc. */ 00839 NC_CHECK(copy_var_specials(igrp, varid, ogrp, o_varid)); 00840 } else { 00841 /* Set chunking if specified in command line option */ 00842 NC_CHECK(set_var_chunked(ogrp, o_varid)); 00843 /* Set compression if specified in command line option */ 00844 NC_CHECK(set_var_compressed(ogrp, o_varid)); 00845 } 00846 } 00847 } 00848 #endif /* USE_NETCDF4 */ 00849 free(idimids); 00850 free(odimids); 00851 return stat; 00852 } 00853 00854 /* copy the schema for all the variables in group igrp to group ogrp */ 00855 static int 00856 copy_vars(int igrp, int ogrp) 00857 { 00858 int stat = NC_NOERR; 00859 int nvars; 00860 int varid; 00861 00862 int iv; /* variable number */ 00863 idnode_t* vlist = 0; /* list for vars specified with -v option */ 00864 00865 /* 00866 * If any vars were specified with -v option, get list of 00867 * associated variable ids relative to this group. Assume vars 00868 * specified with syntax like "grp1/grp2/varname" or 00869 * "/grp1/grp2/varname" if they are in groups. 00870 */ 00871 vlist = newidlist(); /* list for vars specified with -v option */ 00872 for (iv=0; iv < option_nlvars; iv++) { 00873 if(nc_inq_gvarid(igrp, option_lvars[iv], &varid) == NC_NOERR) 00874 idadd(vlist, varid); 00875 } 00876 00877 NC_CHECK(nc_inq_nvars(igrp, &nvars)); 00878 for (varid = 0; varid < nvars; varid++) { 00879 if (!option_varstruct && option_nlvars > 0 && ! idmember(vlist, varid)) 00880 continue; 00881 NC_CHECK(copy_var(igrp, varid, ogrp)); 00882 } 00883 freeidlist(vlist); 00884 return stat; 00885 } 00886 00887 /* Copy the schema in a group and all its subgroups, recursively, from 00888 * group igrp in input to parent group ogrp in destination. Use 00889 * dimmap array to map input dimids to output dimids. */ 00890 static int 00891 copy_schema(int igrp, int ogrp) 00892 { 00893 int stat = NC_NOERR; 00894 int ogid; /* like igrp but in output file */ 00895 00896 /* get groupid in output corresponding to group igrp in input, 00897 * given parent group (or root group) ogrp in output */ 00898 NC_CHECK(get_grpid(igrp, ogrp, &ogid)); 00899 00900 NC_CHECK(copy_dims(igrp, ogid)); 00901 NC_CHECK(copy_atts(igrp, NC_GLOBAL, ogid, NC_GLOBAL)); 00902 NC_CHECK(copy_vars(igrp, ogid)); 00903 #ifdef USE_NETCDF4 00904 { 00905 int numgrps; 00906 int *grpids; 00907 int i; 00908 /* Copy schema from subgroups */ 00909 stat = nc_inq_grps(igrp, &numgrps, NULL); 00910 grpids = (int *)emalloc((numgrps + 1) * sizeof(int)); 00911 NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids)); 00912 00913 for(i = 0; i < numgrps; i++) { 00914 if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) { 00915 NC_CHECK(copy_schema(grpids[i], ogid)); 00916 } 00917 } 00918 free(grpids); 00919 } 00920 #endif /* USE_NETCDF4 */ 00921 return stat; 00922 } 00923 00924 /* Return number of values for a variable varid in a group igrp */ 00925 static int 00926 inq_nvals(int igrp, int varid, long long *nvalsp) { 00927 int stat = NC_NOERR; 00928 int ndims; 00929 int *dimids; 00930 int dim; 00931 long long nvals = 1; 00932 00933 NC_CHECK(nc_inq_varndims(igrp, varid, &ndims)); 00934 dimids = (int *) emalloc((ndims + 1) * sizeof(int)); 00935 NC_CHECK(nc_inq_vardimid (igrp, varid, dimids)); 00936 for(dim = 0; dim < ndims; dim++) { 00937 size_t len; 00938 NC_CHECK(nc_inq_dimlen(igrp, dimids[dim], &len)); 00939 nvals *= len; 00940 } 00941 if(nvalsp) 00942 *nvalsp = nvals; 00943 free(dimids); 00944 return stat; 00945 } 00946 00947 /* Copy data from variable varid in group igrp to corresponding group 00948 * ogrp. */ 00949 static int 00950 copy_var_data(int igrp, int varid, int ogrp) { 00951 int stat = NC_NOERR; 00952 nc_type vartype; 00953 long long nvalues; /* number of values for this variable */ 00954 size_t ntoget; /* number of values to access this iteration */ 00955 size_t value_size; /* size of a single value of this variable */ 00956 static void *buf = 0; /* buffer for the variable values */ 00957 char varname[NC_MAX_NAME]; 00958 int ovarid; 00959 size_t *start; 00960 size_t *count; 00961 nciter_t *iterp; /* opaque structure for iteration status */ 00962 int do_realloc = 0; 00963 #ifdef USE_NETCDF4 00964 int okind; 00965 size_t chunksize; 00966 #endif 00967 00968 NC_CHECK(inq_nvals(igrp, varid, &nvalues)); 00969 if(nvalues == 0) 00970 return stat; 00971 /* get corresponding output variable */ 00972 NC_CHECK(nc_inq_varname(igrp, varid, varname)); 00973 NC_CHECK(nc_inq_varid(ogrp, varname, &ovarid)); 00974 NC_CHECK(nc_inq_vartype(igrp, varid, &vartype)); 00975 value_size = val_size(igrp, varid); 00976 if(value_size > option_copy_buffer_size) { 00977 option_copy_buffer_size = value_size; 00978 do_realloc = 1; 00979 } 00980 #ifdef USE_NETCDF4 00981 NC_CHECK(nc_inq_format(ogrp, &okind)); 00982 if(okind == NC_FORMAT_NETCDF4 || okind == NC_FORMAT_NETCDF4_CLASSIC) { 00983 /* if this variable chunked, set variable chunk cache size */ 00984 int contig = 1; 00985 NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &contig, NULL)); 00986 if(contig == 0) { /* chunked */ 00987 if(option_compute_chunkcaches) { 00988 /* Try to estimate variable-specific chunk cache, 00989 * depending on specific size and shape of this 00990 * variable's chunks. This doesn't work yet. */ 00991 size_t chunkcache_size, chunkcache_nelems; 00992 float chunkcache_preemption; 00993 NC_CHECK(inq_var_chunking_params(igrp, varid, ogrp, ovarid, 00994 &chunkcache_size, 00995 &chunkcache_nelems, 00996 &chunkcache_preemption)); 00997 NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, 00998 chunkcache_size, 00999 chunkcache_nelems, 01000 chunkcache_preemption)); 01001 } else { 01002 /* by default, use same chunk cache for all chunked variables */ 01003 NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, 01004 option_chunk_cache_size, 01005 option_chunk_cache_nelems, 01006 COPY_CHUNKCACHE_PREEMPTION)); 01007 } 01008 } 01009 } 01010 /* For chunked variables, option_copy_buffer_size must also be at least as large as 01011 * size of a chunk in input, otherwise resize it. */ 01012 { 01013 NC_CHECK(inq_var_chunksize(igrp, varid, &chunksize)); 01014 if(chunksize > option_copy_buffer_size) { 01015 option_copy_buffer_size = chunksize; 01016 do_realloc = 1; 01017 } 01018 } 01019 #endif /* USE_NETCDF4 */ 01020 if(buf && do_realloc) { 01021 free(buf); 01022 buf = 0; 01023 } 01024 if(buf == 0) { /* first time or needs to grow */ 01025 buf = emalloc(option_copy_buffer_size); 01026 memset((void*)buf,0,option_copy_buffer_size); 01027 } 01028 01029 /* initialize variable iteration */ 01030 NC_CHECK(nc_get_iter(igrp, varid, option_copy_buffer_size, &iterp)); 01031 01032 start = (size_t *) emalloc((iterp->rank + 1) * sizeof(size_t)); 01033 count = (size_t *) emalloc((iterp->rank + 1) * sizeof(size_t)); 01034 /* nc_next_iter() initializes start and count on first call, 01035 * changes start and count to iterate through whole variable on 01036 * subsequent calls. */ 01037 while((ntoget = nc_next_iter(iterp, start, count)) > 0) { 01038 NC_CHECK(nc_get_vara(igrp, varid, start, count, buf)); 01039 NC_CHECK(nc_put_vara(ogrp, ovarid, start, count, buf)); 01040 #ifdef USE_NETCDF4 01041 /* we have to explicitly free values for strings and vlens */ 01042 if(vartype == NC_STRING) { 01043 NC_CHECK(nc_free_string(ntoget, (char **)buf)); 01044 } else if(vartype > NC_STRING) { /* user-defined type */ 01045 nc_type vclass; 01046 NC_CHECK(nc_inq_user_type(igrp, vartype, NULL, NULL, NULL, NULL, &vclass)); 01047 if(vclass == NC_VLEN) { 01048 NC_CHECK(nc_free_vlens(ntoget, (nc_vlen_t *)buf)); 01049 } 01050 } 01051 #endif /* USE_NETCDF4 */ 01052 } /* end main iteration loop */ 01053 #ifdef USE_NETCDF4 01054 /* We're all done with this input and output variable, so if 01055 * either variable is chunked, free up its variable chunk cache */ 01056 /* NC_CHECK(free_var_chunk_cache(igrp, varid)); */ 01057 /* NC_CHECK(free_var_chunk_cache(ogrp, ovarid)); */ 01058 #endif /* USE_NETCDF4 */ 01059 free(start); 01060 free(count); 01061 NC_CHECK(nc_free_iter(iterp)); 01062 return stat; 01063 } 01064 01065 /* Copy data from variables in group igrp to variables in 01066 * corresponding group with parent ogrp, and all subgroups 01067 * recursively */ 01068 static int 01069 copy_data(int igrp, int ogrp) 01070 { 01071 int stat = NC_NOERR; 01072 int ogid; 01073 int nvars; 01074 int varid; 01075 #ifdef USE_NETCDF4 01076 int numgrps; 01077 int *grpids; 01078 int i; 01079 #endif 01080 01081 int iv; /* variable number */ 01082 idnode_t* vlist = NULL; /* list for vars specified with -v option */ 01083 01084 /* 01085 * If any vars were specified with -v option, get list of 01086 * associated variable ids relative to this group. Assume vars 01087 * specified with syntax like "grp1/grp2/varname" or 01088 * "/grp1/grp2/varname" if they are in groups. 01089 */ 01090 vlist = newidlist(); /* list for vars specified with -v option */ 01091 for (iv=0; iv < option_nlvars; iv++) { 01092 if(nc_inq_gvarid(igrp, option_lvars[iv], &varid) == NC_NOERR) 01093 idadd(vlist, varid); 01094 } 01095 01096 /* get groupid in output corresponding to group igrp in input, 01097 * given parent group (or root group) ogrp in output */ 01098 NC_CHECK(get_grpid(igrp, ogrp, &ogid)); 01099 01100 /* Copy data from this group */ 01101 NC_CHECK(nc_inq_nvars(igrp, &nvars)); 01102 01103 for (varid = 0; varid < nvars; varid++) { 01104 if (option_nlvars > 0 && ! idmember(vlist, varid)) 01105 continue; 01106 if (!group_wanted(igrp, option_nlgrps, option_grpids)) 01107 continue; 01108 NC_CHECK(copy_var_data(igrp, varid, ogid)); 01109 } 01110 #ifdef USE_NETCDF4 01111 /* Copy data from subgroups */ 01112 stat = nc_inq_grps(igrp, &numgrps, NULL); 01113 grpids = (int *)emalloc((numgrps + 1) * sizeof(int)); 01114 NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids)); 01115 01116 for(i = 0; i < numgrps; i++) { 01117 if (!option_grpstruct && !group_wanted(grpids[i], option_nlgrps, option_grpids)) 01118 continue; 01119 NC_CHECK(copy_data(grpids[i], ogid)); 01120 } 01121 free(grpids); 01122 #endif /* USE_NETCDF4 */ 01123 freeidlist(vlist); 01124 return stat; 01125 } 01126 01127 /* Count total number of dimensions in ncid and all its descendant subgroups */ 01128 int 01129 count_dims(ncid) { 01130 int numgrps; 01131 int ndims; 01132 NC_CHECK(nc_inq_ndims(ncid, &ndims)); 01133 #ifdef USE_NETCDF4 01134 NC_CHECK(nc_inq_grps(ncid, &numgrps, NULL)); 01135 if(numgrps > 0) { 01136 int igrp; 01137 int *grpids = emalloc(numgrps * sizeof(int)); 01138 NC_CHECK(nc_inq_grps(ncid, &numgrps, grpids)); 01139 for(igrp = 0; igrp < numgrps; igrp++) { 01140 ndims += count_dims(grpids[igrp]); 01141 } 01142 free(grpids); 01143 } 01144 #endif /* USE_NETCDF4 */ 01145 return ndims; 01146 } 01147 01148 /* Test if special case: netCDF-3 file with more than one record 01149 * variable. Performance can be very slow for this case when the disk 01150 * block size is large, there are many record variables, and a 01151 * record's worth of data for some variables is smaller than the disk 01152 * block size. In this case, copying the record variables a variable 01153 * at a time causes much rereading of record data, so instead we want 01154 * to copy data a record at a time. */ 01155 static int 01156 nc3_special_case(int ncid, int kind) { 01157 if (kind == NC_FORMAT_CLASSIC || kind == NC_FORMAT_64BIT) { 01158 int recdimid = 0; 01159 NC_CHECK(nc_inq_unlimdim(ncid, &recdimid)); 01160 if (recdimid != -1) { /* we have a record dimension */ 01161 int nvars; 01162 int varid; 01163 NC_CHECK(nc_inq_nvars(ncid, &nvars)); 01164 for (varid = 0; varid < nvars; varid++) { 01165 int *dimids = 0; 01166 int ndims; 01167 NC_CHECK( nc_inq_varndims(ncid, varid, &ndims) ); 01168 if (ndims > 0) { 01169 int dimids0; 01170 dimids = (int *) emalloc((ndims + 1) * sizeof(int)); 01171 NC_CHECK( nc_inq_vardimid(ncid, varid, dimids) ); 01172 dimids0 = dimids[0]; 01173 free(dimids); 01174 if(dimids0 == recdimid) { 01175 return 1; /* found a record variable */ 01176 } 01177 } 01178 } 01179 } 01180 } 01181 return 0; 01182 } 01183 01184 /* Classify variables in ncid as either fixed-size variables (with no 01185 * unlimited dimension) or as record variables (with an unlimited 01186 * dimension) */ 01187 static int 01188 classify_vars( 01189 int ncid, /* netCDF ID */ 01190 size_t *nf, /* for returning number of fixed-size variables */ 01191 int **fvars, /* the array of fixed_size variable IDS, caller should free */ 01192 size_t *nr, /* for returning number of record variables */ 01193 int **rvars) /* the array of record variable IDs, caller should free */ 01194 { 01195 int varid; 01196 int nvars; 01197 NC_CHECK(nc_inq_nvars(ncid, &nvars)); 01198 *nf = 0; 01199 *fvars = (int *) emalloc(nvars * sizeof(int)); 01200 *nr = 0; 01201 *rvars = (int *) emalloc(nvars * sizeof(int)); 01202 for (varid = 0; varid < nvars; varid++) { 01203 if (isrecvar(ncid, varid)) { 01204 (*rvars)[*nr] = varid; 01205 (*nr)++; 01206 } else { 01207 (*fvars)[*nf] = varid; 01208 (*nf)++; 01209 } 01210 } 01211 return NC_NOERR; 01212 } 01213 01214 /* Only called for classic format or 64-bit offset format files, to speed up special case */ 01215 static int 01216 copy_fixed_size_data(int igrp, int ogrp, size_t nfixed_vars, int *fixed_varids) { 01217 size_t ivar; 01218 /* for each fixed-size variable, copy data */ 01219 for (ivar = 0; ivar < nfixed_vars; ivar++) { 01220 int varid = fixed_varids[ivar]; 01221 NC_CHECK(copy_var_data(igrp, varid, ogrp)); 01222 } 01223 if (fixed_varids) 01224 free(fixed_varids); 01225 return NC_NOERR; 01226 } 01227 01228 /* copy a record's worth of data for a variable from input to output */ 01229 static int 01230 copy_rec_var_data(int ncid, /* input */ 01231 int ogrp, /* output */ 01232 int irec, /* record number */ 01233 int varid, /* input variable id */ 01234 int ovarid, /* output variable id */ 01235 size_t *start, /* start indices for record data */ 01236 size_t *count, /* edge lengths for record data */ 01237 void *buf /* buffer large enough to hold data */ 01238 ) 01239 { 01240 NC_CHECK(nc_get_vara(ncid, varid, start, count, buf)); 01241 NC_CHECK(nc_put_vara(ogrp, ovarid, start, count, buf)); 01242 return NC_NOERR; 01243 } 01244 01245 /* Only called for classic format or 64-bit offset format files, to speed up special case */ 01246 static int 01247 copy_record_data(int ncid, int ogrp, size_t nrec_vars, int *rec_varids) { 01248 int unlimid; 01249 size_t nrecs = 0; /* how many records? */ 01250 size_t irec; 01251 size_t ivar; 01252 void **buf; /* space for reading in data for each variable */ 01253 int *rec_ovarids; /* corresponding varids in output */ 01254 size_t **start; 01255 size_t **count; 01256 NC_CHECK(nc_inq_unlimdim(ncid, &unlimid)); 01257 NC_CHECK(nc_inq_dimlen(ncid, unlimid, &nrecs)); 01258 buf = (void **) emalloc(nrec_vars * sizeof(void *)); 01259 rec_ovarids = (int *) emalloc(nrec_vars * sizeof(int)); 01260 start = (size_t **) emalloc(nrec_vars * sizeof(size_t*)); 01261 count = (size_t **) emalloc(nrec_vars * sizeof(size_t*)); 01262 /* get space to hold one record's worth of data for each record variable */ 01263 for (ivar = 0; ivar < nrec_vars; ivar++) { 01264 int varid; 01265 int ndims; 01266 int *dimids; 01267 size_t value_size; 01268 int dimid; 01269 int ii; 01270 size_t nvals; 01271 char varname[NC_MAX_NAME]; 01272 varid = rec_varids[ivar]; 01273 NC_CHECK(nc_inq_varndims(ncid, varid, &ndims)); 01274 dimids = (int *) emalloc((1 + ndims) * sizeof(int)); 01275 start[ivar] = (size_t *) emalloc(ndims * sizeof(size_t)); 01276 count[ivar] = (size_t *) emalloc(ndims * sizeof(size_t)); 01277 NC_CHECK(nc_inq_vardimid (ncid, varid, dimids)); 01278 value_size = val_size(ncid, varid); 01279 nvals = 1; 01280 for(ii = 1; ii < ndims; ii++) { /* for rec size, don't include first record dimension */ 01281 size_t dimlen; 01282 dimid = dimids[ii]; 01283 NC_CHECK(nc_inq_dimlen(ncid, dimid, &dimlen)); 01284 nvals *= dimlen; 01285 start[ivar][ii] = 0; 01286 count[ivar][ii] = dimlen; 01287 } 01288 start[ivar][0] = 0; 01289 count[ivar][0] = 1; /* 1 record */ 01290 buf[ivar] = (void *) emalloc(nvals * value_size); 01291 NC_CHECK(nc_inq_varname(ncid, varid, varname)); 01292 NC_CHECK(nc_inq_varid(ogrp, varname, &rec_ovarids[ivar])); 01293 if(dimids) 01294 free(dimids); 01295 } 01296 01297 /* for each record, copy all variable data */ 01298 for(irec = 0; irec < nrecs; irec++) { 01299 for (ivar = 0; ivar < nrec_vars; ivar++) { 01300 int varid, ovarid; 01301 varid = rec_varids[ivar]; 01302 ovarid = rec_ovarids[ivar]; 01303 start[ivar][0] = irec; 01304 NC_CHECK(copy_rec_var_data(ncid, ogrp, irec, varid, ovarid, 01305 start[ivar], count[ivar], buf[ivar])); 01306 } 01307 } 01308 for (ivar = 0; ivar < nrec_vars; ivar++) { 01309 if(start[ivar]) 01310 free(start[ivar]); 01311 if(count[ivar]) 01312 free(count[ivar]); 01313 } 01314 if(start) 01315 free(start); 01316 if(count) 01317 free(count); 01318 for (ivar = 0; ivar < nrec_vars; ivar++) { 01319 if(buf[ivar]) { 01320 free(buf[ivar]); 01321 } 01322 } 01323 if (rec_varids) 01324 free(rec_varids); 01325 if(buf) 01326 free(buf); 01327 if(rec_ovarids) 01328 free(rec_ovarids); 01329 return NC_NOERR; 01330 } 01331 01332 /* copy infile to outfile using netCDF API 01333 */ 01334 static int 01335 copy(char* infile, char* outfile) 01336 { 01337 int stat = NC_NOERR; 01338 int igrp, ogrp; 01339 int inkind, outkind; 01340 int open_mode = NC_NOWRITE; 01341 int create_mode = NC_CLOBBER; 01342 size_t ndims; 01343 01344 if(option_read_diskless) { 01345 open_mode |= NC_DISKLESS; 01346 } 01347 01348 NC_CHECK(nc_open(infile, open_mode, &igrp)); 01349 01350 NC_CHECK(nc_inq_format(igrp, &inkind)); 01351 01352 /* option_kind specifies which netCDF format for output: 01353 * -1 -> same as input, 01354 * 1 -> classic 01355 * 2 -> 64-bit offset 01356 * 3 -> netCDF-4, 01357 * 4 -> netCDF-4 classic model 01358 * 01359 * However, if compression or shuffling was specified and kind was -1, 01360 * kind is changed to format 4 that supports compression for input of 01361 * type 1 or 2. 01362 */ 01363 outkind = option_kind; 01364 if (option_kind == SAME_AS_INPUT) { /* default, kind not specified */ 01365 outkind = inkind; 01366 /* Deduce output kind if netCDF-4 features requested */ 01367 if (inkind == NC_FORMAT_CLASSIC || inkind == NC_FORMAT_64BIT) { 01368 if (option_deflate_level > 0 || 01369 option_shuffle_vars == NC_SHUFFLE || 01370 option_chunkspec) 01371 { 01372 outkind = NC_FORMAT_NETCDF4_CLASSIC; 01373 } 01374 } 01375 } 01376 01377 #ifdef USE_NETCDF4 01378 if(option_chunkspec) { 01379 /* Now that input is open, can parse option_chunkspec into binary 01380 * structure. */ 01381 NC_CHECK(chunkspec_parse(igrp, option_chunkspec)); 01382 } 01383 #endif /* USE_NETCDF4 */ 01384 01385 /* Check if any vars in -v don't exist */ 01386 if(missing_vars(igrp, option_nlvars, option_lvars)) 01387 exit(EXIT_FAILURE); 01388 01389 if(option_nlgrps > 0) { 01390 if(inkind != NC_FORMAT_NETCDF4) { 01391 error("Group list (-g ...) only permitted for netCDF-4 file"); 01392 exit(EXIT_FAILURE); 01393 } 01394 /* Check if any grps in -g don't exist */ 01395 if(grp_matches(igrp, option_nlgrps, option_lgrps, option_grpids) == 0) 01396 exit(EXIT_FAILURE); 01397 } 01398 01399 if(option_write_diskless) 01400 create_mode |= NC_WRITE | NC_DISKLESS; /* NC_WRITE persists diskless file on close */ 01401 switch(outkind) { 01402 case NC_FORMAT_CLASSIC: 01403 /* nothing to do */ 01404 break; 01405 case NC_FORMAT_64BIT: 01406 create_mode |= NC_64BIT_OFFSET; 01407 break; 01408 #ifdef USE_NETCDF4 01409 case NC_FORMAT_NETCDF4: 01410 create_mode |= NC_NETCDF4; 01411 break; 01412 case NC_FORMAT_NETCDF4_CLASSIC: 01413 create_mode |= NC_NETCDF4 | NC_CLASSIC_MODEL; 01414 break; 01415 #else 01416 case NC_FORMAT_NETCDF4: 01417 case NC_FORMAT_NETCDF4_CLASSIC: 01418 error("nccopy built with --disable-netcdf4, can't create netCDF-4 files"); 01419 break; 01420 #endif /* USE_NETCDF4 */ 01421 default: 01422 error("bad value (%d) for -k option\n", option_kind); 01423 break; 01424 } 01425 NC_CHECK(nc_create(outfile, create_mode, &ogrp)); 01426 NC_CHECK(nc_set_fill(ogrp, NC_NOFILL, NULL)); 01427 01428 #ifdef USE_NETCDF4 01429 /* Because types in one group may depend on types in a different 01430 * group, need to create all groups before defining types */ 01431 if(inkind == NC_FORMAT_NETCDF4) { 01432 NC_CHECK(copy_groups(igrp, ogrp)); 01433 NC_CHECK(copy_types(igrp, ogrp)); 01434 } 01435 #endif /* USE_NETCDF4 */ 01436 01437 ndims = count_dims(igrp); 01438 NC_CHECK(dimmap_init(ndims)); 01439 NC_CHECK(copy_schema(igrp, ogrp)); 01440 NC_CHECK(nc_enddef(ogrp)); 01441 01442 /* For performance, special case netCDF-3 input or output file with record 01443 * variables, to copy a record-at-a-time instead of a 01444 * variable-at-a-time. */ 01445 /* TODO: check that these special cases work with -v option */ 01446 if(nc3_special_case(igrp, inkind)) { 01447 size_t nfixed_vars, nrec_vars; 01448 int *fixed_varids; 01449 int *rec_varids; 01450 NC_CHECK(classify_vars(igrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids)); 01451 NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids)); 01452 NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids)); 01453 } else if (nc3_special_case(ogrp, outkind)) { 01454 size_t nfixed_vars, nrec_vars; 01455 int *fixed_varids; 01456 int *rec_varids; 01457 /* classifies output vars, but returns input varids */ 01458 NC_CHECK(classify_vars(ogrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids)); 01459 NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids)); 01460 NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids)); 01461 } else { 01462 NC_CHECK(copy_data(igrp, ogrp)); /* recursive, to handle nested groups */ 01463 } 01464 01465 NC_CHECK(nc_close(igrp)); 01466 NC_CHECK(nc_close(ogrp)); 01467 return stat; 01468 } 01469 01470 /* 01471 * For non-negative numeric string with multiplier suffix K, M, G, T, 01472 * or P (or lower-case equivalent), return corresponding value 01473 * incorporating multiplier 1000, 1000000, 1.0d9, ... 1.0d15, or -1.0 01474 * for error. 01475 */ 01476 static double 01477 double_with_suffix(char *str) { 01478 double dval; 01479 char *suffix = 0; 01480 errno = 0; 01481 dval = strtod(str, &suffix); 01482 if(dval < 0 || errno != 0) 01483 return -1.0; 01484 if(*suffix) { 01485 switch (*suffix) { 01486 case 'k': case 'K': 01487 dval *= 1000; 01488 break; 01489 case 'm': case 'M': 01490 dval *= 1000000; 01491 break; 01492 case 'g': case 'G': 01493 dval *= 1000000000; 01494 break; 01495 case 't': case 'T': 01496 dval *= 1.0e12; 01497 break; 01498 case 'p': case 'P': 01499 dval *= 1.0e15; 01500 break; 01501 default: 01502 dval = -1.0; /* error, suffix multiplier must be K, M, G, or T */ 01503 } 01504 } 01505 return dval; 01506 } 01507 01508 static void 01509 usage(void) 01510 { 01511 #define USAGE "\ 01512 [-k n] specify kind of netCDF format for output file, default same as input\n\ 01513 1 classic, 2 64-bit offset, 3 netCDF-4, 4 netCDF-4 classic model\n\ 01514 [-d n] set deflation compression level, default same as input (0=none 9=max)\n\ 01515 [-s] add shuffle option to deflation compression\n\ 01516 [-c chunkspec] specify chunking for dimensions, e.g. \"dim1/N1,dim2/N2,...\"\n\ 01517 [-u] convert unlimited dimensions to fixed-size dimensions in output copy\n\ 01518 [-w] write whole output file from diskless netCDF on close\n\ 01519 [-v var1,...] include data for only listed variables, but definitions for all variables\n\ 01520 [-V var1,...] include definitions and data for only listed variables\n\ 01521 [-g grp1,...] include data for only variables in listed groups, but all definitions\n\ 01522 [-G grp1,...] include definitions and data only for variables in listed groups\n\ 01523 [-m n] set size in bytes of copy buffer, default is 5000000 bytes\n\ 01524 [-h n] set size in bytes of chunk_cache for chunked variables\n\ 01525 [-e n] set number of elements that chunk_cache can hold\n\ 01526 [-r] read whole input file into diskless file on open (classic or 64-bit offset format only)\n\ 01527 infile name of netCDF input file\n\ 01528 outfile name for netCDF output file\n" 01529 01530 /* Don't document this flaky option until it works better */ 01531 /* [-x] use experimental computed estimates for variable-specific chunk caches\n\ */ 01532 01533 error("%s [-k n] [-d n] [-s] [-c chunkspec] [-u] [-w] [-[v|V] varlist] [-[g|G] grplist] [-m n] [-h n] [-e n] [-r] infile outfile\n%s", 01534 progname, USAGE); 01535 } 01536 01537 int 01538 main(int argc, char**argv) 01539 { 01540 char* inputfile = NULL; 01541 char* outputfile = NULL; 01542 int c; 01543 01544 /* table of formats for legal -k values */ 01545 struct Kvalues { 01546 char* name; 01547 int kind; 01548 } legalkinds[] = { 01549 {"1", NC_FORMAT_CLASSIC}, 01550 {"classic", NC_FORMAT_CLASSIC}, 01551 01552 /* The 64-bit offset kind (2) */ 01553 {"2", NC_FORMAT_64BIT}, 01554 {"64-bit-offset", NC_FORMAT_64BIT}, 01555 {"64-bit offset", NC_FORMAT_64BIT}, 01556 01557 /* NetCDF-4 HDF5 format */ 01558 {"3", NC_FORMAT_NETCDF4}, 01559 {"hdf5", NC_FORMAT_NETCDF4}, 01560 {"netCDF-4", NC_FORMAT_NETCDF4}, 01561 {"netCDF4", NC_FORMAT_NETCDF4}, 01562 {"enhanced", NC_FORMAT_NETCDF4}, 01563 01564 /* NetCDF-4 HDF5 format, but using only nc3 data model */ 01565 {"4", NC_FORMAT_NETCDF4_CLASSIC}, 01566 {"hdf5-nc3", NC_FORMAT_NETCDF4_CLASSIC}, 01567 {"netCDF-4 classic model", NC_FORMAT_NETCDF4_CLASSIC}, 01568 {"netCDF4_classic", NC_FORMAT_NETCDF4_CLASSIC}, 01569 {"enhanced-nc3", NC_FORMAT_NETCDF4_CLASSIC}, 01570 01571 /* null terminate*/ 01572 {NULL,0} 01573 }; 01574 01575 opterr = 1; 01576 progname = argv[0]; 01577 01578 if (argc <= 1) 01579 { 01580 usage(); 01581 } 01582 01583 while ((c = getopt(argc, argv, "k:d:sum:c:h:e:rwxg:G:v:V:")) != -1) { 01584 switch(c) { 01585 case 'k': /* for specifying variant of netCDF format to be generated 01586 Possible values are: 01587 1 (=> classic 32 bit) 01588 2 (=> classic 64 bit offsets) 01589 3 (=> netCDF-4/HDF5) 01590 4 (=> classic, but stored in netCDF-4/HDF5 format) 01591 Also allow string versions of above 01592 "classic" 01593 "64-bit-offset" 01594 "64-bit offset" 01595 "enhanced" | "hdf5" | "netCDF-4" 01596 "enhanced-nc3" | "hdf5-nc3" | "netCDF-4 classic model" 01597 */ 01598 { 01599 struct Kvalues* kvalue; 01600 char *kind_name = (char *) emalloc(strlen(optarg)+1); 01601 (void)strcpy(kind_name, optarg); 01602 for(kvalue=legalkinds;kvalue->name;kvalue++) { 01603 if(strcmp(kind_name,kvalue->name) == 0) { 01604 option_kind = kvalue->kind; 01605 break; 01606 } 01607 } 01608 if(kvalue->name == NULL) { 01609 error("invalid format: %s", kind_name); 01610 } 01611 } 01612 break; 01613 case 'd': /* non-default compression level specified */ 01614 option_deflate_level = strtol(optarg, NULL, 10); 01615 if(option_deflate_level < 0 || option_deflate_level > 9) { 01616 error("invalid deflation level: %d", option_deflate_level); 01617 } 01618 break; 01619 case 's': /* shuffling, may improve compression */ 01620 option_shuffle_vars = NC_SHUFFLE; 01621 break; 01622 case 'u': /* convert unlimited dimensions to fixed size */ 01623 option_fix_unlimdims = 1; 01624 break; 01625 case 'm': /* non-default size of data copy buffer */ 01626 { 01627 double dval = double_with_suffix(optarg); /* "K" for kilobytes. "M" for megabytes, ... */ 01628 if(dval < 0) 01629 error("Suffix used for '-m' option value must be K, M, G, T, or P"); 01630 option_copy_buffer_size = dval; 01631 break; 01632 } 01633 case 'h': /* non-default size of chunk cache */ 01634 { 01635 double dval = double_with_suffix(optarg); /* "K" for kilobytes. "M" for megabytes, ... */ 01636 if(dval < 0) 01637 error("Suffix used for '-h' option value must be K, M, G, T, or P"); 01638 option_chunk_cache_size = dval; 01639 break; 01640 } 01641 case 'e': /* number of elements chunk cache can hold */ 01642 { 01643 double dval = double_with_suffix(optarg); /* "K" for kilobytes. "M" for megabytes, ... */ 01644 if(dval < 0 ) 01645 error("Suffix used for '-e' option value must be K, M, G, T, or P"); 01646 option_chunk_cache_nelems = (long)dval; 01647 break; 01648 } 01649 case 'r': 01650 option_read_diskless = 1; /* read into memory on open */ 01651 break; 01652 case 'w': 01653 option_write_diskless = 1; /* write to memory, persist on close */ 01654 break; 01655 case 'x': /* use experimental variable-specific chunk caches */ 01656 option_compute_chunkcaches = 1; 01657 break; 01658 case 'c': /* optional chunking spec for each dimension in list */ 01659 /* save chunkspec string for parsing later, once we know input ncid */ 01660 option_chunkspec = strdup(optarg); 01661 break; 01662 case 'g': /* group names */ 01663 /* make list of names of groups specified */ 01664 make_lgrps (optarg, &option_nlgrps, &option_lgrps, &option_grpids); 01665 option_grpstruct = true; 01666 break; 01667 case 'G': /* group names */ 01668 /* make list of names of groups specified */ 01669 make_lgrps (optarg, &option_nlgrps, &option_lgrps, &option_grpids); 01670 option_grpstruct = false; 01671 break; 01672 case 'v': /* variable names */ 01673 /* make list of names of variables specified */ 01674 make_lvars (optarg, &option_nlvars, &option_lvars); 01675 option_varstruct = true; 01676 break; 01677 case 'V': /* variable names */ 01678 /* make list of names of variables specified */ 01679 make_lvars (optarg, &option_nlvars, &option_lvars); 01680 option_varstruct = false; 01681 break; 01682 default: 01683 usage(); 01684 } 01685 } 01686 argc -= optind; 01687 argv += optind; 01688 01689 if (argc != 2) { 01690 error("one input file and one output file required"); 01691 } 01692 inputfile = argv[0]; 01693 outputfile = argv[1]; 01694 01695 if(strcmp(inputfile, outputfile) == 0) { 01696 error("output would overwrite input"); 01697 } 01698 01699 if(copy(inputfile, outputfile) != NC_NOERR) 01700 exit(EXIT_FAILURE); 01701 exit(EXIT_SUCCESS); 01702 } 01703 END_OF_MAIN();