NetCDF  4.3.2
nccopy.c
00001 /*********************************************************************
00002  *   Copyright 2010, University Corporation for Atmospheric Research
00003  *   See netcdf/README file for copying and redistribution conditions.
00004  *   Thanks to Philippe Poilbarbe and Antonio S. Cofiño for 
00005  *   compression additions.
00006  *   $Id: nccopy.c 400 2010-08-27 21:02:52Z russ $
00007  *********************************************************************/
00008 
00009 #include "config.h"             /* for USE_NETCDF4 macro */
00010 #include <stdlib.h>
00011 #ifdef HAVE_GETOPT_H
00012 #include <getopt.h>
00013 #endif
00014 #ifdef HAVE_UNISTD_H
00015 #include <unistd.h>
00016 #endif
00017 #include <string.h>
00018 #include <netcdf.h>
00019 #include "nciter.h"
00020 #include "utils.h"
00021 #include "chunkspec.h"
00022 #include "dimmap.h"
00023 #include "nccomps.h"
00024 
00025 #ifdef _MSC_VER
00026 #include "XGetopt.h"
00027 #define snprintf _snprintf
00028 int opterr;
00029 int optind;
00030 #endif
00031 
00032 /* default bytes of memory we are willing to allocate for variable
00033  * values during copy */
00034 #define COPY_BUFFER_SIZE (5000000)
00035 #define COPY_CHUNKCACHE_PREEMPTION (1.0f) /* for copying, can eject fully read chunks */
00036 #define SAME_AS_INPUT (-1)      /* default, if kind not specified */
00037 #define CHUNK_THRESHOLD (8192)  /* non-record variables with fewer bytes don't get chunked */
00038 
00039 #ifndef USE_NETCDF4
00040 #define NC_CLASSIC_MODEL 0x0100 /* Enforce classic model if netCDF-4 not available. */
00041 #endif
00042 
00043 /* Global variables for command-line requests */
00044 char *progname;        /* for error messages */
00045 static int option_kind = SAME_AS_INPUT;
00046 static int option_deflate_level = -1;   /* default, compress output only if input compressed */
00047 static int option_shuffle_vars = NC_NOSHUFFLE; /* default, no shuffling on compression */
00048 static int option_fix_unlimdims = 0; /* default, preserve unlimited dimensions */
00049 static char* option_chunkspec = 0;   /* default, no chunk specification */
00050 static size_t option_copy_buffer_size = COPY_BUFFER_SIZE;
00051 static size_t option_chunk_cache_size = CHUNK_CACHE_SIZE; /* default from config.h */
00052 static size_t option_chunk_cache_nelems = CHUNK_CACHE_NELEMS; /* default from config.h */
00053 static int option_read_diskless = 0; /* default, don't read input into memory on open */
00054 static int option_write_diskless = 0; /* default, don't write output to diskless file */
00055 static int option_min_chunk_bytes = CHUNK_THRESHOLD; /* default, don't chunk variable if prod of
00056                                                       * chunksizes of its dimensions is smaller
00057                                                       * than this */
00058 static int option_nlgrps = 0;               /* Number of groups specified with -g
00059                                              * option on command line */
00060 static char** option_lgrps = 0;             /* list of group names specified with -g
00061                                              * option on command line */
00062 static idnode_t* option_grpids = 0; /* list of grpids matching list specified with -g option */
00063 static bool_t option_grpstruct = false; /* if -g set, copy structure for non-selected groups */
00064 static int option_nlvars = 0; /* Number of variables specified with -v * option on command line */
00065 static char** option_lvars = 0;         /* list of variable names specified with -v
00066                                          * option on command line */
00067 static bool_t option_varstruct = false;   /* if -v set, copy structure for non-selected vars */
00068 static int option_compute_chunkcaches = 0; /* default, don't try still flaky estimate of
00069                                             * chunk cache for each variable */
00070 
00071 /* get group id in output corresponding to group igrp in input,
00072  * given parent group id (or root group id) parid in output. */
00073 static int
00074 get_grpid(int igrp, int parid, int *ogrpp) {
00075     int stat = NC_NOERR;
00076     int ogid = parid;           /* like igrp but in output file */
00077 #ifdef USE_NETCDF4
00078     int inparid;
00079 
00080     /* if not root group, get corresponding output groupid from group name */
00081     stat = nc_inq_grp_parent(igrp, &inparid);
00082     if(stat == NC_NOERR) {      /* not root group */
00083         char grpname[NC_MAX_NAME + 1];
00084         NC_CHECK(nc_inq_grpname(igrp, grpname));
00085         NC_CHECK(nc_inq_grp_ncid(parid, grpname, &ogid));
00086     } else if(stat == NC_ENOGRP) { /* root group */
00087         stat = NC_NOERR;
00088     } else {
00089         NC_CHECK(stat);
00090     }
00091 #endif  /* USE_NETCDF4 */
00092     *ogrpp = ogid;
00093     return stat;
00094 }
00095 
00096 /* Return size in bytes of a variable value */
00097 static size_t
00098 val_size(int grpid, int varid) {
00099     nc_type vartype;
00100     size_t value_size;
00101     NC_CHECK(nc_inq_vartype(grpid, varid, &vartype));
00102     NC_CHECK(nc_inq_type(grpid, vartype, NULL, &value_size));
00103     return value_size;
00104 }
00105 
00106 #ifdef USE_NETCDF4
00107 /* Get parent id needed to define a new group from its full name in an
00108  * open file identified by ncid.  Assumes all intermediate groups are
00109  * already defined.  */
00110 static int
00111 nc_inq_parid(int ncid, const char *fullname, int *locidp) {
00112     char *parent = strdup(fullname);
00113     char *slash = "/";          /* groupname separator */
00114     char *last_slash;
00115     if(parent == NULL) {
00116         return NC_ENOMEM;       /* exits */
00117     }
00118     last_slash = strrchr(parent, '/');
00119     if(last_slash == parent || last_slash == NULL) {    /* parent is root */
00120         free(parent);
00121         parent = strdup(slash);
00122     } else {
00123         *last_slash = '\0';     /* truncate to get parent name */
00124     }
00125     NC_CHECK(nc_inq_grp_full_ncid(ncid, parent, locidp));
00126        free(parent);
00127     return NC_NOERR;
00128 }
00129 
00130 /* Return size of chunk in bytes for a variable varid in a group igrp, or 0 if
00131  * layout is contiguous */
00132 static int
00133 inq_var_chunksize(int igrp, int varid, size_t* chunksizep) {
00134     int stat = NC_NOERR;
00135     int ndims;
00136     size_t *chunksizes;
00137     int dim;
00138     int contig = 1;
00139     nc_type vartype;
00140     size_t value_size;
00141     size_t prod;
00142 
00143     NC_CHECK(nc_inq_vartype(igrp, varid, &vartype));
00144     /* from type, get size in memory needed for each value */
00145     NC_CHECK(nc_inq_type(igrp, vartype, NULL, &value_size));
00146     prod = value_size;
00147     NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
00148     chunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t));
00149     if(ndims > 0) {
00150         NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, NULL));
00151     }
00152     if(contig == 1) {
00153         *chunksizep = 0;
00154     } else {
00155         NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, chunksizes));
00156         for(dim = 0; dim < ndims; dim++) {
00157             prod *= chunksizes[dim];
00158         }
00159         *chunksizep = prod;
00160     }
00161     free(chunksizes);
00162     return stat;
00163 }
00164 
00165 /* Return estimated number of elems required in chunk cache and
00166  * estimated size of chunk cache adequate to efficiently copy input
00167  * variable ivarid to output variable ovarid, which may have different
00168  * chunk size and shape */
00169 static int
00170 inq_var_chunking_params(int igrp, int ivarid, int ogrp, int ovarid,
00171                         size_t* chunkcache_sizep,
00172                         size_t *chunkcache_nelemsp,
00173                         float * chunkcache_preemptionp)
00174 {
00175     int stat = NC_NOERR;
00176     int ndims;
00177     size_t *ichunksizes, *ochunksizes;
00178     int dim;
00179     int icontig = 1, ocontig = 1;
00180     nc_type vartype;
00181     size_t value_size;
00182     size_t prod, iprod, oprod;
00183     size_t nelems;
00184     *chunkcache_nelemsp = CHUNK_CACHE_NELEMS;
00185     *chunkcache_sizep = CHUNK_CACHE_SIZE;
00186     *chunkcache_preemptionp = COPY_CHUNKCACHE_PREEMPTION;
00187 
00188     NC_CHECK(nc_inq_varndims(igrp, ivarid, &ndims));
00189     if(ndims > 0) {
00190         NC_CHECK(nc_inq_var_chunking(igrp, ivarid, &icontig, NULL));
00191         NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &ocontig, NULL));
00192     }
00193     if(icontig == 1 && ocontig == 1) { /* no chunking in input or output */
00194         *chunkcache_nelemsp = 0;
00195         *chunkcache_sizep = 0;
00196         *chunkcache_preemptionp = 0;
00197         return stat;
00198     }
00199 
00200     NC_CHECK(nc_inq_vartype(igrp, ivarid, &vartype));
00201     NC_CHECK(nc_inq_type(igrp, vartype, NULL, &value_size));
00202     iprod = value_size;
00203 
00204     if(icontig == 0 && ocontig == 1) { /* chunking only in input */
00205         *chunkcache_nelemsp = 1;       /* read one input chunk at a time */
00206         *chunkcache_sizep = iprod;
00207         *chunkcache_preemptionp = 1.0f;
00208         return stat;
00209     }
00210 
00211     ichunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t));
00212     if(icontig == 1) { /* if input contiguous, treat as if chunked on
00213                         * first dimension */
00214         ichunksizes[0] = 1;
00215         for(dim = 1; dim < ndims; dim++) {
00216             ichunksizes[dim] = dim;
00217         }
00218     } else {
00219         NC_CHECK(nc_inq_var_chunking(igrp, ivarid, &icontig, ichunksizes));
00220     }
00221 
00222     /* now can assume chunking in both input and output */
00223     ochunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t));
00224     NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &ocontig, ochunksizes));
00225 
00226     nelems = 1;
00227     oprod = value_size;
00228     for(dim = 0; dim < ndims; dim++) {
00229         nelems += 1 + (ichunksizes[dim] - 1) / ochunksizes[dim];
00230         iprod *= ichunksizes[dim];
00231         oprod *= ochunksizes[dim];
00232     }
00233     prod = iprod + oprod * (nelems - 1);
00234     *chunkcache_nelemsp = nelems;
00235     *chunkcache_sizep = prod;
00236     free(ichunksizes);
00237     free(ochunksizes);
00238     return stat;
00239 }
00240 
00241 /* Forward declaration, because copy_type, copy_vlen_type call each other */
00242 static int copy_type(int igrp, nc_type typeid, int ogrp);
00243 
00244 /* 
00245  * copy a user-defined variable length type in the group igrp to the
00246  * group ogrp
00247  */
00248 static int
00249 copy_vlen_type(int igrp, nc_type itype, int ogrp)
00250 {
00251     int stat = NC_NOERR; 
00252     nc_type ibasetype;
00253     nc_type obasetype;          /* base type in target group */
00254     char name[NC_MAX_NAME];
00255     size_t size;
00256     char basename[NC_MAX_NAME];
00257     size_t basesize;
00258     nc_type vlen_type;
00259 
00260     NC_CHECK(nc_inq_vlen(igrp, itype, name, &size, &ibasetype));
00261     /* to get base type id in target group, use name of base type in
00262      * source group */
00263     NC_CHECK(nc_inq_type(igrp, ibasetype, basename, &basesize));
00264     stat = nc_inq_typeid(ogrp, basename, &obasetype);
00265     /* if no such type, create it now */
00266     if(stat == NC_EBADTYPE) {
00267         NC_CHECK(copy_type(igrp, ibasetype, ogrp));
00268         stat = nc_inq_typeid(ogrp, basename, &obasetype);
00269     }
00270     NC_CHECK(stat);
00271 
00272     /* Now we know base type exists in output and we know its type id */
00273     NC_CHECK(nc_def_vlen(ogrp, name, obasetype, &vlen_type));
00274 
00275     return stat;
00276 }
00277 
00278 /* 
00279  * copy a user-defined opaque type in the group igrp to the group ogrp
00280  */
00281 static int
00282 copy_opaque_type(int igrp, nc_type itype, int ogrp)
00283 {
00284     int stat = NC_NOERR; 
00285     nc_type otype;
00286     char name[NC_MAX_NAME];
00287     size_t size;
00288 
00289     NC_CHECK(nc_inq_opaque(igrp, itype, name, &size));
00290     NC_CHECK(nc_def_opaque(ogrp, size, name, &otype));
00291 
00292     return stat;
00293 }
00294 
00295 /* 
00296  * copy a user-defined enum type in the group igrp to the group ogrp
00297  */
00298 static int
00299 copy_enum_type(int igrp, nc_type itype, int ogrp)
00300 {
00301     int stat = NC_NOERR; 
00302     nc_type otype;
00303     nc_type basetype;
00304     size_t basesize;
00305     size_t nmembers;
00306     char name[NC_MAX_NAME];
00307     int i;
00308 
00309     NC_CHECK(nc_inq_enum(igrp, itype, name, &basetype, &basesize, &nmembers));
00310     NC_CHECK(nc_def_enum(ogrp, basetype, name, &otype));
00311     for(i = 0; i < nmembers; i++) { /* insert enum members */
00312         char ename[NC_MAX_NAME];
00313         long long val;          /* large enough to hold any integer type */
00314         NC_CHECK(nc_inq_enum_member(igrp, itype, i, ename, &val));
00315         NC_CHECK(nc_insert_enum(ogrp, otype, ename, &val));
00316     }
00317     return stat;
00318 }
00319 
00320 /* 
00321  * copy a user-defined compound type in the group igrp to the group ogrp
00322  */
00323 static int
00324 copy_compound_type(int igrp, nc_type itype, int ogrp)
00325 {
00326     int stat = NC_NOERR; 
00327     char name[NC_MAX_NAME];
00328     size_t size;
00329     size_t nfields;
00330     nc_type otype;
00331     int fid;
00332 
00333     NC_CHECK(nc_inq_compound(igrp, itype, name, &size, &nfields));
00334     NC_CHECK(nc_def_compound(ogrp, size, name, &otype));
00335 
00336     for (fid = 0; fid < nfields; fid++) {
00337         char fname[NC_MAX_NAME];
00338         char ftypename[NC_MAX_NAME];
00339         size_t foff;
00340         nc_type iftype, oftype;
00341         int fndims;
00342 
00343         NC_CHECK(nc_inq_compound_field(igrp, itype, fid, fname, &foff, &iftype, &fndims, NULL));
00344         /* type ids in source don't necessarily correspond to same
00345          * typeids in destination, so look up destination typeid by using
00346          * field type name */
00347         NC_CHECK(nc_inq_type(igrp, iftype, ftypename, NULL));
00348         NC_CHECK(nc_inq_typeid(ogrp, ftypename, &oftype));
00349         if(fndims == 0) {
00350             NC_CHECK(nc_insert_compound(ogrp, otype, fname, foff, oftype));
00351         } else {                /* field is array type */
00352             int *fdimsizes;
00353             fdimsizes = (int *) emalloc((fndims + 1) * sizeof(int));
00354             stat = nc_inq_compound_field(igrp, itype, fid, NULL, NULL, NULL, 
00355                                          NULL, fdimsizes);
00356             NC_CHECK(nc_insert_array_compound(ogrp, otype, fname, foff, oftype, fndims, fdimsizes));
00357             free(fdimsizes);
00358         }
00359     }
00360     return stat;
00361 }
00362 
00363 
00364 /* 
00365  * copy a user-defined type in the group igrp to the group ogrp
00366  */
00367 static int
00368 copy_type(int igrp, nc_type typeid, int ogrp)
00369 {
00370     int stat = NC_NOERR; 
00371     nc_type type_class;
00372 
00373     NC_CHECK(nc_inq_user_type(igrp, typeid, NULL, NULL, NULL, NULL, &type_class));
00374 
00375     switch(type_class) {
00376     case NC_VLEN:
00377         NC_CHECK(copy_vlen_type(igrp, typeid, ogrp));
00378         break;
00379     case NC_OPAQUE:
00380         NC_CHECK(copy_opaque_type(igrp, typeid, ogrp));
00381         break;
00382     case NC_ENUM:
00383         NC_CHECK(copy_enum_type(igrp, typeid, ogrp));
00384         break;
00385     case NC_COMPOUND:
00386         NC_CHECK(copy_compound_type(igrp, typeid, ogrp));
00387         break;
00388     default:
00389         NC_CHECK(NC_EBADTYPE);
00390     }
00391     return stat;
00392 }
00393 
00394 /* Copy a group and all its subgroups, recursively, from iroot to
00395  * oroot, the ncids of input file and output file.  This just creates
00396  * all the groups in the destination, but doesn't copy anything that's
00397  * in the groups yet. */
00398 static int
00399 copy_groups(int iroot, int oroot)
00400 {
00401     int stat = NC_NOERR;
00402     int numgrps;
00403     int *grpids;
00404     int i;
00405 
00406     /* get total number of groups and their ids, including all descendants */
00407     NC_CHECK(nc_inq_grps_full(iroot, &numgrps, NULL));
00408     if(numgrps > 1) {           /* there's always 1 root group */
00409         grpids = emalloc(numgrps * sizeof(int));
00410         NC_CHECK(nc_inq_grps_full(iroot, NULL, grpids));
00411         /* create corresponding new groups in ogrp, except for root group */
00412         for(i = 1; i < numgrps; i++) {
00413             char *grpname_full;
00414             char grpname[NC_MAX_NAME];
00415             size_t len_name;
00416             int ogid = 0, oparid = 0, iparid = 0;
00417             /* get full group name of input group */
00418             NC_CHECK(nc_inq_grpname(grpids[i], grpname));
00419             if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) {
00420                 NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, NULL));
00421                 grpname_full = emalloc(len_name + 1);
00422                 NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, grpname_full));
00423                 /* Make sure, the parent group is also wanted (root group is always wanted) */
00424                 NC_CHECK(nc_inq_parid(iroot, grpname_full, &iparid));
00425                 if (!option_grpstruct && !group_wanted(iparid, option_nlgrps, option_grpids) 
00426                     && iparid != iroot) {
00427                     error("ERROR: trying to copy a group but not the parent: %s", grpname_full);
00428                 }
00429                 /* get id of parent group of corresponding group in output.
00430                  * Note that this exists, because nc_inq_groups returned
00431                  * grpids in preorder, so parents are always copied before
00432                  * their subgroups */
00433                 NC_CHECK(nc_inq_parid(oroot, grpname_full, &oparid));
00434                 NC_CHECK(nc_inq_grpname(grpids[i], grpname));
00435                 /* define corresponding group in output */
00436                 NC_CHECK(nc_def_grp(oparid, grpname, &ogid));
00437                 free(grpname_full);
00438             }
00439         }
00440         free(grpids);
00441     }
00442     return stat;    
00443 }
00444 
00445 /* 
00446  * Copy the user-defined types in this group (igrp) and all its
00447  * subgroups, recursively, to corresponding group in output (ogrp)
00448  */
00449 static int
00450 copy_types(int igrp, int ogrp)
00451 {
00452     int stat = NC_NOERR; 
00453     int ntypes;
00454     nc_type *types = NULL;
00455     int numgrps;
00456     int *grpids = NULL;
00457     int i;
00458 
00459     NC_CHECK(nc_inq_typeids(igrp, &ntypes, NULL));
00460 
00461     if(ntypes > 0) {
00462         types = (nc_type *) emalloc(ntypes * sizeof(nc_type));
00463         NC_CHECK(nc_inq_typeids(igrp, &ntypes, types));
00464         for (i = 0; i < ntypes; i++) {
00465             NC_CHECK(copy_type(igrp, types[i], ogrp));
00466         }
00467         free(types);
00468     }
00469 
00470     /* Copy types from subgroups */
00471     NC_CHECK(nc_inq_grps(igrp, &numgrps, NULL));
00472     if(numgrps > 0) {
00473         grpids = (int *)emalloc(sizeof(int) * numgrps);
00474         NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
00475         for(i = 0; i < numgrps; i++) {
00476             if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) {
00477                 int ogid;
00478                 /* get groupid in output corresponding to grpids[i] in
00479                  * input, given parent group (or root group) ogrp in
00480                  * output */
00481                 NC_CHECK(get_grpid(grpids[i], ogrp, &ogid));
00482                 NC_CHECK(copy_types(grpids[i], ogid));
00483             }
00484         }
00485         free(grpids);
00486     }
00487     return stat;
00488 }
00489 
00490 /* Copy all netCDF-4 specific variable properties such as chunking,
00491  * endianness, deflation, checksumming, fill, etc. */
00492 static int
00493 copy_var_specials(int igrp, int varid, int ogrp, int o_varid)
00494 {
00495     int stat = NC_NOERR;
00496     {                           /* handle chunking parameters */
00497         int ndims;
00498         NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
00499         if (ndims > 0) {                /* no chunking for scalar variables */
00500             int contig = 0;
00501             size_t *chunkp = (size_t *) emalloc(ndims * sizeof(size_t));
00502             int *dimids = (int *) emalloc(ndims * sizeof(int));
00503             int idim;
00504              /* size of a chunk: product of dimension chunksizes and size of value */ 
00505             size_t csprod = val_size(ogrp, o_varid);
00506             int is_unlimited = 0;
00507             NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, chunkp));
00508             NC_CHECK(nc_inq_vardimid(igrp, varid, dimids));
00509 
00510             for(idim = 0; idim < ndims; idim++) {
00511                 int idimid = dimids[idim];
00512                 int odimid = dimmap_odimid(idimid);
00513                 size_t chunksize = chunkspec_size(idimid);
00514                 if(chunksize > 0) { /* found in chunkspec */
00515                     chunkp[idim] = chunksize;
00516                 }
00517                 csprod *= chunkp[idim];
00518                 if(dimmap_ounlim(odimid))
00519                     is_unlimited = 1;
00520             }
00521             /* Explicitly set chunking, even if default */
00522             /* If product of chunksizes is too small and no unlimited
00523              * dimensions used, don't chunk.  Also if chunking
00524              * explicitly turned off with chunk spec, don't chunk. */
00525             if ((csprod < option_min_chunk_bytes && !is_unlimited) || contig == 1
00526                 || chunkspec_omit() == true) {
00527                 NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CONTIGUOUS, NULL));
00528             } else {
00529                 NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CHUNKED, chunkp));
00530             }
00531             free(dimids);
00532             free(chunkp);
00533         }
00534     }
00535     { /* handle compression parameters, copying from input, overriding
00536        * with command-line options */
00537         int shuffle_in=0, deflate_in=0, deflate_level_in=0;
00538         int shuffle_out=0, deflate_out=0, deflate_level_out=0;
00539         if(option_deflate_level != 0) {
00540             NC_CHECK(nc_inq_var_deflate(igrp, varid, &shuffle_in, &deflate_in, &deflate_level_in));
00541             if(option_deflate_level == -1) { /* not specified, copy input compression and shuffling */
00542                 shuffle_out = shuffle_in;
00543                 deflate_out = deflate_in;
00544                 deflate_level_out = deflate_level_in;
00545             } else if(option_deflate_level > 0) { /* change to specified compression, shuffling */
00546                 shuffle_out = option_shuffle_vars;
00547                 deflate_out=1;
00548                 deflate_level_out = option_deflate_level;
00549             }
00550             NC_CHECK(nc_def_var_deflate(ogrp, o_varid, shuffle_out, deflate_out, deflate_level_out));
00551         }
00552     }
00553     {                           /* handle checksum parameters */
00554         int fletcher32 = 0;
00555         NC_CHECK(nc_inq_var_fletcher32(igrp, varid, &fletcher32));
00556         if(fletcher32 != 0) {
00557             NC_CHECK(nc_def_var_fletcher32(ogrp, o_varid, fletcher32));
00558         }
00559     }
00560     {                           /* handle endianness */
00561         int endianness = 0;
00562         NC_CHECK(nc_inq_var_endian(igrp, varid, &endianness));
00563         if(endianness != NC_ENDIAN_NATIVE) { /* native is the default */
00564             NC_CHECK(nc_def_var_endian(ogrp, o_varid, endianness));
00565         }
00566     }
00567     return stat;
00568 }
00569 
00570 /* Set output variable o_varid (in group ogrp) to use chunking
00571  * specified on command line, only called for classic format input and
00572  * netCDF-4 format output, so no existing chunk lengths to override. */
00573 static int
00574 set_var_chunked(int ogrp, int o_varid)
00575 {
00576     int stat = NC_NOERR;
00577     int ndims;
00578     int odim;
00579     size_t chunk_threshold = CHUNK_THRESHOLD;
00580 
00581     if(chunkspec_ndims() == 0)  /* no chunking specified on command line */
00582         return stat;
00583     NC_CHECK(nc_inq_varndims(ogrp, o_varid, &ndims));
00584 
00585     if (ndims > 0) {            /* no chunking for scalar variables */
00586         int chunked = 0;
00587         int *dimids = (int *) emalloc(ndims * sizeof(int));
00588         size_t varsize;
00589         nc_type vartype;
00590         size_t value_size;
00591         int is_unlimited = 0;
00592 
00593         NC_CHECK(nc_inq_vardimid (ogrp, o_varid, dimids));
00594         NC_CHECK(nc_inq_vartype(ogrp, o_varid, &vartype));
00595         /* from type, get size in memory needed for each value */
00596         NC_CHECK(nc_inq_type(ogrp, vartype, NULL, &value_size));
00597         varsize = value_size;
00598 
00599         /* Determine if this variable should be chunked.  A variable
00600          * should be chunked if any of its dims are in command-line
00601          * chunk spec. It will also be chunked if any of its
00602          * dims are unlimited. */
00603         for(odim = 0; odim < ndims; odim++) {
00604             int odimid = dimids[odim];
00605             int idimid = dimmap_idimid(odimid); /* corresponding dimid in input file */
00606             if(dimmap_ounlim(odimid))
00607                 is_unlimited = 1;
00608             if(idimid != -1) {
00609                 size_t chunksize = chunkspec_size(idimid); /* from chunkspec */
00610                 size_t dimlen;
00611                 NC_CHECK(nc_inq_dimlen(ogrp, odimid, &dimlen));
00612                 if( (chunksize > 0) || dimmap_ounlim(odimid)) {
00613                     chunked = 1;                    
00614                 }
00615                 varsize *= dimlen;
00616             }
00617         }
00618         /* Don't chunk small variables that don't use an unlimited
00619          * dimension. */
00620         if(varsize < chunk_threshold && !is_unlimited)
00621             chunked = 0;
00622 
00623         if(chunked) {
00624             /* Allocate chunksizes and set defaults to dimsize for any
00625              * dimensions not mentioned in chunkspec. */
00626             size_t *chunkp = (size_t *) emalloc(ndims * sizeof(size_t));
00627             for(odim = 0; odim < ndims; odim++) {
00628                 int odimid = dimids[odim];
00629                 int idimid = dimmap_idimid(odimid);
00630                 size_t chunksize = chunkspec_size(idimid);
00631                 if(chunksize > 0) {
00632                     chunkp[odim] = chunksize;
00633                 } else {
00634                     NC_CHECK(nc_inq_dimlen(ogrp, odimid, &chunkp[odim]));
00635                 }
00636             }
00637             NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CHUNKED, chunkp));
00638             free(chunkp);
00639         }
00640         free(dimids);
00641     }
00642     return stat;
00643 }
00644 
00645 /* Set variable to compression specified on command line */
00646 static int
00647 set_var_compressed(int ogrp, int o_varid)
00648 {
00649     int stat = NC_NOERR;
00650     if (option_deflate_level > 0) {
00651         int deflate = 1;
00652         NC_CHECK(nc_def_var_deflate(ogrp, o_varid, option_shuffle_vars, deflate, option_deflate_level));
00653     }
00654     return stat;
00655 }
00656 
/* Shrink the chunk cache of variable varid in group grp to a minimal
 * setting (size 1, 1 element, preemption 0).  Only applies to chunked
 * variables in netCDF-4 or netCDF-4 classic model files.  This is not
 * necessary, but will save some memory when processing one variable
 * at a time.  */
#ifdef UNUSED
static int
free_var_chunk_cache(int grp, int varid)
{
    const size_t min_bytes = 1;
    const size_t min_nelems = 1;
    const float min_preemption = 0;
    int format;
    int storage = 1;

    NC_CHECK(nc_inq_format(grp, &format));
    if(format != NC_FORMAT_NETCDF4 && format != NC_FORMAT_NETCDF4_CLASSIC) {
        return NC_NOERR;        /* no chunk caches in other formats */
    }
    NC_CHECK(nc_inq_var_chunking(grp, varid, &storage, NULL));
    if(storage == 0) {          /* chunked */
        NC_CHECK(nc_set_var_chunk_cache(grp, varid, min_bytes, min_nelems, min_preemption));
    }
    return NC_NOERR;
}
#endif
00680 
00681 #endif /* USE_NETCDF4 */
00682 
00683 /* Copy dimensions from group igrp to group ogrp, also associate input
00684  * dimids with output dimids (they need not match, because the input
00685  * dimensions may have been defined in a different order than we define
00686  * the output dimensions here. */
00687 static int
00688 copy_dims(int igrp, int ogrp)
00689 {
00690     int stat = NC_NOERR;
00691     int ndims;
00692     int dgrp;
00693 #ifdef USE_NETCDF4
00694     int nunlims;
00695     int *dimids;
00696     int *unlimids;
00697 #else
00698     int unlimid;
00699 #endif /* USE_NETCDF4 */    
00700 
00701     NC_CHECK(nc_inq_ndims(igrp, &ndims));
00702 
00703 #ifdef USE_NETCDF4
00704    /* In netCDF-4 files, dimids may not be sequential because they
00705     * may be defined in various groups, and we are only looking at one
00706     * group at a time. */
00707     /* Find the dimension ids in this group, don't include parents. */
00708     dimids = (int *) emalloc((ndims + 1) * sizeof(int));
00709     NC_CHECK(nc_inq_dimids(igrp, NULL, dimids, 0));
00710     /* Find the number of unlimited dimensions and get their IDs */
00711     NC_CHECK(nc_inq_unlimdims(igrp, &nunlims, NULL));
00712     unlimids = (int *) emalloc((nunlims + 1) * sizeof(int));
00713     NC_CHECK(nc_inq_unlimdims(igrp, NULL, unlimids));
00714 #else
00715     NC_CHECK(nc_inq_unlimdim(igrp, &unlimid));
00716 #endif /* USE_NETCDF4 */
00717 
00718     /* Copy each dimension to output, including unlimited dimension(s) */
00719     for (dgrp = 0; dgrp < ndims; dgrp++) {
00720         char name[NC_MAX_NAME];
00721         size_t length;
00722         int i_is_unlim;
00723         int o_is_unlim;
00724         int idimid, odimid;
00725 #ifdef USE_NETCDF4
00726         int uld;
00727 #endif
00728 
00729         i_is_unlim = 0;
00730 #ifdef USE_NETCDF4
00731         idimid = dimids[dgrp];
00732         for (uld = 0; uld < nunlims; uld++) {
00733             if(idimid == unlimids[uld]) {
00734                 i_is_unlim = 1;
00735                 break;
00736             }     
00737         }
00738 #else
00739         idimid = dgrp;
00740         if(unlimid != -1 && (idimid == unlimid)) {
00741             i_is_unlim = 1;
00742         }
00743 #endif /* USE_NETCDF4 */
00744 
00745         stat = nc_inq_dim(igrp, idimid, name, &length);
00746         if (stat == NC_EDIMSIZE && sizeof(size_t) < 8) {
00747             error("dimension \"%s\" requires 64-bit platform", name);
00748         }       
00749         NC_CHECK(stat);
00750         o_is_unlim = i_is_unlim;
00751         if(i_is_unlim && !option_fix_unlimdims) {
00752             NC_CHECK(nc_def_dim(ogrp, name, NC_UNLIMITED, &odimid));
00753         } else {
00754             NC_CHECK(nc_def_dim(ogrp, name, length, &odimid));
00755             o_is_unlim = 0;
00756         }
00757         /* Store (idimid, odimid) mapping for later use, also whether unlimited */
00758         dimmap_store(idimid, odimid, i_is_unlim, o_is_unlim);
00759     }
00760 #ifdef USE_NETCDF4
00761     free(dimids);
00762     free(unlimids);
00763 #endif /* USE_NETCDF4 */    
00764     return stat;
00765 }
00766 
00767 /* Copy the attributes for variable ivar in group igrp to variable
00768  * ovar in group ogrp.  Global (group) attributes are specified by
00769  * using the varid NC_GLOBAL */
00770 static int
00771 copy_atts(int igrp, int ivar, int ogrp, int ovar)
00772 {
00773     int natts;
00774     int iatt;
00775     int stat = NC_NOERR;
00776 
00777     NC_CHECK(nc_inq_varnatts(igrp, ivar, &natts));
00778     
00779     for(iatt = 0; iatt < natts; iatt++) {
00780         char name[NC_MAX_NAME];
00781         NC_CHECK(nc_inq_attname(igrp, ivar, iatt, name));
00782         NC_CHECK(nc_copy_att(igrp, ivar, name, ogrp, ovar));
00783     }
00784     return stat;
00785 }
00786 
00787 /* copy the schema for a single variable in group igrp to group ogrp */
00788 static int
00789 copy_var(int igrp, int varid, int ogrp)
00790 {
00791     int stat = NC_NOERR;
00792     int ndims;
00793     int *idimids;               /* ids of dims for input variable */
00794     int *odimids;               /* ids of dims for output variable */
00795     char name[NC_MAX_NAME];
00796     nc_type typeid, o_typeid;
00797     int natts;
00798     int i;
00799     int o_varid;
00800 
00801     NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
00802     idimids = (int *) emalloc((ndims + 1) * sizeof(int));
00803     NC_CHECK(nc_inq_var(igrp, varid, name, &typeid, NULL, idimids, &natts));
00804     o_typeid = typeid;
00805 #ifdef USE_NETCDF4
00806     if (typeid > NC_STRING) {   /* user-defined type */
00807         /* type ids in source don't necessarily correspond to same
00808          * typeids in destination, so look up destination typeid by
00809          * using type name */
00810         char type_name[NC_MAX_NAME];
00811         NC_CHECK(nc_inq_type(igrp, typeid, type_name, NULL));
00812         NC_CHECK(nc_inq_typeid(ogrp, type_name, &o_typeid));
00813     }
00814 #endif  /* USE_NETCDF4 */
00815 
00816     /* get the corresponding dimids in the output file */
00817     odimids = (int *) emalloc((ndims + 1) * sizeof(int));
00818     for(i = 0; i < ndims; i++) {
00819         odimids[i] = dimmap_odimid(idimids[i]);
00820         if(odimids[i] == -1) {
00821             error("Oops, no dimension in output associated with input dimid %d", idimids[i]);
00822         }
00823     }
00824 
00825     /* define the output variable */
00826     NC_CHECK(nc_def_var(ogrp, name, o_typeid, ndims, odimids, &o_varid));
00827     /* attach the variable attributes to the output variable */
00828     NC_CHECK(copy_atts(igrp, varid, ogrp, o_varid));
00829 #ifdef USE_NETCDF4    
00830     {
00831         int inkind;
00832         int outkind;
00833         NC_CHECK(nc_inq_format(igrp, &inkind));
00834         NC_CHECK(nc_inq_format(ogrp, &outkind));
00835         if(outkind == NC_FORMAT_NETCDF4 || outkind == NC_FORMAT_NETCDF4_CLASSIC) {
00836             if((inkind == NC_FORMAT_NETCDF4 || inkind == NC_FORMAT_NETCDF4_CLASSIC)) {
00837                 /* Copy all netCDF-4 specific variable properties such as
00838                  * chunking, endianness, deflation, checksumming, fill, etc. */
00839                 NC_CHECK(copy_var_specials(igrp, varid, ogrp, o_varid));
00840             } else {
00841                 /* Set chunking if specified in command line option */
00842                 NC_CHECK(set_var_chunked(ogrp, o_varid));
00843                 /* Set compression if specified in command line option */
00844                 NC_CHECK(set_var_compressed(ogrp, o_varid));
00845             }
00846         }
00847     }
00848 #endif  /* USE_NETCDF4 */
00849     free(idimids);
00850     free(odimids);
00851     return stat;
00852 }
00853 
00854 /* copy the schema for all the variables in group igrp to group ogrp */
00855 static int
00856 copy_vars(int igrp, int ogrp)
00857 {
00858     int stat = NC_NOERR;
00859     int nvars;
00860     int varid;
00861 
00862     int iv;                     /* variable number */
00863     idnode_t* vlist = 0;                /* list for vars specified with -v option */
00864 
00865     /*
00866      * If any vars were specified with -v option, get list of
00867      * associated variable ids relative to this group.  Assume vars
00868      * specified with syntax like "grp1/grp2/varname" or
00869      * "/grp1/grp2/varname" if they are in groups.
00870      */
00871     vlist = newidlist();        /* list for vars specified with -v option */
00872     for (iv=0; iv < option_nlvars; iv++) {
00873         if(nc_inq_gvarid(igrp, option_lvars[iv], &varid) == NC_NOERR)
00874             idadd(vlist, varid);
00875     }
00876     
00877     NC_CHECK(nc_inq_nvars(igrp, &nvars));
00878     for (varid = 0; varid < nvars; varid++) {
00879         if (!option_varstruct && option_nlvars > 0 && ! idmember(vlist, varid))
00880             continue;
00881         NC_CHECK(copy_var(igrp, varid, ogrp));
00882     }
00883     freeidlist(vlist);
00884     return stat;
00885 }
00886 
00887 /* Copy the schema in a group and all its subgroups, recursively, from
00888  * group igrp in input to parent group ogrp in destination.  Use
00889  * dimmap array to map input dimids to output dimids. */
00890 static int
00891 copy_schema(int igrp, int ogrp) 
00892 {
00893     int stat = NC_NOERR;
00894     int ogid;                   /* like igrp but in output file */
00895 
00896     /* get groupid in output corresponding to group igrp in input,
00897      * given parent group (or root group) ogrp in output */
00898     NC_CHECK(get_grpid(igrp, ogrp, &ogid));
00899 
00900     NC_CHECK(copy_dims(igrp, ogid));
00901     NC_CHECK(copy_atts(igrp, NC_GLOBAL, ogid, NC_GLOBAL));
00902     NC_CHECK(copy_vars(igrp, ogid));
00903 #ifdef USE_NETCDF4    
00904     {
00905         int numgrps;
00906         int *grpids;
00907         int i;
00908         /* Copy schema from subgroups */
00909         stat = nc_inq_grps(igrp, &numgrps, NULL);
00910         grpids = (int *)emalloc((numgrps + 1) * sizeof(int));
00911         NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
00912         
00913         for(i = 0; i < numgrps; i++) {
00914             if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) {
00915                 NC_CHECK(copy_schema(grpids[i], ogid));
00916             }
00917         }
00918         free(grpids);
00919     }
00920 #endif  /* USE_NETCDF4 */
00921     return stat;    
00922 }
00923 
00924 /* Return number of values for a variable varid in a group igrp */
00925 static int
00926 inq_nvals(int igrp, int varid, long long *nvalsp) {
00927     int stat = NC_NOERR;
00928     int ndims;
00929     int *dimids;
00930     int dim;
00931     long long nvals = 1;
00932 
00933     NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
00934     dimids = (int *) emalloc((ndims + 1) * sizeof(int));
00935     NC_CHECK(nc_inq_vardimid (igrp, varid, dimids));
00936     for(dim = 0; dim < ndims; dim++) {
00937         size_t len;
00938         NC_CHECK(nc_inq_dimlen(igrp, dimids[dim], &len));
00939         nvals *= len;
00940     }
00941     if(nvalsp)
00942         *nvalsp = nvals;
00943     free(dimids);
00944     return stat;
00945 }
00946 
00947 /* Copy data from variable varid in group igrp to corresponding group
00948  * ogrp. */
00949 static int
00950 copy_var_data(int igrp, int varid, int ogrp) {
00951     int stat = NC_NOERR;
00952     nc_type vartype;
00953     long long nvalues;          /* number of values for this variable */
00954     size_t ntoget;              /* number of values to access this iteration */
00955     size_t value_size;          /* size of a single value of this variable */
00956     static void *buf = 0;       /* buffer for the variable values */
00957     char varname[NC_MAX_NAME];
00958     int ovarid;
00959     size_t *start;
00960     size_t *count;
00961     nciter_t *iterp;            /* opaque structure for iteration status */
00962     int do_realloc = 0;
00963 #ifdef USE_NETCDF4    
00964     int okind;
00965     size_t chunksize;
00966 #endif
00967 
00968     NC_CHECK(inq_nvals(igrp, varid, &nvalues));
00969     if(nvalues == 0)
00970         return stat;
00971     /* get corresponding output variable */
00972     NC_CHECK(nc_inq_varname(igrp, varid, varname));
00973     NC_CHECK(nc_inq_varid(ogrp, varname, &ovarid));
00974     NC_CHECK(nc_inq_vartype(igrp, varid, &vartype));
00975     value_size = val_size(igrp, varid);
00976     if(value_size > option_copy_buffer_size) {
00977         option_copy_buffer_size = value_size;
00978         do_realloc = 1;
00979     }
00980 #ifdef USE_NETCDF4    
00981     NC_CHECK(nc_inq_format(ogrp, &okind));
00982     if(okind == NC_FORMAT_NETCDF4 || okind == NC_FORMAT_NETCDF4_CLASSIC) {
00983         /* if this variable chunked, set variable chunk cache size */ 
00984         int contig = 1;
00985         NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &contig, NULL));
00986         if(contig == 0) {       /* chunked */
00987             if(option_compute_chunkcaches) {
00988                 /* Try to estimate variable-specific chunk cache,
00989                  * depending on specific size and shape of this
00990                  * variable's chunks.  This doesn't work yet. */
00991                 size_t chunkcache_size, chunkcache_nelems;
00992                 float chunkcache_preemption;
00993                 NC_CHECK(inq_var_chunking_params(igrp, varid, ogrp, ovarid,
00994                                                  &chunkcache_size, 
00995                                                  &chunkcache_nelems, 
00996                                                  &chunkcache_preemption));
00997                 NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, 
00998                                                 chunkcache_size, 
00999                                                 chunkcache_nelems, 
01000                                                 chunkcache_preemption)); 
01001             } else {            
01002                 /* by default, use same chunk cache for all chunked variables */
01003                 NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, 
01004                                                 option_chunk_cache_size,
01005                                                 option_chunk_cache_nelems,
01006                                                 COPY_CHUNKCACHE_PREEMPTION));
01007             }
01008         }
01009     }
01010     /* For chunked variables, option_copy_buffer_size must also be at least as large as
01011      * size of a chunk in input, otherwise resize it. */
01012     {
01013         NC_CHECK(inq_var_chunksize(igrp, varid, &chunksize));
01014         if(chunksize > option_copy_buffer_size) {
01015             option_copy_buffer_size = chunksize;
01016             do_realloc = 1;
01017         }
01018     }
01019 #endif  /* USE_NETCDF4 */
01020     if(buf && do_realloc) {
01021         free(buf);
01022         buf = 0;
01023     }
01024     if(buf == 0) {              /* first time or needs to grow */
01025         buf = emalloc(option_copy_buffer_size);
01026         memset((void*)buf,0,option_copy_buffer_size);
01027     }
01028 
01029     /* initialize variable iteration */
01030     NC_CHECK(nc_get_iter(igrp, varid, option_copy_buffer_size, &iterp));
01031 
01032     start = (size_t *) emalloc((iterp->rank + 1) * sizeof(size_t));
01033     count = (size_t *) emalloc((iterp->rank + 1) * sizeof(size_t));
01034     /* nc_next_iter() initializes start and count on first call,
01035      * changes start and count to iterate through whole variable on
01036      * subsequent calls. */
01037     while((ntoget = nc_next_iter(iterp, start, count)) > 0) {
01038         NC_CHECK(nc_get_vara(igrp, varid, start, count, buf));
01039         NC_CHECK(nc_put_vara(ogrp, ovarid, start, count, buf));
01040 #ifdef USE_NETCDF4
01041         /* we have to explicitly free values for strings and vlens */
01042         if(vartype == NC_STRING) {
01043             NC_CHECK(nc_free_string(ntoget, (char **)buf));
01044         } else if(vartype > NC_STRING) { /* user-defined type */
01045             nc_type vclass;
01046             NC_CHECK(nc_inq_user_type(igrp, vartype, NULL, NULL, NULL, NULL, &vclass));
01047             if(vclass == NC_VLEN) {
01048                 NC_CHECK(nc_free_vlens(ntoget, (nc_vlen_t *)buf));
01049             }
01050         }
01051 #endif  /* USE_NETCDF4 */
01052     } /* end main iteration loop */
01053 #ifdef USE_NETCDF4
01054     /* We're all done with this input and output variable, so if
01055      * either variable is chunked, free up its variable chunk cache */
01056     /* NC_CHECK(free_var_chunk_cache(igrp, varid)); */
01057     /* NC_CHECK(free_var_chunk_cache(ogrp, ovarid)); */
01058 #endif  /* USE_NETCDF4 */
01059     free(start);
01060     free(count);
01061     NC_CHECK(nc_free_iter(iterp));
01062     return stat;
01063 }
01064 
01065 /* Copy data from variables in group igrp to variables in
01066  * corresponding group with parent ogrp, and all subgroups
01067  * recursively  */
01068 static int
01069 copy_data(int igrp, int ogrp)
01070 {
01071     int stat = NC_NOERR;
01072     int ogid;
01073     int nvars;
01074     int varid;
01075 #ifdef USE_NETCDF4
01076     int numgrps;
01077     int *grpids;
01078     int i;
01079 #endif
01080 
01081     int iv;                     /* variable number */
01082     idnode_t* vlist = NULL;     /* list for vars specified with -v option */
01083 
01084     /*
01085      * If any vars were specified with -v option, get list of
01086      * associated variable ids relative to this group.  Assume vars
01087      * specified with syntax like "grp1/grp2/varname" or
01088      * "/grp1/grp2/varname" if they are in groups.
01089      */
01090     vlist = newidlist();        /* list for vars specified with -v option */
01091     for (iv=0; iv < option_nlvars; iv++) {
01092         if(nc_inq_gvarid(igrp, option_lvars[iv], &varid) == NC_NOERR)
01093             idadd(vlist, varid);
01094     }
01095     
01096     /* get groupid in output corresponding to group igrp in input,
01097      * given parent group (or root group) ogrp in output */
01098     NC_CHECK(get_grpid(igrp, ogrp, &ogid));
01099     
01100     /* Copy data from this group */
01101     NC_CHECK(nc_inq_nvars(igrp, &nvars));
01102 
01103     for (varid = 0; varid < nvars; varid++) {
01104         if (option_nlvars > 0 && ! idmember(vlist, varid))
01105             continue;
01106         if (!group_wanted(igrp, option_nlgrps, option_grpids))
01107             continue;
01108         NC_CHECK(copy_var_data(igrp, varid, ogid));
01109     }
01110 #ifdef USE_NETCDF4
01111     /* Copy data from subgroups */
01112     stat = nc_inq_grps(igrp, &numgrps, NULL);
01113     grpids = (int *)emalloc((numgrps + 1) * sizeof(int));
01114     NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
01115 
01116     for(i = 0; i < numgrps; i++) {
01117         if (!option_grpstruct && !group_wanted(grpids[i], option_nlgrps, option_grpids))
01118             continue;
01119         NC_CHECK(copy_data(grpids[i], ogid));
01120     }
01121     free(grpids);
01122 #endif  /* USE_NETCDF4 */
01123     freeidlist(vlist);
01124     return stat;
01125 }
01126 
/* Count total number of dimensions in ncid and all its descendant
 * subgroups.  Subgroups are only visited in netCDF-4 builds; otherwise
 * the result is just the dimension count reported for ncid. */
int
count_dims(int ncid) {
    int ndims;
    NC_CHECK(nc_inq_ndims(ncid, &ndims));
#ifdef USE_NETCDF4
    {
        int numgrps;
        NC_CHECK(nc_inq_grps(ncid, &numgrps, NULL));
        if(numgrps > 0) {
            int igrp;
            int *grpids = emalloc(numgrps * sizeof(int));
            NC_CHECK(nc_inq_grps(ncid, &numgrps, grpids));
            /* add the dimensions of each child group, recursively */
            for(igrp = 0; igrp < numgrps; igrp++) {
                ndims += count_dims(grpids[igrp]);
            }
            free(grpids);
        }
    }
#endif  /* USE_NETCDF4 */
    return ndims;
}
01147 
01148 /* Test if special case: netCDF-3 file with more than one record
01149  * variable.  Performance can be very slow for this case when the disk
01150  * block size is large, there are many record variables, and a
01151  * record's worth of data for some variables is smaller than the disk
01152  * block size.  In this case, copying the record variables a variable
01153  * at a time causes much rereading of record data, so instead we want
01154  * to copy data a record at a time. */
01155 static int
01156 nc3_special_case(int ncid, int kind) {
01157     if (kind == NC_FORMAT_CLASSIC ||  kind == NC_FORMAT_64BIT) {
01158         int recdimid = 0;
01159         NC_CHECK(nc_inq_unlimdim(ncid, &recdimid));
01160         if (recdimid != -1) {   /* we have a record dimension */
01161             int nvars;
01162             int varid;
01163             NC_CHECK(nc_inq_nvars(ncid, &nvars));
01164             for (varid = 0; varid < nvars; varid++) {
01165                 int *dimids = 0;
01166                 int ndims;
01167                 NC_CHECK( nc_inq_varndims(ncid, varid, &ndims) );
01168                 if (ndims > 0) {
01169                     int dimids0;
01170                     dimids = (int *) emalloc((ndims + 1) * sizeof(int));
01171                     NC_CHECK( nc_inq_vardimid(ncid, varid, dimids) );
01172                     dimids0 = dimids[0];
01173                     free(dimids);
01174                     if(dimids0 == recdimid) {
01175                         return 1; /* found a record variable */
01176                     }
01177                 }
01178             }
01179         }
01180     }
01181     return 0;
01182 }
01183 
01184 /* Classify variables in ncid as either fixed-size variables (with no
01185  * unlimited dimension) or as record variables (with an unlimited
01186  * dimension) */
01187 static int
01188 classify_vars(
01189     int ncid,   /* netCDF ID */
01190     size_t *nf, /* for returning number of fixed-size variables */
01191     int **fvars,        /* the array of fixed_size variable IDS, caller should free */
01192     size_t *nr, /* for returning number of record variables */
01193     int **rvars)        /* the array of record variable IDs, caller should free */
01194 {
01195     int varid;
01196     int nvars;
01197     NC_CHECK(nc_inq_nvars(ncid, &nvars));
01198     *nf = 0;
01199     *fvars = (int *) emalloc(nvars * sizeof(int));
01200     *nr = 0;
01201     *rvars = (int *) emalloc(nvars * sizeof(int));
01202     for (varid = 0; varid < nvars; varid++) {
01203         if (isrecvar(ncid, varid)) {
01204             (*rvars)[*nr] = varid;
01205             (*nr)++;
01206         } else {
01207             (*fvars)[*nf] = varid;
01208             (*nf)++;
01209         }
01210     }
01211     return NC_NOERR;
01212 }
01213 
01214 /* Only called for classic format or 64-bit offset format files, to speed up special case */
01215 static int
01216 copy_fixed_size_data(int igrp, int ogrp, size_t nfixed_vars, int *fixed_varids) {
01217     size_t ivar;
01218     /* for each fixed-size variable, copy data */
01219     for (ivar = 0; ivar < nfixed_vars; ivar++) {
01220         int varid = fixed_varids[ivar];
01221         NC_CHECK(copy_var_data(igrp, varid, ogrp));
01222     }
01223     if (fixed_varids)
01224         free(fixed_varids);
01225     return NC_NOERR;
01226 }
01227 
01228 /* copy a record's worth of data for a variable from input to output */
01229 static int
01230 copy_rec_var_data(int ncid,     /* input */
01231                   int ogrp,     /* output */
01232                   int irec,     /* record number */
01233                   int varid,    /* input variable id */
01234                   int ovarid,   /* output variable id */
01235                   size_t *start,   /* start indices for record data */
01236                   size_t *count,   /* edge lengths for record data */
01237                   void *buf        /* buffer large enough to hold data */
01238     ) 
01239 {
01240     NC_CHECK(nc_get_vara(ncid, varid, start, count, buf));
01241     NC_CHECK(nc_put_vara(ogrp, ovarid, start, count, buf));
01242     return NC_NOERR;
01243 }
01244 
01245 /* Only called for classic format or 64-bit offset format files, to speed up special case */
01246 static int
01247 copy_record_data(int ncid, int ogrp, size_t nrec_vars, int *rec_varids) {
01248     int unlimid;
01249     size_t nrecs = 0;           /* how many records? */
01250     size_t irec;
01251     size_t ivar;
01252     void **buf;                 /* space for reading in data for each variable */
01253     int *rec_ovarids;           /* corresponding varids in output */
01254     size_t **start;
01255     size_t **count;
01256     NC_CHECK(nc_inq_unlimdim(ncid, &unlimid));
01257     NC_CHECK(nc_inq_dimlen(ncid, unlimid, &nrecs));
01258     buf = (void **) emalloc(nrec_vars * sizeof(void *));
01259     rec_ovarids = (int *) emalloc(nrec_vars * sizeof(int));
01260     start = (size_t **) emalloc(nrec_vars * sizeof(size_t*));
01261     count = (size_t **) emalloc(nrec_vars * sizeof(size_t*));
01262     /* get space to hold one record's worth of data for each record variable */
01263     for (ivar = 0; ivar < nrec_vars; ivar++) {
01264         int varid;
01265         int ndims;
01266         int *dimids;
01267         size_t value_size;
01268         int dimid;
01269         int ii;
01270         size_t nvals;
01271         char varname[NC_MAX_NAME];
01272         varid = rec_varids[ivar];
01273         NC_CHECK(nc_inq_varndims(ncid, varid, &ndims));
01274         dimids = (int *) emalloc((1 + ndims) * sizeof(int));
01275         start[ivar] = (size_t *) emalloc(ndims * sizeof(size_t));
01276         count[ivar] = (size_t *) emalloc(ndims * sizeof(size_t));
01277         NC_CHECK(nc_inq_vardimid (ncid, varid, dimids));
01278         value_size = val_size(ncid, varid);
01279         nvals = 1;
01280         for(ii = 1; ii < ndims; ii++) { /* for rec size, don't include first record dimension */
01281             size_t dimlen;
01282             dimid = dimids[ii];
01283             NC_CHECK(nc_inq_dimlen(ncid, dimid, &dimlen));
01284             nvals *= dimlen;
01285             start[ivar][ii] = 0;
01286             count[ivar][ii] = dimlen;
01287         }
01288         start[ivar][0] = 0;     
01289         count[ivar][0] = 1;     /* 1 record */
01290         buf[ivar] = (void *) emalloc(nvals * value_size);
01291         NC_CHECK(nc_inq_varname(ncid, varid, varname));
01292         NC_CHECK(nc_inq_varid(ogrp, varname, &rec_ovarids[ivar]));
01293         if(dimids)
01294             free(dimids);
01295     }
01296 
01297     /* for each record, copy all variable data */
01298     for(irec = 0; irec < nrecs; irec++) {
01299         for (ivar = 0; ivar < nrec_vars; ivar++) {
01300             int varid, ovarid;
01301             varid = rec_varids[ivar];
01302             ovarid = rec_ovarids[ivar];
01303             start[ivar][0] = irec;
01304             NC_CHECK(copy_rec_var_data(ncid, ogrp, irec, varid, ovarid, 
01305                                        start[ivar], count[ivar], buf[ivar]));
01306         }
01307     }
01308     for (ivar = 0; ivar < nrec_vars; ivar++) {
01309         if(start[ivar])
01310             free(start[ivar]);
01311         if(count[ivar])
01312             free(count[ivar]);
01313     }
01314     if(start)
01315         free(start);
01316     if(count)
01317         free(count);
01318     for (ivar = 0; ivar < nrec_vars; ivar++) {
01319         if(buf[ivar]) {
01320             free(buf[ivar]);
01321         }
01322     }
01323     if (rec_varids)
01324         free(rec_varids);
01325     if(buf)
01326         free(buf);
01327     if(rec_ovarids)
01328         free(rec_ovarids);
01329     return NC_NOERR;
01330 }
01331 
01332 /* copy infile to outfile using netCDF API
01333  */
01334 static int
01335 copy(char* infile, char* outfile)
01336 {
01337     int stat = NC_NOERR;
01338     int igrp, ogrp;
01339     int inkind, outkind;
01340     int open_mode = NC_NOWRITE;
01341     int create_mode = NC_CLOBBER;
01342     size_t ndims;
01343 
01344     if(option_read_diskless) {
01345         open_mode |= NC_DISKLESS;
01346     }
01347 
01348     NC_CHECK(nc_open(infile, open_mode, &igrp));
01349 
01350     NC_CHECK(nc_inq_format(igrp, &inkind));
01351 
01352 /* option_kind specifies which netCDF format for output: 
01353  *   -1 -> same as input, 
01354  *    1 -> classic
01355  *    2 -> 64-bit offset
01356  *    3 -> netCDF-4, 
01357  *    4 -> netCDF-4 classic model
01358  *
01359  * However, if compression or shuffling was specified and kind was -1,
01360  * kind is changed to format 4 that supports compression for input of
01361  * type 1 or 2.  
01362  */
01363     outkind = option_kind;
01364     if (option_kind == SAME_AS_INPUT) { /* default, kind not specified */
01365         outkind = inkind;
01366         /* Deduce output kind if netCDF-4 features requested */
01367         if (inkind == NC_FORMAT_CLASSIC || inkind == NC_FORMAT_64BIT) { 
01368             if (option_deflate_level > 0 || 
01369                 option_shuffle_vars == NC_SHUFFLE || 
01370                 option_chunkspec) 
01371             { 
01372                 outkind = NC_FORMAT_NETCDF4_CLASSIC;
01373             }
01374         }
01375     }
01376 
01377 #ifdef USE_NETCDF4
01378     if(option_chunkspec) {
01379         /* Now that input is open, can parse option_chunkspec into binary
01380          * structure. */
01381         NC_CHECK(chunkspec_parse(igrp, option_chunkspec));
01382     }
01383 #endif  /* USE_NETCDF4 */
01384 
01385         /* Check if any vars in -v don't exist */
01386     if(missing_vars(igrp, option_nlvars, option_lvars))
01387         exit(EXIT_FAILURE);
01388 
01389     if(option_nlgrps > 0) {
01390         if(inkind != NC_FORMAT_NETCDF4) {
01391             error("Group list (-g ...) only permitted for netCDF-4 file");
01392             exit(EXIT_FAILURE);
01393         }
01394         /* Check if any grps in -g don't exist */
01395         if(grp_matches(igrp, option_nlgrps, option_lgrps, option_grpids) == 0)
01396             exit(EXIT_FAILURE);
01397     }
01398 
01399     if(option_write_diskless)
01400         create_mode |= NC_WRITE | NC_DISKLESS; /* NC_WRITE persists diskless file on close */
01401     switch(outkind) {
01402     case NC_FORMAT_CLASSIC:
01403         /* nothing to do */
01404         break;
01405     case NC_FORMAT_64BIT:
01406         create_mode |= NC_64BIT_OFFSET;
01407         break;
01408 #ifdef USE_NETCDF4
01409     case NC_FORMAT_NETCDF4:
01410         create_mode |= NC_NETCDF4;
01411         break;
01412     case NC_FORMAT_NETCDF4_CLASSIC:
01413         create_mode |= NC_NETCDF4 | NC_CLASSIC_MODEL;
01414         break;
01415 #else
01416     case NC_FORMAT_NETCDF4:
01417     case NC_FORMAT_NETCDF4_CLASSIC:
01418         error("nccopy built with --disable-netcdf4, can't create netCDF-4 files");
01419         break;
01420 #endif  /* USE_NETCDF4 */
01421     default:
01422         error("bad value (%d) for -k option\n", option_kind);
01423         break;
01424     }
01425     NC_CHECK(nc_create(outfile, create_mode, &ogrp));
01426     NC_CHECK(nc_set_fill(ogrp, NC_NOFILL, NULL));
01427 
01428 #ifdef USE_NETCDF4
01429     /* Because types in one group may depend on types in a different
01430      * group, need to create all groups before defining types */
01431     if(inkind == NC_FORMAT_NETCDF4) {
01432         NC_CHECK(copy_groups(igrp, ogrp));
01433         NC_CHECK(copy_types(igrp, ogrp));
01434     }
01435 #endif  /* USE_NETCDF4 */
01436 
01437     ndims = count_dims(igrp);
01438     NC_CHECK(dimmap_init(ndims));
01439     NC_CHECK(copy_schema(igrp, ogrp));
01440     NC_CHECK(nc_enddef(ogrp));
01441 
01442     /* For performance, special case netCDF-3 input or output file with record
01443      * variables, to copy a record-at-a-time instead of a
01444      * variable-at-a-time. */
01445     /* TODO: check that these special cases work with -v option */
01446     if(nc3_special_case(igrp, inkind)) {
01447         size_t nfixed_vars, nrec_vars;
01448         int *fixed_varids;
01449         int *rec_varids;
01450         NC_CHECK(classify_vars(igrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids));
01451         NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids));
01452         NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids));
01453     } else if (nc3_special_case(ogrp, outkind)) {
01454         size_t nfixed_vars, nrec_vars;
01455         int *fixed_varids;
01456         int *rec_varids;
01457         /* classifies output vars, but returns input varids */
01458         NC_CHECK(classify_vars(ogrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids));
01459         NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids));
01460         NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids));
01461     } else {        
01462         NC_CHECK(copy_data(igrp, ogrp)); /* recursive, to handle nested groups */
01463     }
01464 
01465     NC_CHECK(nc_close(igrp));
01466     NC_CHECK(nc_close(ogrp));
01467     return stat;
01468 }
01469 
/*
 * Parse a non-negative number with an optional multiplier suffix.
 *
 * The leading part of str is converted with strtod().  If a character
 * follows the number it must be K, M, G, T, or P (or the lower-case
 * equivalent) and the value is multiplied by 1e3, 1e6, 1e9, 1e12, or
 * 1e15 respectively.  Only the first character after the number is
 * examined; anything following the suffix letter is ignored.
 *
 * Returns the scaled value, or -1.0 for error: no number at the start
 * of str, a negative or NaN value, a conversion error reported through
 * errno, or an unrecognized suffix.
 */
static double
double_with_suffix(char *str) {
    double dval;
    char *suffix = 0;
    errno = 0;
    dval = strtod(str, &suffix);
    /* suffix == str means strtod() consumed nothing, e.g. "" or "K";
     * !(dval >= 0) rejects negative values and NaN alike */
    if(suffix == str || errno != 0 || !(dval >= 0))
        return -1.0;
    if(*suffix) {
        switch (*suffix) {
        case 'k': case 'K':
            dval *= 1000;
            break;
        case 'm': case 'M':
            dval *= 1000000;
            break;
        case 'g': case 'G':
            dval *= 1000000000;
            break;
        case 't': case 'T':
            dval *= 1.0e12;
            break;
        case 'p': case 'P':
            dval *= 1.0e15;
            break;
        default:
            dval = -1.0;        /* error, suffix multiplier must be K, M, G, T, or P */
            break;
        }
    }
    return dval;
}
01507 
01508 static void
01509 usage(void)
01510 {
01511 #define USAGE   "\
01512   [-k n]    specify kind of netCDF format for output file, default same as input\n\
01513             1 classic, 2 64-bit offset, 3 netCDF-4, 4 netCDF-4 classic model\n\
01514   [-d n]    set deflation compression level, default same as input (0=none 9=max)\n\
01515   [-s]      add shuffle option to deflation compression\n\
01516   [-c chunkspec] specify chunking for dimensions, e.g. \"dim1/N1,dim2/N2,...\"\n\
01517   [-u]      convert unlimited dimensions to fixed-size dimensions in output copy\n\
01518   [-w]      write whole output file from diskless netCDF on close\n\
01519   [-v var1,...] include data for only listed variables, but definitions for all variables\n\
01520   [-V var1,...] include definitions and data for only listed variables\n\
01521   [-g grp1,...] include data for only variables in listed groups, but all definitions\n\
01522   [-G grp1,...] include definitions and data only for variables in listed groups\n\
01523   [-m n]    set size in bytes of copy buffer, default is 5000000 bytes\n\
01524   [-h n]    set size in bytes of chunk_cache for chunked variables\n\
01525   [-e n]    set number of elements that chunk_cache can hold\n\
01526   [-r]      read whole input file into diskless file on open (classic or 64-bit offset format only)\n\
01527   infile    name of netCDF input file\n\
01528   outfile   name for netCDF output file\n"
01529 
01530     /* Don't document this flaky option until it works better */
01531     /* [-x]      use experimental computed estimates for variable-specific chunk caches\n\ */
01532 
01533     error("%s [-k n] [-d n] [-s] [-c chunkspec] [-u] [-w] [-[v|V] varlist] [-[g|G] grplist] [-m n] [-h n] [-e n] [-r] infile outfile\n%s",
01534           progname, USAGE);
01535 }
01536 
01537 int
01538 main(int argc, char**argv)
01539 {
01540     char* inputfile = NULL;
01541     char* outputfile = NULL;
01542     int c;
01543 
01544 /* table of formats for legal -k values */
01545     struct Kvalues {
01546         char* name;
01547         int kind;
01548     } legalkinds[] = {
01549         {"1", NC_FORMAT_CLASSIC},
01550         {"classic", NC_FORMAT_CLASSIC},
01551         
01552         /* The 64-bit offset kind (2) */
01553         {"2", NC_FORMAT_64BIT},
01554         {"64-bit-offset", NC_FORMAT_64BIT},
01555         {"64-bit offset", NC_FORMAT_64BIT},
01556         
01557         /* NetCDF-4 HDF5 format */
01558         {"3", NC_FORMAT_NETCDF4},
01559         {"hdf5", NC_FORMAT_NETCDF4},
01560         {"netCDF-4", NC_FORMAT_NETCDF4},
01561         {"netCDF4", NC_FORMAT_NETCDF4},
01562         {"enhanced", NC_FORMAT_NETCDF4},
01563 
01564         /* NetCDF-4 HDF5 format, but using only nc3 data model */
01565         {"4", NC_FORMAT_NETCDF4_CLASSIC},
01566         {"hdf5-nc3", NC_FORMAT_NETCDF4_CLASSIC},
01567         {"netCDF-4 classic model", NC_FORMAT_NETCDF4_CLASSIC},
01568         {"netCDF4_classic", NC_FORMAT_NETCDF4_CLASSIC},
01569         {"enhanced-nc3", NC_FORMAT_NETCDF4_CLASSIC},
01570 
01571         /* null terminate*/
01572         {NULL,0}
01573     };
01574 
01575     opterr = 1;
01576     progname = argv[0];
01577 
01578     if (argc <= 1)
01579     {
01580        usage();
01581     }
01582 
01583     while ((c = getopt(argc, argv, "k:d:sum:c:h:e:rwxg:G:v:V:")) != -1) {
01584         switch(c) {
01585         case 'k': /* for specifying variant of netCDF format to be generated 
01586                      Possible values are:
01587                      1 (=> classic 32 bit)
01588                      2 (=> classic 64 bit offsets)
01589                      3 (=> netCDF-4/HDF5)
01590                      4 (=> classic, but stored in netCDF-4/HDF5 format)
01591                      Also allow string versions of above
01592                      "classic"
01593                      "64-bit-offset"
01594                      "64-bit offset"
01595                      "enhanced" | "hdf5" | "netCDF-4"
01596                      "enhanced-nc3" | "hdf5-nc3" | "netCDF-4 classic model"
01597                    */
01598             {
01599                 struct Kvalues* kvalue;
01600                 char *kind_name = (char *) emalloc(strlen(optarg)+1);
01601                 (void)strcpy(kind_name, optarg);
01602                 for(kvalue=legalkinds;kvalue->name;kvalue++) {
01603                     if(strcmp(kind_name,kvalue->name) == 0) {
01604                         option_kind = kvalue->kind;
01605                         break;
01606                     }
01607                 }
01608                 if(kvalue->name == NULL) {
01609                     error("invalid format: %s", kind_name);
01610                 }
01611             }
01612             break;
01613         case 'd':               /* non-default compression level specified */
01614             option_deflate_level = strtol(optarg, NULL, 10);
01615             if(option_deflate_level < 0 || option_deflate_level > 9) {
01616                 error("invalid deflation level: %d", option_deflate_level);
01617             }
01618             break;
01619         case 's':               /* shuffling, may improve compression */
01620             option_shuffle_vars = NC_SHUFFLE;
01621             break;
01622         case 'u':               /* convert unlimited dimensions to fixed size */
01623             option_fix_unlimdims = 1;
01624             break;
01625         case 'm':               /* non-default size of data copy buffer */
01626         {
01627             double dval = double_with_suffix(optarg);   /* "K" for kilobytes. "M" for megabytes, ... */
01628             if(dval < 0)
01629                 error("Suffix used for '-m' option value must be K, M, G, T, or P");
01630             option_copy_buffer_size = dval;
01631             break;
01632         }
01633         case 'h':               /* non-default size of chunk cache */
01634         {
01635             double dval = double_with_suffix(optarg);   /* "K" for kilobytes. "M" for megabytes, ... */
01636             if(dval < 0)
01637                 error("Suffix used for '-h' option value must be K, M, G, T, or P");
01638             option_chunk_cache_size = dval;
01639             break;
01640         }
01641         case 'e':               /* number of elements chunk cache can hold */
01642         {
01643             double dval = double_with_suffix(optarg);   /* "K" for kilobytes. "M" for megabytes, ... */
01644             if(dval < 0 )
01645                 error("Suffix used for '-e' option value must be K, M, G, T, or P");
01646             option_chunk_cache_nelems = (long)dval;
01647             break;
01648         }
01649         case 'r':
01650             option_read_diskless = 1; /* read into memory on open */
01651             break;
01652         case 'w':
01653             option_write_diskless = 1; /* write to memory, persist on close */
01654             break;
01655         case 'x':               /* use experimental variable-specific chunk caches */
01656             option_compute_chunkcaches = 1;
01657             break;
01658         case 'c':               /* optional chunking spec for each dimension in list */
01659             /* save chunkspec string for parsing later, once we know input ncid */
01660             option_chunkspec = strdup(optarg);
01661             break;
01662         case 'g':               /* group names */
01663             /* make list of names of groups specified */
01664             make_lgrps (optarg, &option_nlgrps, &option_lgrps, &option_grpids);
01665             option_grpstruct = true;
01666             break;
01667         case 'G':               /* group names */
01668             /* make list of names of groups specified */
01669             make_lgrps (optarg, &option_nlgrps, &option_lgrps, &option_grpids);
01670             option_grpstruct = false;
01671             break;
01672         case 'v':               /* variable names */
01673             /* make list of names of variables specified */
01674             make_lvars (optarg, &option_nlvars, &option_lvars);
01675             option_varstruct = true;
01676             break;
01677         case 'V':               /* variable names */
01678             /* make list of names of variables specified */
01679             make_lvars (optarg, &option_nlvars, &option_lvars);
01680             option_varstruct = false;
01681             break;
01682         default: 
01683             usage();
01684         }
01685     }
01686     argc -= optind;
01687     argv += optind;
01688 
01689     if (argc != 2) {
01690         error("one input file and one output file required");
01691     }
01692     inputfile = argv[0];
01693     outputfile = argv[1];
01694 
01695     if(strcmp(inputfile, outputfile) == 0) {
01696         error("output would overwrite input");
01697     }
01698 
01699     if(copy(inputfile, outputfile) != NC_NOERR)
01700         exit(EXIT_FAILURE);
01701     exit(EXIT_SUCCESS);
01702 }
01703 END_OF_MAIN();
 All Data Structures Files Functions Variables Typedefs Defines