Leptonica  1.54
Файл src/utils.c
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "allheaders.h"
#include <sys/stat.h>
#include <sys/types.h>
#include <math.h>
#include <stddef.h>
#include <sys/time.h>
#include <sys/resource.h>

Функции

l_int32 setMsgSeverity (l_int32 newsev)
l_int32 returnErrorInt (const char *msg, const char *procname, l_int32 ival)
l_float32 returnErrorFloat (const char *msg, const char *procname, l_float32 fval)
void * returnErrorPtr (const char *msg, const char *procname, void *pval)
char * stringNew (const char *src)
l_int32 stringCopy (char *dest, const char *src, l_int32 n)
l_int32 stringReplace (char **pdest, const char *src)
l_int32 stringLength (const char *src, size_t size)
l_int32 stringCat (char *dest, size_t size, const char *src)
char * stringConcatNew (const char *first,...)
char * stringJoin (const char *src1, const char *src2)
l_int32 stringJoinIP (char **psrc1, const char *src2)
char * stringReverse (const char *src)
char * strtokSafe (char *cstr, const char *seps, char **psaveptr)
l_int32 stringSplitOnToken (char *cstr, const char *seps, char **phead, char **ptail)
char * stringRemoveChars (const char *src, const char *remchars)
l_int32 stringFindSubstr (const char *src, const char *sub, l_int32 *ploc)
char * stringReplaceSubstr (const char *src, const char *sub1, const char *sub2, l_int32 *pfound, l_int32 *ploc)
char * stringReplaceEachSubstr (const char *src, const char *sub1, const char *sub2, l_int32 *pcount)
L_DNA * arrayFindEachSequence (const l_uint8 *data, size_t datalen, const l_uint8 *sequence, size_t seqlen)
l_int32 arrayFindSequence (const l_uint8 *data, size_t datalen, const l_uint8 *sequence, size_t seqlen, l_int32 *poffset, l_int32 *pfound)
void * reallocNew (void **pindata, l_int32 oldsize, l_int32 newsize)
l_uint8 * l_binaryRead (const char *filename, size_t *pnbytes)
l_uint8 * l_binaryReadStream (FILE *fp, size_t *pnbytes)
l_uint8 * l_binaryReadSelect (const char *filename, size_t start, size_t nbytes, size_t *pnread)
l_uint8 * l_binaryReadSelectStream (FILE *fp, size_t start, size_t nbytes, size_t *pnread)
l_int32 l_binaryWrite (const char *filename, const char *operation, void *data, size_t nbytes)
size_t nbytesInFile (const char *filename)
size_t fnbytesInFile (FILE *fp)
l_uint8 * l_binaryCopy (l_uint8 *datas, size_t size)
l_int32 fileCopy (const char *srcfile, const char *newfile)
l_int32 fileConcatenate (const char *srcfile, const char *destfile)
l_int32 fileAppendString (const char *filename, const char *str)
l_int32 filesAreIdentical (const char *fname1, const char *fname2, l_int32 *psame)
l_uint16 convertOnLittleEnd16 (l_uint16 shortin)
l_uint16 convertOnBigEnd16 (l_uint16 shortin)
l_uint32 convertOnLittleEnd32 (l_uint32 wordin)
l_uint32 convertOnBigEnd32 (l_uint32 wordin)
FILE * fopenReadStream (const char *filename)
FILE * fopenWriteStream (const char *filename, const char *modestring)
FILE * lept_fopen (const char *filename, const char *mode)
l_int32 lept_fclose (FILE *fp)
void * lept_calloc (size_t nmemb, size_t size)
void lept_free (void *ptr)
l_int32 lept_mkdir (const char *subdir)
l_int32 lept_rmdir (const char *subdir)
void lept_direxists (const char *dir, l_int32 *pexists)
l_int32 lept_rm_match (const char *subdir, const char *substr)
l_int32 lept_rm (const char *subdir, const char *tail)
l_int32 lept_rmfile (const char *filepath)
l_int32 lept_mv (const char *srcfile, const char *newdir, const char *newtail, char **pnewpath)
l_int32 lept_cp (const char *srcfile, const char *newdir, const char *newtail, char **pnewpath)
l_int32 splitPathAtDirectory (const char *pathname, char **pdir, char **ptail)
l_int32 splitPathAtExtension (const char *pathname, char **pbasename, char **pextension)
char * pathJoin (const char *dir, const char *fname)
char * appendSubdirs (const char *basedir, const char *subdirs)
l_int32 convertSepCharsInPath (char *path, l_int32 type)
char * genPathname (const char *dir, const char *fname)
l_int32 makeTempDirname (char *result, size_t nbytes, const char *subdir)
l_int32 modifyTrailingSlash (char *path, size_t nbytes, l_int32 flag)
char * genTempFilename (const char *dir, const char *tail, l_int32 usetime, l_int32 usepid)
l_int32 extractNumberFromFilename (const char *fname, l_int32 numpre, l_int32 numpost)
l_int32 fileCorruptByDeletion (const char *filein, l_float32 loc, l_float32 size, const char *fileout)
l_int32 fileCorruptByMutation (const char *filein, l_float32 loc, l_float32 size, const char *fileout)
l_int32 genRandomIntegerInRange (l_int32 range, l_int32 seed, l_int32 *pval)
l_int32 lept_roundftoi (l_float32 fval)
l_int32 l_hashStringToUint64 (const char *str, l_uint64 *phash)
l_int32 l_hashPtToUint64 (l_int32 x, l_int32 y, l_uint64 *phash)
l_int32 l_hashPtToUint64Fast (l_int32 nbuckets, l_int32 x, l_int32 y, l_uint64 *phash)
l_int32 l_hashFloat64ToUint64 (l_int32 nbuckets, l_float64 val, l_uint64 *phash)
l_int32 findNextLargerPrime (l_int32 start, l_uint32 *pprime)
l_int32 lept_isPrime (l_uint64 n, l_int32 *pis_prime, l_uint32 *pfactor)
l_uint32 convertBinaryToGrayCode (l_uint32 val)
l_uint32 convertGrayCodeToBinary (l_uint32 val)
char * getLeptonicaVersion ()
void startTimer (void)
l_float32 stopTimer (void)
L_TIMER startTimerNested (void)
l_float32 stopTimerNested (L_TIMER rusage_start)
void l_getCurrentTime (l_int32 *sec, l_int32 *usec)
L_WALLTIMER * startWallTimer (void)
l_float32 stopWallTimer (L_WALLTIMER **ptimer)
char * l_getFormattedDate ()

Переменные

LEPT_DLL l_int32 LeptMsgSeverity = DEFAULT_SEVERITY
static struct rusage rusage_before
static struct rusage rusage_after

Функции

char* appendSubdirs ( const char *  basedir,
const char *  subdirs 
)

appendSubdirs()

Input: basedir subdirs Return: concatenated full directory path without trailing slash, or null on error

Notes: (1) Use unix pathname separators (2) Allocates a new string: <basedir>/<subdirs>

L_DNA* arrayFindEachSequence ( const l_uint8 *  data,
size_t  datalen,
const l_uint8 *  sequence,
size_t  seqlen 
)

arrayFindEachSequence()

Input: data (byte array) datalen (length of data, in bytes) sequence (subarray of bytes to find in data) seqlen (length of sequence, in bytes) Return: dna of offsets where the sequence is found, or null if none are found or on error

Notes: (1) The byte arrays 'data' and 'sequence' are not C strings, as they can contain null bytes. Therefore, for each we must give the length of the array. (2) This finds every occurrence in 'data' of 'sequence'.

l_int32 arrayFindSequence ( const l_uint8 *  data,
size_t  datalen,
const l_uint8 *  sequence,
size_t  seqlen,
l_int32 *  poffset,
l_int32 *  pfound 
)

arrayFindSequence()

Input: data (byte array) datalen (length of data, in bytes) sequence (subarray of bytes to find in data) seqlen (length of sequence, in bytes) &offset (<return> offset from beginning of data where the sequence begins) &found (<return> 1 if sequence is found; 0 otherwise) Return: 0 if OK, 1 on error

Notes: (1) The byte arrays 'data' and 'sequence' are not C strings, as they can contain null bytes. Therefore, for each we must give the length of the array. (2) This searches for the first occurrence in 'data' of 'sequence', which consists of 'seqlen' bytes. The parameter 'seqlen' must not exceed the actual length of the 'sequence' byte array. (3) If the sequence is not found, the offset will be 0, so you must check 'found'.

convertBinaryToGrayCode()

Input: val Return: gray code value

Notes: (1) Gray code values corresponding to integers differ by only one bit transition between successive integers.

convertGrayCodeToBinary()

Input: gray code value Return: binary value

l_int32 convertSepCharsInPath ( char *  path,
l_int32  type 
)

convertSepCharsInPath()

Input: path type (UNIX_PATH_SEPCHAR, WIN_PATH_SEPCHAR) Return: 0 if OK, 1 on error

Notes: (1) In-place conversion. (2) Type is the resulting type: * UNIX_PATH_SEPCHAR: '\' ==> '/' * WIN_PATH_SEPCHAR: '/' ==> '\' (3) Virtually all path operations in leptonica use unix separators.

l_int32 extractNumberFromFilename ( const char *  fname,
l_int32  numpre,
l_int32  numpost 
)

extractNumberFromFilename()

Input: fname numpre (number of characters before the digits to be found) numpost (number of characters after the digits to be found) Return: num (number embedded in the filename); -1 on error or if not found

Notes: (1) The number is to be found in the basename, which is the filename without either the directory or the last extension. (2) When a number is found, it is non-negative. If no number is found, this returns -1, without an error message. The caller needs to check.

l_int32 fileAppendString ( const char *  filename,
const char *  str 
)

fileAppendString()

Input: filename str (string to append to file) Return: 0 if OK, 1 on error

l_int32 fileConcatenate ( const char *  srcfile,
const char *  destfile 
)

fileConcatenate()

Input: srcfile (file to append) destfile (file to add to) Return: 0 if OK, 1 on error

l_int32 fileCopy ( const char *  srcfile,
const char *  newfile 
)

fileCopy()

Input: srcfile (copy this file) newfile (to this file) Return: 0 if OK, 1 on error

l_int32 fileCorruptByDeletion ( const char *  filein,
l_float32  loc,
l_float32  size,
const char *  fileout 
)

fileCorruptByDeletion()

Input: filein loc (fractional location of start of deletion) size (fractional size of deletion) fileout (corrupted file) Return: 0 if OK, 1 on error

Notes: (1) 'loc' and 'size' are expressed as a fraction of the file size. (2) This makes a copy of the data in 'filein', where bytes in the specified region have been deleted. (3) If ('loc' + 'size') >= 1.0, this deletes from the position represented by 'loc' to the end of the file. (4) It is useful for testing robustness of I/O wrappers when the data is corrupted, by simulating data corruption by deletion.

l_int32 fileCorruptByMutation ( const char *  filein,
l_float32  loc,
l_float32  size,
const char *  fileout 
)

fileCorruptByMutation()

Input: filein loc (fractional location of start of randomization) size (fractional size of randomization) fileout (corrupted file) Return: 0 if OK, 1 on error

Notes: (1) 'loc' and 'size' are expressed as a fraction of the file size. (2) This makes a copy of the data in 'filein', where bytes in the specified region have been replaced by random data. (3) If ('loc' + 'size') >= 1.0, this modifies data from the position represented by 'loc' to the end of the file. (4) It is useful for testing robustness of I/O wrappers when the data is corrupted, by simulating data corruption.

l_int32 filesAreIdentical ( const char *  fname1,
const char *  fname2,
l_int32 *  psame 
)

filesAreIdentical()

Input: fname1 fname2 &same (<return> 1 if identical; 0 if different) Return: 0 if OK, 1 on error

l_int32 findNextLargerPrime ( l_int32  start,
l_uint32 *  pprime 
)

findNextLargerPrime()

Input: start &prime (<return> first prime larger than 'start') Return: 0 if OK, 1 on error

size_t fnbytesInFile ( FILE *  fp)

fnbytesInFile()

Input: file stream Return: nbytes in file; 0 on error

FILE* fopenReadStream ( const char *  filename)

fopenReadStream()

Input: filename Return: stream, or null on error

Notes: (1) This should be used whenever you want to run fopen() to read from a stream. Never call fopen() directly. (2) This also handles pathname conversions, if necessary: /tmp ==> /tmp (unix) [default] /tmp ==> /tmp/leptonica (unix) [if ADD_LEPTONICA_SUBDIR == 1] /tmp ==> <Temp>/leptonica (windows)

FILE* fopenWriteStream ( const char *  filename,
const char *  modestring 
)

fopenWriteStream()

Input: filename modestring Return: stream, or null on error

Notes: (1) This should be used whenever you want to run fopen() to write or append to a stream. Never call fopen() directly. (2) This also handles pathname conversions, if necessary: /tmp ==> /tmp (unix) [default] /tmp ==> /tmp/leptonica (unix) [if ADD_LEPTONICA_SUBDIR == 1] /tmp ==> <Temp>/leptonica (windows)

char* genPathname ( const char *  dir,
const char *  fname 
)
l_int32 genRandomIntegerInRange ( l_int32  range,
l_int32  seed,
l_int32 *  pval 
)

genRandomIntegerInRange()

Input: range (size of range; must be >= 2) seed (use 0 to skip; otherwise call srand) &val (<return> random integer in range {0 ... range-1}) Return: 0 if OK, 1 on error

Notes: (1) For example, to choose a random integer between 0 and 99, use 'range' = 100.

char* genTempFilename ( const char *  dir,
const char *  tail,
l_int32  usetime,
l_int32  usepid 
)

getLeptonicaVersion()

Return: string of version number (e.g., 'leptonica-1.68')

Notes: (1) The caller has responsibility to free the memory.

l_uint8* l_binaryCopy ( l_uint8 *  datas,
size_t  size 
)

l_binaryCopy()

Input: datas size (of data array) Return: datad (on heap), or null on error

Notes: (1) We add 4 bytes to the zeroed output because in some cases (e.g., string handling) it is important to have the data be null terminated. This guarantees that after the memcpy, the result is automatically null terminated.

l_uint8* l_binaryRead ( const char *  filename,
size_t *  pnbytes 
)

l_binaryRead()

Input: filename &nbytes (<return> number of bytes read) Return: data, or null on error

l_uint8* l_binaryReadSelect ( const char *  filename,
size_t  start,
size_t  nbytes,
size_t *  pnread 
)

l_binaryReadSelect()

Input: filename start (first byte to read) nbytes (number of bytes to read; use 0 to read to end of file) &nread (<return> number of bytes actually read) Return: data, or null on error

Notes: (1) The returned array is terminated with a null byte so that it can be used to read ascii data from a file into a proper C string.

l_uint8* l_binaryReadSelectStream ( FILE *  fp,
size_t  start,
size_t  nbytes,
size_t *  pnread 
)

l_binaryReadSelectStream()

Input: stream start (first byte to read) nbytes (number of bytes to read; use 0 to read to end of file) &nread (<return> number of bytes actually read) Return: null-terminated array, or null on error (reading 0 bytes is not an error)

Notes: (1) The returned array is terminated with a null byte so that it can be used to read ascii data from a file into a proper C string. If the file to be read is empty and 'start' == 0, an array with a single null byte is returned. (2) Side effect: the stream pointer is re-positioned to the beginning of the file.

l_uint8* l_binaryReadStream ( FILE *  fp,
size_t *  pnbytes 
)

l_binaryReadStream()

Input: fp (stream opened to read; can be stdin) &nbytes (<return> number of bytes read) Return: null-terminated array, or null on error (reading 0 bytes is not an error)

Notes: (1) The returned array is terminated with a null byte so that it can be used to read ascii data from a file into a proper C string. (2) This can be used to capture data that is piped in via stdin, because it does not require seeking within the file. (3) For example, you can read an image from stdin into memory using shell redirection, with one of these shell commands: cat <imagefile> | readprog readprog < <imagefile> where readprog is: l_uint8 *data = l_binaryReadStream(stdin, &nbytes); Pix *pix = pixReadMem(data, nbytes);

l_int32 l_binaryWrite ( const char *  filename,
const char *  operation,
void *  data,
size_t  nbytes 
)

l_binaryWrite()

Input: filename (output) operation ("w" for write; "a" for append) data (binary data to be written) nbytes (size of data array) Return: 0 if OK; 1 on error

void l_getCurrentTime ( l_int32 *  sec,
l_int32 *  usec 
)

l_getCurrentTime()

Input: &sec (<optional return> in seconds since birth of Unix) &usec (<optional return> in microseconds since birth of Unix) Return: void

char* l_getFormattedDate ( )

l_getFormattedDate()

Input: (none) Return: formatted date string, or null on error

Notes: (1) This is used in pdf, in the form specified in section 3.8.2 of http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf (2) Contributed by Dave Bryan. Works on all platforms.

l_int32 l_hashFloat64ToUint64 ( l_int32  nbuckets,
l_float64  val,
l_uint64 *  phash 
)

l_hashFloat64ToUint64()

Input: nbuckets val &hash (<return>) Return: 0 if OK, 1 on error

Notes: (1) Simple, fast hash for using dnaHash with 64-bit data (e.g., sets and histograms). (2) The resulting hash is called a "key" in a lookup operation. The bucket for 'val' in a dnaHash is simply found by taking the mod of the hash with the number of buckets (which is prime). What gets stored in the dna in that bucket could depend on use, but for the most flexibility, we store an index into the associated dna. This is all that is required for generating either a hash set or a histogram (an example of a hash map). (3) For example, to generate a histogram, the histogram dna, a histogram of unique values aligned with the histogram dna, and a dnahash hashmap are built. See l_dnaHashHistoFromDna().

l_int32 l_hashPtToUint64 ( l_int32  x,
l_int32  y,
l_uint64 *  phash 
)

l_hashPtToUint64()

Input: x, y &hash (<return>) Return: 0 if OK, 1 on error

Notes: (1) I just made up a hash function and fiddled with it to get decent coverage over the 2^64 values. There are no collisions for any of 100 million points with x and y up to 10000.

l_int32 l_hashPtToUint64Fast ( l_int32  nbuckets,
l_int32  x,
l_int32  y,
l_uint64 *  phash 
)

l_hashPtToUint64Fast()

Input: nbuckets x, y &hash (<return>) Return: 0 if OK, 1 on error

Notes: (1) This is a simple, fast hash that is used with the dna hash map, which takes the mod with a prime number of buckets. The number of buckets is selected so that collisions occur, aiming for about 20 results in each bucket. The design goal is that the hash is fast (mult/add) and approximately the same number of points are hashed to each bucket.

l_int32 l_hashStringToUint64 ( const char *  str,
l_uint64 *  phash 
)

l_hashStringToUint64()

Input: str &hash (<return>) Return: 0 if OK, 1 on error

Notes: (1) The intent of the hash is to avoid collisions by mapping the string as randomly as possible into 64 bits. (2) To the extent that the hashes are random, the probability of a collision can be approximated by the square of the number of strings divided by 2^64. For 1 million strings, the collision probability is about 1 in 16 million. (3) I expect non-randomness of the distribution to be most evident for small text strings. This hash function has been tested for all text strings of 1 to 5 characters composed of 26 letters, of which there are 26 + 26^2 + ... + 26^5 = 12356630. There are no hash collisions for this set.

void* lept_calloc ( size_t  nmemb,
size_t  size 
)

lept_calloc()

Input: nmemb (number of members) size (of each member) Return: void ptr, or null on error

Notes: (1) For safety with windows DLLs, this can be used in conjunction with lept_free() to avoid C-runtime boundary problems. Just use these two functions throughout your application.

l_int32 lept_cp ( const char *  srcfile,
const char *  newdir,
const char *  newtail,
char **  pnewpath 
)

lept_cp()

Input: srcfile newdir (<optional>; can be NULL) newtail (<optional>; can be NULL) &newpath (<optional return> of actual path; can be NULL) Return: 0 on success, non-zero on failure

Notes: (1) This copies 'srcfile' to /tmp or to a subdirectory of /tmp. (2) 'srcfile' can either be a full path or relative to the current directory. (3) 'newdir' can either specify an existing subdirectory of /tmp, or can be NULL. In the latter case, the file will be written into /tmp. (4) 'newtail' can either specify a filename tail or, if NULL, the filename is taken from src-tail, the tail of 'srcfile'. (5) For debugging, the computed newpath can be returned. It must be freed by the caller. (6) Reminders: (a) specify files using unix pathnames (b) for windows, translates /tmp ==> <Temp> where <Temp> is the windows temp directory (7) Examples: * newdir = NULL, newtail = NULL ==> /tmp/src-tail * newdir = NULL, newtail = abc ==> /tmp/abc * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc

void lept_direxists ( const char *  dir,
l_int32 *  pexists 
)

lept_direxists()

Input: dir &exists (<return> 1 if it exists; 0 otherwise) Return: void

Notes: (1) Always use unix pathname separators. (2) By calling genPathname(), if the pathname begins with "/tmp" this does an automatic directory translation on windows to a path in the windows <Temp> directory: "/tmp" ==> <Temp> (windows)

l_int32 lept_fclose ( FILE *  fp)

lept_fclose()

Input: fp (stream handle) Return: 0 if OK, 1 on error

Notes: (1) This should be used by any application that accepts a file handle generated by a leptonica Windows DLL.

FILE* lept_fopen ( const char *  filename,
const char *  mode 
)

lept_fopen()

Input: filename mode (same as for fopen(); e.g., "rb") Return: stream or null on error

Notes: (1) This must be used by any application that passes a file handle to a leptonica Windows DLL.

void lept_free ( void *  ptr)

lept_free()

Input: void ptr Return: void

Notes: (1) This should be used by any application that accepts heap data allocated by a leptonica Windows DLL.

l_int32 lept_isPrime ( l_uint64  n,
l_int32 *  pis_prime,
l_uint32 *  pfactor 
)

lept_isPrime()

Input: n (64-bit unsigned) &is_prime (<return> 1 if prime, 0 otherwise) &factor (<optional return> smallest divisor, or 0 on error or if prime) Return: 0 if OK, 1 on error

l_int32 lept_mkdir ( const char *  subdir)

lept_mkdir()

Input: subdir (of /tmp or its equivalent on Windows) Return: 0 on success, non-zero on failure

Notes: (1) 'subdir' is a partial path that can consist of one or more directories. (2) This makes any subdirectories of /tmp that are required. (3) The root temp directory is: /tmp (unix) [default] <Temp> (windows)

l_int32 lept_mv ( const char *  srcfile,
const char *  newdir,
const char *  newtail,
char **  pnewpath 
)

lept_mv()

Input: srcfile newdir (<optional>; can be NULL) newtail (<optional>; can be NULL) &newpath (<optional return> of actual path; can be NULL) Return: 0 on success, non-zero on failure

Notes: (1) This moves 'srcfile' to /tmp or to a subdirectory of /tmp. (2) 'srcfile' can either be a full path or relative to the current directory. (3) 'newdir' can either specify an existing subdirectory of /tmp or can be NULL. In the latter case, the file will be written into /tmp. (4) 'newtail' can either specify a filename tail or, if NULL, the filename is taken from src-tail, the tail of 'srcfile'. (5) For debugging, the computed newpath can be returned. It must be freed by the caller. (6) Reminders: (a) specify files using unix pathnames (b) for windows, translates /tmp ==> <Temp> where <Temp> is the windows temp directory (7) Examples: * newdir = NULL, newtail = NULL ==> /tmp/src-tail * newdir = NULL, newtail = abc ==> /tmp/abc * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc

l_int32 lept_rm ( const char *  subdir,
const char *  tail 
)

lept_rm()

Input: subdir (<optional> of '/tmp'; can be NULL) tail (filename without the directory) Return: 0 on success, non-zero on failure

Notes: (1) By calling genPathname(), this does an automatic directory translation on windows to a path in the windows <Temp> directory: "/tmp/..." ==> <Temp>/... (windows)

l_int32 lept_rm_match ( const char *  subdir,
const char *  substr 
)

lept_rm_match()

Input: subdir (<optional> If NULL, the removed files are in /tmp) substr (<optional> pattern to match in filename) Return: 0 on success, non-zero on failure

Notes: (1) This removes the matched files in /tmp or a subdirectory of /tmp. Use NULL for 'subdir' if the files are in /tmp. (2) If 'substr' == NULL, this removes all files in the directory. If 'substr' == "" (empty), this removes no files. If both 'subdir' == NULL and 'substr' == NULL, this removes all files in /tmp. (3) Use unix pathname separators. (4) By calling genPathname(), if the pathname begins with "/tmp" this does an automatic directory translation on windows to a path in the windows <Temp> directory: "/tmp" ==> <Temp> (windows) (5) Error conditions: * returns -1 if the directory is not found * returns the number of files (> 0) that it was unable to remove.

l_int32 lept_rmdir ( const char *  subdir)

lept_rmdir()

Input: subdir (of /tmp or its equivalent on Windows) Return: 0 on success, non-zero on failure

Notes: (1) 'subdir' is a partial path that can consist of one or more directories. (2) This removes all files from the specified subdirectory of the root temp directory: /tmp (unix) <Temp> (windows) and then removes the subdirectory. (3) The combination lept_rmdir(subdir); lept_mkdir(subdir); is guaranteed to give you an empty subdirectory.

l_int32 lept_rmfile ( const char *  filepath)

TODO: Remove this function ?

lept_rmfile()

Input: filepath (full path to file including the directory) Return: 0 on success, non-zero on failure

Notes: (1) This removes the named file. (2) Use unix pathname separators. (3) Unlike the other lept_* functions in this section, this can remove any file -- it is not restricted to files that are in /tmp or a subdirectory of it.

lept_roundftoi()

Input: fval Return: value rounded to int

Notes: (1) For fval >= 0, fval --> round(fval) == floor(fval + 0.5) For fval < 0, fval --> -round(-fval)) This is symmetric around 0. e.g., for fval in (-0.5 ... 0.5), fval --> 0

l_int32 makeTempDirname ( char *  result,
size_t  nbytes,
const char *  subdir 
)

makeTempDirname()

Input: result (preallocated on stack or heap and passed in) nbytes (size of 'result' array, in bytes) subdir (<optional>; can be NULL or an empty string) Return: 0 if OK, 1 on error

Notes: (1) This generates the directory path for output temp files, written into 'result' with unix separators. (2) Caller allocates 'result', large enough to hold the path, which is: /tmp/<subdir> (unix) <Temp>/<subdir> (windows) where <Temp> is a path on windows determined by GetTempPath(). (3) Usage example: char result[256]; makeTempDirname(result, 256, "lept/golden");

l_int32 modifyTrailingSlash ( char *  path,
size_t  nbytes,
l_int32  flag 
)

modifyTrailingSlash()

Input: path (preallocated on stack or heap and passed in) nbytes (size of array, in bytes) flag (L_ADD_TRAIL_SLASH or L_REMOVE_TRAIL_SLASH) Return: 0 if OK, 1 on error

Notes: (1) This carries out the requested action if necessary.

size_t nbytesInFile ( const char *  filename)

nbytesInFile()

Input: filename Return: nbytes in file; 0 on error

char* pathJoin ( const char *  dir,
const char *  fname 
)
void* reallocNew ( void **  pindata,
l_int32  oldsize,
l_int32  newsize 
)

reallocNew()

Input: &indata (<optional>; nulls indata) oldsize (size of input data to be copied, in bytes) newsize (size of data to be reallocated in bytes) Return: ptr to new data, or null on error

Action: !N.B. (3) and (4)! (1) Allocates memory, initialized to 0 (2) Copies as much of the input data as possible to the new block, truncating the copy if necessary (3) Frees the input data (4) Zeroes the input data ptr

Notes: (1) If newsize <=0, just frees input data and nulls ptr (2) If input ptr is null, just callocs new memory (3) This differs from realloc in that it always allocates new memory (if newsize > 0) and initializes it to 0, it requires the amount of old data to be copied, and it takes the address of the input ptr and nulls the handle.

l_float32 returnErrorFloat ( const char *  msg,
const char *  procname,
l_float32  fval 
)

returnErrorFloat()

Input: msg (error message) procname fval (return val) Return: fval

l_int32 returnErrorInt ( const char *  msg,
const char *  procname,
l_int32  ival 
)

returnErrorInt()

Input: msg (error message) procname ival (return val) Return: ival (typically 1 for an error return)

void* returnErrorPtr ( const char *  msg,
const char *  procname,
void *  pval 
)

returnErrorPtr()

Input: msg (error message) procname pval (return val) Return: pval (typically null)

setMsgSeverity()

Input: newsev Return: oldsev

Notes: (1) setMsgSeverity() allows the user to specify the desired message severity threshold. Messages of equal or greater severity will be output. The previous message severity is returned when the new severity is set. (2) If L_SEVERITY_EXTERNAL is passed, then the severity will be obtained from the LEPT_MSG_SEVERITY environment variable. If the environmental variable is not set, a warning is issued.

l_int32 splitPathAtDirectory ( const char *  pathname,
char **  pdir,
char **  ptail 
)

splitPathAtDirectory()

Input: pathname (full path; can be a directory) &dir (<optional return> root directory name of input path, including trailing '/') &tail (<optional return> path tail, which is either the file name within the root directory or the last sub-directory in the path) Return: 0 if OK, 1 on error

Notes: (1) If you only want the tail, input null for the root directory ptr. (2) If you only want the root directory name, input null for the tail ptr. (3) This function makes decisions based only on the lexical structure of the input. Examples: /usr/tmp/abc --> dir: /usr/tmp/ tail: abc /usr/tmp/ --> dir: /usr/tmp/ tail: [empty string] /usr/tmp --> dir: /usr/ tail: tmp abc --> dir: [empty string] tail: abc (4) The input can have either forward (unix) or backward (win) slash separators. The output has unix separators. Note that Win32 pathname functions generally accept both slash forms, but the windows command line interpreter only accepts backward slashes, because forward slashes are used to demarcate switches (vs. dashes in unix).

l_int32 splitPathAtExtension ( const char *  pathname,
char **  pbasename,
char **  pextension 
)

splitPathAtExtension()

Input: pathname (full path; can be a directory) &basename (<optional return> pathname not including the last dot and characters after that) &extension (<optional return> path extension, which is the last dot and the characters after it. If there is no extension, it returns the empty string) Return: 0 if OK, 1 on error

Notes: (1) If you only want the extension, input null for the basename ptr. (2) If you only want the basename without extension, input null for the extension ptr. (3) This function makes decisions based only on the lexical structure of the input. Examples: /usr/tmp/abc.jpg --> basename: /usr/tmp/abc ext: .jpg /usr/tmp/.jpg --> basename: /usr/tmp/ ext: .jpg /usr/tmp.jpg/ --> basename: /usr/tmp.jpg/ ext: [empty str] ./.jpg --> basename: ./ ext: .jpg (4) The input can have either forward (unix) or backward (win) slash separators. The output has unix separators.

void startTimer ( void  )

startTimer(), stopTimer()

Notes: (1) These measure the cpu time elapsed between the two calls: startTimer(); .... fprintf(stderr, "Elapsed time = %7.3f sec\n", stopTimer());

startTimerNested(), stopTimerNested()

Example of usage:

L_TIMER t1 = startTimerNested(); .... L_TIMER t2 = startTimerNested(); .... fprintf(stderr, "Elapsed time 2 = %7.3f sec\n", stopTimerNested(t2)); .... fprintf(stderr, "Elapsed time 1 = %7.3f sec\n", stopTimerNested(t1));

startWallTimer() Input: void Return: walltimer-ptr

stopWallTimer() Input: &walltimer-ptr Return: time (wall time elapsed in seconds)

Notes: (1) These measure the wall clock time elapsed between the two calls: L_WALLTIMER *timer = startWallTimer(); .... fprintf(stderr, "Elapsed time = %f sec\n", stopWallTimer(&timer)); (2) Note that the timer object is destroyed by stopWallTimer().

l_float32 stopTimer ( void  )
l_float32 stopTimerNested ( L_TIMER  rusage_start)
l_int32 stringCat ( char *  dest,
size_t  size,
const char *  src 
)

stringCat()

Input: dest (null-terminated byte buffer) size (size of dest) src string (can be null or null-terminated string) Return: number of bytes added to dest; -1 on error

Notes: (1) Alternative implementation of strncat, that checks the input, is easier to use (since the size of the dest buffer is specified rather than the number of bytes to copy), and does not complain if 'src' is null. (2) Never writes past end of dest. (3) If it can't append src (an error), it does nothing. (4) N.B. The order of 2nd and 3rd args is reversed from that in strncat, as in the Windows function strcat_s().

char* stringConcatNew ( const char *  first,
  ... 
)

stringConcatNew()

Input: first (first string in list) varargs (NULL-terminated list of strings) Return: result (new string concatenating the input strings), or NULL if first == NULL

Notes: (1) The last arg in the list of strings must be NULL. (2) Caller must free the returned string.

l_int32 stringCopy ( char *  dest,
const char *  src,
l_int32  n 
)

stringCopy()

Input: dest (existing byte buffer) src string (<optional> can be null) n (max number of characters to copy) Return: 0 if OK, 1 on error

Notes: (1) Relatively safe wrapper for strncpy, that checks the input, and does not complain if src is null or n < 1. If n < 1, this is a no-op. (2) dest needs to be at least n + 1 bytes in size. (3) We don't call strncpy() because valgrind complains about use of uninitialized values.

l_int32 stringFindSubstr ( const char *  src,
const char *  sub,
l_int32 *  ploc 
)

stringFindSubstr()

Input: src (input string; can be of zero length) sub (substring to be searched for) &loc (<optional return> location of substring in src) Return: 1 if found; 0 if not found or on error

Notes: (1) This is a wrapper around strstr(). (2) Both src and sub must be defined, and sub must have length of at least 1. (3) If the substring is not found and loc is returned, it has the value -1.

char* stringJoin ( const char *  src1,
const char *  src2 
)

stringJoin()

Input: src1 string (<optional> can be null) src2 string (<optional> can be null) Return: concatenated string, or null on error

Notes: (1) This is a safe version of strcat; it makes a new string. (2) It is not an error if either or both of the strings are empty, or if either or both of the pointers are null.

l_int32 stringJoinIP ( char **  psrc1,
const char *  src2 
)

stringJoinIP()

Input: &src1 string (address of src1; cannot be on the stack) src2 string (<optional> can be null) Return: 0 if OK, 1 on error

Notes: (1) This is a safe in-place version of strcat. The contents of src1 is replaced by the concatenation of src1 and src2. (2) It is not an error if either or both of the strings are empty (""), or if the pointers to the strings (*psrc1, src2) are null. (3) src1 should be initialized to null or an empty string before the first call. Use one of these: char *src1 = NULL; char *src1 = stringNew(""); Then call with: stringJoinIP(&src1, src2); (4) This can also be implemented as a macro: #define stringJoinIP(src1, src2) \ {tmpstr = stringJoin((src1),(src2)); \ LEPT_FREE(src1); \ (src1) = tmpstr;} (5) Another function to consider for joining many strings is stringConcatNew().

l_int32 stringLength ( const char *  src,
size_t  size 
)

stringLength()

Input: src string (can be null or null-terminated string) size (size of src buffer) Return: length of src in bytes.

Notes: (1) Safe implementation of strlen that only checks size bytes for trailing NUL. (2) Valid returned string lengths are between 0 and size - 1. If size bytes are checked without finding a NUL byte, then an error is indicated by returning size.

char* stringNew ( const char *  src)

stringNew()

Input: src string Return: dest copy of src string, or null on error

char* stringRemoveChars ( const char *  src,
const char *  remchars 
)

stringRemoveChars()

Input: src (input string; can be of zero length) remchars (string of chars to be removed from src) Return: dest (string with specified chars removed), or null on error

l_int32 stringReplace ( char **  pdest,
const char *  src 
)

stringReplace()

Input: &dest string (<return> copy) src string (<optional> can be null) Return: 0 if OK; 1 on error

Notes: (1) Frees any existing dest string (2) Puts a copy of src string in the dest (3) If either or both strings are null, does something reasonable.

char* stringReplaceEachSubstr ( const char *  src,
const char *  sub1,
const char *  sub2,
l_int32 *  pcount 
)

stringReplaceEachSubstr()

Input: src (input string; can be of zero length) sub1 (substring to be replaced) sub2 (substring to put in; can be "") &count (<optional return> the number of times that sub1 is found in src; 0 if not found) Return: dest (string with substring replaced), or null if the substring not found or on error.

Notes: (1) Replaces every instance. (2) To only remove each instance of sub1, use "" for sub2 (3) Returns NULL if sub1 and sub2 are the same.

char* stringReplaceSubstr ( const char *  src,
const char *  sub1,
const char *  sub2,
l_int32 *  pfound,
l_int32 *  ploc 
)

stringReplaceSubstr()

Input: src (input string; can be of zero length) sub1 (substring to be replaced) sub2 (substring to put in; can be "") &found (<optional return> 1 if sub1 is found; 0 otherwise) &loc (<optional return> location of ptr after replacement) Return: dest (string with substring replaced), or null if the substring not found or on error.

Notes: (1) Replaces the first instance. (2) To only remove sub1, use "" for sub2 (3) Returns a new string if sub1 and sub2 are the same. (4) The optional loc is input as the byte offset within the src from which the search starts, and after the search it is the char position in the string of the next character after the substituted string. (5) N.B. If ploc is not null, loc must always be initialized. To search the string from the beginning, set loc = 0.

char* stringReverse ( const char *  src)

stringReverse()

Input: src (string) Return: dest (newly-allocated reversed string)

l_int32 stringSplitOnToken ( char *  cstr,
const char *  seps,
char **  phead,
char **  ptail 
)

stringSplitOnToken()

Input: cstr (input string to be split; not altered) seps (a string of character separators) &head (<return> ptr to copy of the input string, up to the first separator token encountered) &tail (<return> ptr to copy of the part of the input string starting with the first non-separator character that occurs after the first separator is found) Return: 0 if OK, 1 on error

Notes: (1) The input string is not altered; all split parts are new strings. (2) The split occurs around the first consecutive sequence of tokens encountered. (3) The head goes from the beginning of the string up to but not including the first token found. (4) The tail contains the second part of the string, starting with the first char in that part that is NOT a token. (5) If no separator token is found, 'head' contains a copy of the input string and 'tail' is null.

char* strtokSafe ( char *  cstr,
const char *  seps,
char **  psaveptr 
)

strtokSafe()

Input: cstr (input string to be sequentially parsed; use NULL after the first call) seps (a string of character separators) &saveptr (<return> ptr to the next char after the last encountered separator) Return: substr (a new string that is copied from the previous saveptr up to but not including the next separator character), or NULL if end of cstr.

Notes: (1) This is a thread-safe implementation of strtok. (2) It has the same interface as strtok_r. (3) It differs from strtok_r in usage in two respects: (a) the input string is not altered (b) each returned substring is newly allocated and must be freed after use. (4) Let me repeat that. This is "safe" because the input string is not altered and because each returned string is newly allocated on the heap. (5) It is here because, surprisingly, some C libraries don't include strtok_r. (6) Important usage points:

  • Input the string to be parsed on the first invocation.
  • Then input NULL after that; the value returned in saveptr is used in all subsequent calls. (7) This is only slightly slower than strtok_r.

Переменные

LEPT_DLL l_int32 LeptMsgSeverity = DEFAULT_SEVERITY
struct rusage rusage_after [static]
struct rusage rusage_before [static]