Leptonica
1.54
|
Функции | |
static l_int32 | pixCorrelationBestShift (PIX *pix1, PIX *pix2, NUMA *nasum1, NUMA *namoment1, l_int32 area2, l_int32 ycent2, l_int32 maxyshift, l_int32 *tab8, l_int32 *pdelx, l_int32 *pdely, l_float32 *pscore, l_int32 debugflag) |
static L_RCH * | rchCreate (l_int32 index, l_float32 score, char *text, l_int32 sample, l_int32 xloc, l_int32 yloc, l_int32 width) |
static L_RCHA * | rchaCreate () |
static l_int32 | transferRchToRcha (L_RCH *rch, L_RCHA *rcha) |
static void | l_showIndicatorSplitValues (NUMA *na1, NUMA *na2, NUMA *na3, NUMA *na4, NUMA *na5, NUMA *na6) |
static l_int32 | recogaSaveBestRcha (L_RECOGA *recoga, PIXA *pixa) |
static l_int32 | recogaTransferRch (L_RECOGA *recoga, L_RECOG *recog, l_int32 index) |
l_int32 | recogaIdentifyMultiple (L_RECOGA *recoga, PIX *pixs, l_int32 nitems, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, PIX **ppixdb, l_int32 debugsplit) |
l_int32 | recogSplitIntoCharacters (L_RECOG *recog, PIX *pixs, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, NUMA **pnaid, l_int32 debug) |
l_int32 | recogCorrelationBestRow (L_RECOG *recog, PIX *pixs, BOXA **pboxa, NUMA **pnascore, NUMA **pnaindex, SARRAY **psachar, l_int32 debug) |
l_int32 | recogCorrelationBestChar (L_RECOG *recog, PIX *pixs, BOX **pbox, l_float32 *pscore, l_int32 *pindex, char **pcharstr, PIX **ppixdb) |
l_int32 | recogaIdentifyPixa (L_RECOGA *recoga, PIXA *pixa, NUMA *naid, PIX **ppixdb) |
l_int32 | recogIdentifyPixa (L_RECOG *recog, PIXA *pixa, NUMA *naid, PIX **ppixdb) |
l_int32 | recogIdentifyPix (L_RECOG *recog, PIX *pixs, PIX **ppixdb) |
l_int32 | recogSkipIdentify (L_RECOG *recog) |
void | rchaDestroy (L_RCHA **prcha) |
void | rchDestroy (L_RCH **prch) |
l_int32 | rchaExtract (L_RCHA *rcha, NUMA **pnaindex, NUMA **pnascore, SARRAY **psatext, NUMA **pnasample, NUMA **pnaxloc, NUMA **pnayloc, NUMA **pnawidth) |
l_int32 | rchExtract (L_RCH *rch, l_int32 *pindex, l_float32 *pscore, char **ptext, l_int32 *psample, l_int32 *pxloc, l_int32 *pyloc, l_int32 *pwidth) |
PIX * | recogProcessToIdentify (L_RECOG *recog, PIX *pixs, l_int32 pad) |
PIX * | recogPreSplittingFilter (L_RECOG *recog, PIX *pixs, l_float32 maxasp, l_float32 minaf, l_int32 debug) |
l_int32 | recogSplittingFilter (L_RECOG *recog, PIX *pixs, l_float32 maxasp, l_float32 minaf, l_int32 *premove, l_int32 debug) |
SARRAY * | recogaExtractNumbers (L_RECOGA *recoga, BOXA *boxas, l_float32 scorethresh, l_int32 spacethresh, BOXAA **pbaa, NUMAA **pnaa) |
l_int32 | recogSetTemplateType (L_RECOG *recog, l_int32 templ_type) |
l_int32 | recogSetScaling (L_RECOG *recog, l_int32 scalew, l_int32 scaleh) |
Переменные | |
static const l_int32 | LeftRightPadding = 32 |
static const l_float32 | MaxAspectRatio = 6.0 |
static const l_float32 | MinFillFactor = 0.10 |
static const l_int32 | MinOverlap1 = 6 |
static const l_int32 | MinOverlap2 = 6 |
static const l_int32 | MinHeightPass1 = 5 |
static void l_showIndicatorSplitValues | ( | NUMA * | na1, |
NUMA * | na2, | ||
NUMA * | na3, | ||
NUMA * | na4, | ||
NUMA * | na5, | ||
NUMA * | na6 | ||
) | [static] |
Input: 6 indicator arrays
Notes: (1) The values indicate that specific criteria have been met for component removal by the pre-splitting filter. The 'result' line shows which components have been removed.
static l_int32 pixCorrelationBestShift | ( | PIX * | pix1, |
PIX * | pix2, | ||
NUMA * | nasum1, | ||
NUMA * | namoment1, | ||
l_int32 | area2, | ||
l_int32 | ycent2, | ||
l_int32 | maxyshift, | ||
l_int32 * | tab8, | ||
l_int32 * | pdelx, | ||
l_int32 * | pdely, | ||
l_float32 * | pscore, | ||
l_int32 | debugflag | ||
) | [static] |
Input: pix1 (1 bpp, the unknown image; typically larger) pix2 (1 bpp, the matching template image) nasum1 (vertical column pixel sums for pix1) namoment1 (vertical column first moment of pixels for pix1) area2 (number of on pixels in pix2) ycent2 (y component of centroid of pix2) maxyshift (max y shift of pix2 around the location where the centroids of pix2 and a windowed part of pix1 are vertically aligned) tab8 (<optional> sum tab for ON pixels in byte; can be NULL) &delx (<optional return> best x shift of pix2 relative to pix1) &dely (<optional return> best y shift of pix2 relative to pix1) &score (<optional return> maximum score found; can be NULL) debugflag (<= 0 to skip; positive to generate output. The integer is used to label the debug image.) Return: 0 if OK, 1 on error
Notes: (1) This maximizes the correlation score between two 1 bpp images, one of which is typically wider. In a typical example, pix1 is a bitmap of 2 or more touching characters and pix2 is a single character template. This finds the location of pix2 that gives the largest correlation. (2) The windowed area of fg pixels and windowed first moment in the y direction are computed from the input sum and moment column arrays, nasum1 and namoment1. (3) This is a brute force operation. We compute the correlation at every x shift for which pix2 fits entirely within pix1, and where the centroid of pix2 is aligned, within +-maxyshift, with the centroid of a window of pix1 of the same width. The correlation is taken over the full height of pix1. This can be made more efficient.
static L_RCHA * rchaCreate | ( | ) | [static] |
Return: rcha (the newly created L_RCHA)
Notes: (1) Be sure to destroy any existing rcha before assigning this.
void rchaDestroy | ( | L_RCHA ** | prcha | ) |
Input: &rcha Return: void
l_int32 rchaExtract | ( | L_RCHA * | rcha, |
NUMA ** | pnaindex, | ||
NUMA ** | pnascore, | ||
SARRAY ** | psatext, | ||
NUMA ** | pnasample, | ||
NUMA ** | pnaxloc, | ||
NUMA ** | pnayloc, | ||
NUMA ** | pnawidth | ||
) |
Input: rcha &naindex (<optional return> indices of best templates) &nascore (<optional return> correl scores of best templates) &satext (<optional return> character strings of best templates) &nasample (<optional return> indices of best samples) &naxloc (<optional return> x-locations of templates) &nayloc (<optional return> y-locations of templates) &nawidth (<optional return> widths of best templates) Return: 0 if OK, 1 on error
Notes: (1) This returns clones of the number and string arrays. They must be destroyed by the caller.
static L_RCH * rchCreate | ( | l_int32 | index, |
l_float32 | score, | ||
char * | text, | ||
l_int32 | sample, | ||
l_int32 | xloc, | ||
l_int32 | yloc, | ||
l_int32 | width | ||
) | [static] |
Input: index (index of best template) score (correlation score of best template) text (character string of best template) sample (index of best sample; -1 if averages are used) xloc (x-location of template: delx + shiftx) yloc (y-location of template: dely + shifty) width (width of best template) Return: rch (the newly created L_RCH)
Notes: (1) Be sure to destroy any existing rch before assigning this. (2) This stores the text string, not a copy of it, so the caller must not destroy the string.
void rchDestroy | ( | L_RCH ** | prch | ) |
Input: &rch Return: void
l_int32 rchExtract | ( | L_RCH * | rch, |
l_int32 * | pindex, | ||
l_float32 * | pscore, | ||
char ** | ptext, | ||
l_int32 * | psample, | ||
l_int32 * | pxloc, | ||
l_int32 * | pyloc, | ||
l_int32 * | pwidth | ||
) |
Input: rch &index (<optional return> index of best template) &score (<optional return> correlation score of best template) &text (<optional return> character string of best template) &sample (<optional return> index of best sample) &xloc (<optional return> x-location of template) &yloc (<optional return> y-location of template) &width (<optional return> width of best template) Return: 0 if OK, 1 on error
SARRAY* recogaExtractNumbers | ( | L_RECOGA * | recoga, |
BOXA * | boxas, | ||
l_float32 | scorethresh, | ||
l_int32 | spacethresh, | ||
BOXAA ** | pbaa, | ||
NUMAA ** | pnaa | ||
) |
Input: recoga boxas (location of components) scorethresh (min score for which we accept a component) spacethresh (max horizontal distance allowed between digits, use -1 for default) &baa (<optional return> bounding boxes of identified numbers) &naa (<optional return> scores of identified digits) Return: sa (of identified numbers), or null on error
Notes: (1) This extracts digit data after recogaIdentifyMultiple() or lower-level identification has taken place. (2) Each string in the returned sa contains a sequence of ascii digits in a number. (3) The horizontal distance between boxes (limited by spacethresh) is the negative of the horizontal overlap. (4) Components with a score less than scorethresh, which may be hyphens or other small characters, will signal the end of the current sequence of digits in the number. A typical value for scorethresh is 0.60. (5) We allow two digits to be combined if these conditions apply: (a) the first is to the left of the second (b) the second has a horizontal separation less than spacethresh (c) the vertical overlap >= 0 (vertical separation < 0) (d) both have a score that exceeds scorethresh (6) Each numa in the optionally returned naa contains the digit scores of a number. Each boxa in the optionally returned baa contains the bounding boxes of the digits in the number.
l_int32 recogaIdentifyMultiple | ( | L_RECOGA * | recoga, |
PIX * | pixs, | ||
l_int32 | nitems, | ||
l_int32 | minw, | ||
l_int32 | minh, | ||
BOXA ** | pboxa, | ||
PIXA ** | ppixa, | ||
PIX ** | ppixdb, | ||
l_int32 | debugsplit | ||
) |
Input: recoga (with training finished) pixs (containing typically a small number of characters) nitems (to be identified in pix; use 0 if not known) minw (remove components with width less than this; use -1 for removing all noise components) minh (remove components with height less than this; use -1 for removing all noise components) &boxa (<optional return> locations of identified components) &pixa (<optional return> images of identified components) &pixdb (<optional return> debug pix: inputs and best fits) debugsplit (1 returns pix split debugging images) Return: 0 if OK; 1 if nothing is found; 2 for other errors. (Get a warning if nitems and the number found are both > 0, but not equal to each other.)
Notes: (1) This filters the input pixa, looking for nitems if requested. Set nitems == 0 if you don't know how many chars to expect. (2) This bundles the filtered components into a pixa and calls recogIdentifyPixa(). If nitems > 0, use minw = -1 and minh = -1 to remove all noise components. (3) Set minw = 0 and minh = 0 to get all noise components. Set minw > 0 and/or minh > 0 to retain selected noise components. All noise components are recognized as an empty string with a score of 0.0. (4) An attempt is made to order the (optionally) returned images and boxes in 2-dimensional sorted order. These can then be used to aggregate identified characters into numbers or words. One typically wants the pixa, which contains a boxa of the extracted subimages.
Input: recoga pixa (of 1 bpp images to match) naid (<optional> indices of components to identify; can be null) &pixdb (<optional return> pix showing inputs and best fits) Return: 0 if OK, 1 on error
Notes: (1) See recogIdentifyPixa(). This does the same operation for each recog, returning the arrays of results (scores, class index and character string) for the best correlation match.
static l_int32 recogaSaveBestRcha | ( | L_RECOGA * | recoga, |
PIXA * | pixa | ||
) | [static] |
Input: recoga pixa (with all components having been identified) Return: 0 if OK, 1 on error
Notes: (1) Finds the best score among the recognizers for each character, and puts the rch data into a rcha in the recoga. This is run after all recognizers have been applied to the pixa. (2) This also writes the best text id for each pix into its text field.
static l_int32 recogaTransferRch | ( | L_RECOGA * | recoga, |
L_RECOG * | recog, | ||
l_int32 | index | ||
) | [static] |
Input: recoga (destination, with rcha defined) recog (source, with best scoring char in its rcha) index (index of component in the original pixa) Return: 0 if OK, 1 on error
Notes: (1) This is called by recogaSaveBestRcha() to transfer the results of a single character identification in a selected recog to the rcha array in the recoga, which holds the best scoring characters.
l_int32 recogCorrelationBestChar | ( | L_RECOG * | recog, |
PIX * | pixs, | ||
BOX ** | pbox, | ||
l_float32 * | pscore, | ||
l_int32 * | pindex, | ||
char ** | pcharstr, | ||
PIX ** | ppixdb | ||
) |
Input: recog (with LUT's pre-computed) pixs (can be of multiple touching characters, 1 bpp) &box (<return> bounding box of best fit character) &score (<return> correlation score) &index (<optional return> index of class) &charstr (<optional return> character string of class) &pixdb (<optional return> debug pix showing input and best fit) Return: 0 if OK, 1 on error
Notes: (1) Basic matching character splitter. Finds the best match among all templates to some region of the image. This can result in splitting the image into two parts. This is "image decoding" without dynamic programming, because we don't use a setwidth and compute the best matching score for the entire image. (2) Matching is to the average templates, without character scaling.
l_int32 recogCorrelationBestRow | ( | L_RECOG * | recog, |
PIX * | pixs, | ||
BOXA ** | pboxa, | ||
NUMA ** | pnascore, | ||
NUMA ** | pnaindex, | ||
SARRAY ** | psachar, | ||
l_int32 | debug | ||
) |
Input: recog (with LUT's pre-computed) pixs (typically of multiple touching characters, 1 bpp) &boxa (<return> bounding boxes of best fit characters) &nascore (<optional return> correlation scores) &naindex (<optional return> indices of classes) &sachar (<optional return> array of character strings) debug (1 for results written to pixadb_split) Return: 0 if OK, 1 on error
Notes: (1) Supervises character matching for (in general) a c.c with multiple touching characters. Finds the best match greedily. Rejects small parts that are left over after splitting. (2) Matching is to the average, and without character scaling.
l_int32 recogIdentifyPix | ( | L_RECOG * | recog, |
PIX * | pixs, | ||
PIX ** | ppixdb | ||
) |
Input: recog (with LUT's pre-computed) pixs (of a single character, 1 bpp) &pixdb (<optional return> debug pix showing input and best fit) Return: 0 if OK, 1 on error
Notes: (1) Basic recognition function for a single character. (2) If L_USE_ALL, matching is attempted to every bitmap in the recog, and the identity of the best match is returned. However, if L_USE_AVERAGE, the matching is only to the averaged bitmaps, and the index of the best sample is meaningless (0 is returned if requested). (3) The score is related to the confidence (probability of correct identification), in that a higher score is correlated with a higher probability. However, the actual relation between the correlation (score) and the probability is not known; we call this a "score" because "confidence" can be misinterpreted as an actual probability.
Input: recog pixa (of 1 bpp images to match) naid (<optional> indices of components to identify; can be null) &pixdb (<optional return> pix showing inputs and best fits) Return: 0 if OK, 1 on error
Notes: (1) See recogIdentifyPix(). This does the same operation for each pix in a pixa, and optionally returns the arrays of results (scores, class index and character string) for the best correlation match.
PIX* recogPreSplittingFilter | ( | L_RECOG * | recog, |
PIX * | pixs, | ||
l_float32 | maxasp, | ||
l_float32 | minaf, | ||
l_int32 | debug | ||
) |
Input: recog pixs (1 bpp, single connected component) maxasp (maximum aspect ratio (width/height) to be retained) minaf (minimum area fraction (|fg|/(w*h)) to be retained) debug (1 to output indicator arrays) Return: pixd (with filtered components removed) or null on error
PIX* recogProcessToIdentify | ( | L_RECOG * | recog, |
PIX * | pixs, | ||
l_int32 | pad | ||
) |
Input: recog (with LUT's pre-computed) pixs (typ. single character, possibly d > 1 and uncropped) pad (extra pixels added to left and right sides) Return: pixd (1 bpp, clipped to foreground), or null if there are no fg pixels or on error.
Notes: (1) This is a lightweight operation to ensure that the input image is 1 bpp, properly cropped, and padded on each side. If bpp > 1, the image is thresholded.
l_int32 recogSetScaling | ( | L_RECOG * | recog, |
l_int32 | scalew, | ||
l_int32 | scaleh | ||
) |
Input: recog scalew (scale all widths to this; use 0 for no scaling) scaleh (scale all heights to this; use 0 for no scaling) Return: 0 if OK, 1 on error
l_int32 recogSetTemplateType | ( | L_RECOG * | recog, |
l_int32 | templ_type | ||
) |
Input: recog templ_type (L_USE_AVERAGE or L_USE_ALL) Return: 0 if OK, 1 on error
l_int32 recogSkipIdentify | ( | L_RECOG * | recog | ) |
Input: recog Return: 0 if OK, 1 on error
Notes: (1) This just writes a "dummy" result with 0 score and empty string id into the rch.
l_int32 recogSplitIntoCharacters | ( | L_RECOG * | recog, |
PIX * | pixs, | ||
l_int32 | minw, | ||
l_int32 | minh, | ||
BOXA ** | pboxa, | ||
PIXA ** | ppixa, | ||
NUMA ** | pnaid, | ||
l_int32 | debug | ||
) |
Input: recog pixs (1 bpp, contains only mostly deskewed text) minw (remove components with width less than this; use -1 for default removing out of band components) minh (remove components with height less than this; use -1 for default removing out of band components) &boxa (<return> character bounding boxes) &pixa (<return> character images) &naid (<return> indices of components to identify) debug (1 for results written to pixadb_split) Return: 0 if OK, 1 on error or if no components are returned
Notes: (1) This can be given an image that has an arbitrary number of text characters. It does splitting of connected components based on greedy correlation matching in recogCorrelationBestRow(). The returned pixa includes the boxes from which the (possibly split) components are extracted. (2) If either minw < 0 or minh < 0, noise components are filtered out, and the returned naid array is all 1. Otherwise, some noise components whose dimensions (w,h) satisfy w >= minw and h >= minh are allowed through, but they are identified in the returned naid, where they are labelled by 0 to indicate that they are not to be run through identification. Retaining the noise components provides spatial information that can help applications interpret the results. (3) In addition to optional filtering of the noise, the resulting components are put in row-major (2D) order, and the smaller of overlapping components are removed if they satisfy conditions of relative size and fractional overlap. (4) Note that the splitting function uses unscaled templates and does not bother returning the class results and scores. These are more accurately found later using the scaled templates.
l_int32 recogSplittingFilter | ( | L_RECOG * | recog, |
PIX * | pixs, | ||
l_float32 | maxasp, | ||
l_float32 | minaf, | ||
l_int32 * | premove, | ||
l_int32 | debug | ||
) |
Input: recog pixs (1 bpp, single connected component) maxasp (maximum aspect ratio (width/height) to be retained) minaf (minimum area fraction (|fg|/(w*h)) to be retained) &remove (<return> 0 to save, 1 to remove) debug (1 to output indicator arrays) Return: 0 if OK, 1 on error
static l_int32 transferRchToRcha | ( | L_RCH * | rch, |
L_RCHA * | rcha | ||
) | [static] |
Input: rch (source of data) rcha (append to arrays in this destination) Return: 0 if OK, 1 on error
Notes: (1) This is used to transfer the results of a single character identification to an rcha array for the array of characters.
const l_int32 LeftRightPadding = 32 [static] |
const l_float32 MaxAspectRatio = 6.0 [static] |
const l_float32 MinFillFactor = 0.10 [static] |
const l_int32 MinHeightPass1 = 5 [static] |
const l_int32 MinOverlap1 = 6 [static] |
const l_int32 MinOverlap2 = 6 [static] |