Leptonica  1.54
Файл src/compare.c
#include <string.h>
#include <math.h>
#include "allheaders.h"

Функции

static l_int32 pixCompareTilesByHisto (PIX *pix1, PIX *pix2, l_int32 maxgray, l_int32 factor, l_int32 nx, l_int32 ny, l_float32 *pscore, PIXA *pixadebug)
l_int32 pixEqual (PIX *pix1, PIX *pix2, l_int32 *psame)
l_int32 pixEqualWithAlpha (PIX *pix1, PIX *pix2, l_int32 use_alpha, l_int32 *psame)
l_int32 pixEqualWithCmap (PIX *pix1, PIX *pix2, l_int32 *psame)
l_int32 pixUsesCmapColor (PIX *pixs, l_int32 *pcolor)
l_int32 pixCorrelationBinary (PIX *pix1, PIX *pix2, l_float32 *pval)
PIX * pixDisplayDiffBinary (PIX *pix1, PIX *pix2)
l_int32 pixCompareBinary (PIX *pix1, PIX *pix2, l_int32 comptype, l_float32 *pfract, PIX **ppixdiff)
l_int32 pixCompareGrayOrRGB (PIX *pix1, PIX *pix2, l_int32 comptype, l_int32 plottype, l_int32 *psame, l_float32 *pdiff, l_float32 *prmsdiff, PIX **ppixdiff)
l_int32 pixCompareGray (PIX *pix1, PIX *pix2, l_int32 comptype, l_int32 plottype, l_int32 *psame, l_float32 *pdiff, l_float32 *prmsdiff, PIX **ppixdiff)
l_int32 pixCompareRGB (PIX *pix1, PIX *pix2, l_int32 comptype, l_int32 plottype, l_int32 *psame, l_float32 *pdiff, l_float32 *prmsdiff, PIX **ppixdiff)
l_int32 pixCompareTiled (PIX *pix1, PIX *pix2, l_int32 sx, l_int32 sy, l_int32 type, PIX **ppixdiff)
NUMA * pixCompareRankDifference (PIX *pix1, PIX *pix2, l_int32 factor)
l_int32 pixTestForSimilarity (PIX *pix1, PIX *pix2, l_int32 factor, l_int32 mindiff, l_float32 maxfract, l_float32 maxave, l_int32 *psimilar, l_int32 printstats)
l_int32 pixGetDifferenceStats (PIX *pix1, PIX *pix2, l_int32 factor, l_int32 mindiff, l_float32 *pfractdiff, l_float32 *pavediff, l_int32 printstats)
NUMA * pixGetDifferenceHistogram (PIX *pix1, PIX *pix2, l_int32 factor)
l_int32 pixGetPerceptualDiff (PIX *pixs1, PIX *pixs2, l_int32 sampling, l_int32 dilation, l_int32 mindiff, l_float32 *pfract, PIX **ppixdiff1, PIX **ppixdiff2)
l_int32 pixGetPSNR (PIX *pix1, PIX *pix2, l_int32 factor, l_float32 *ppsnr)
l_int32 pixaComparePhotoRegionsByHisto (PIXA *pixa, l_float32 minratio, l_float32 textthresh, l_int32 factor, l_int32 nx, l_int32 ny, l_float32 simthresh, NUMA **pnai, l_float32 **pscores, PIX **ppixd)
l_int32 pixComparePhotoRegionsByHisto (PIX *pix1, PIX *pix2, BOX *box1, BOX *box2, l_float32 minratio, l_int32 factor, l_int32 nx, l_int32 ny, l_float32 *pscore, l_int32 debugflag)
l_int32 pixGenPhotoHistos (PIX *pixs, BOX *box, l_int32 factor, l_float32 thresh, l_int32 nx, l_int32 ny, NUMAA **pnaa, l_int32 *pw, l_int32 *ph, l_int32 debugflag)
PIX * pixPadToCenterCentroid (PIX *pixs, l_int32 factor)
l_int32 pixCentroid8 (PIX *pixs, l_int32 factor, l_float32 *pcx, l_float32 *pcy)
l_int32 pixDecideIfPhotoImage (PIX *pix, l_int32 factor, l_int32 nx, l_int32 ny, l_float32 thresh, NUMAA **pnaa, PIXA *pixadebug)
l_int32 compareTilesByHisto (NUMAA *naa1, NUMAA *naa2, l_float32 minratio, l_int32 w1, l_int32 h1, l_int32 w2, l_int32 h2, l_float32 *pscore, PIXA *pixadebug)
l_int32 pixCompareGrayByHisto (PIX *pix1, PIX *pix2, BOX *box1, BOX *box2, l_float32 minratio, l_int32 maxgray, l_int32 factor, l_int32 nx, l_int32 ny, l_float32 *pscore, l_int32 debugflag)
l_int32 pixCropAlignedToCentroid (PIX *pix1, PIX *pix2, l_int32 factor, BOX **pbox1, BOX **pbox2)
l_uint8 * l_compressGrayHistograms (NUMAA *naa, l_int32 w, l_int32 h, size_t *psize)
NUMAA * l_uncompressGrayHistograms (l_uint8 *bytea, size_t size, l_int32 *pw, l_int32 *ph)
l_int32 pixCompareWithTranslation (PIX *pix1, PIX *pix2, l_int32 thresh, l_int32 *pdelx, l_int32 *pdely, l_float32 *pscore, l_int32 debugflag)
l_int32 pixBestCorrelation (PIX *pix1, PIX *pix2, l_int32 area1, l_int32 area2, l_int32 etransx, l_int32 etransy, l_int32 maxshift, l_int32 *tab8, l_int32 *pdelx, l_int32 *pdely, l_float32 *pscore, l_int32 debugflag)

Переменные

static const l_float32 TINY = 0.00001

Функции

l_int32 compareTilesByHisto ( NUMAA *  naa1,
NUMAA *  naa2,
l_float32  minratio,
l_int32  w1,
l_int32  h1,
l_int32  w2,
l_int32  h2,
l_float32 *  pscore,
PIXA *  pixadebug 
)

compareTilesByHisto()

Input: naa1, naa2 (each is a set of 256 entry histograms) minratio (requiring image sizes be compatible; < 1.0) w1, h1, w2, h2 (image sizes from which histograms were made) &score (<return> similarity score of histograms) pixadebug (<optional> use only for debug output) Return: 0 if OK, 1 on error

Notes: (1) naa1 and naa2 must be generated using pixGenPhotoHistos(), using the same tile sizes. (2) The image dimensions must be similar. The score is 0.0 if the ratio of widths and heights (smallest / largest) does not exceed a threshold minratio, which must be between 0.5 and 1.0. If set at 1.0, both images must be exactly the same size. A typical value for minratio is 0.9. (3) The input pixadebug is null unless debug output is requested.

l_uint8* l_compressGrayHistograms ( NUMAA *  naa,
l_int32  w,
l_int32  h,
size_t *  psize 
)

l_compressGrayHistograms()

Input: naa (set of 256-entry histograms) w, h (size of image) &size (<return> size of byte array) Return: byte array (8 + 256 * N bytes for N histograms), or null on error

Notes: (1) This first writes w and h to the byte array as 4 byte ints. (2) Then it normalizes each histogram to a max value of 255, and saves each value as a byte. If there are N histograms, the output bytearray has 8 + 256 * N bytes. (3) Further compression of the array with zlib yields only about a 25% decrease in size, so we don't bother. If size reduction were important, a lossy transform using a 1-dimensional DCT would be effective, because we don't care about the fine details of these histograms.

NUMAA* l_uncompressGrayHistograms ( l_uint8 *  bytea,
size_t  size,
l_int32 *  pw,
l_int32 *  ph 
)

l_uncompressGrayHistograms()

Input: bytea (byte array of size 8 + 256 * N, N an integer) size (size of byte array) &w (<return> width of the image that generated the histograms) &h (<return> height of the image) Return: numaa (representing N histograms, each with 256 bins), or null on error.

Notes: (1) The first 8 bytes are read as two 32-bit ints. (2) Then this constructs a numaa representing some number of gray histograms that are normalized such that the max value in each histogram is 255. The data is stored as a byte array, with 256 bytes holding the data for each histogram. Each gray histogram was computed from a tile of a grayscale image.

l_int32 pixaComparePhotoRegionsByHisto ( PIXA *  pixa,
l_float32  minratio,
l_float32  textthresh,
l_int32  factor,
l_int32  nx,
l_int32  ny,
l_float32  simthresh,
NUMA **  pnai,
l_float32 **  pscores,
PIX **  ppixd 
)

pixaComparePhotoRegionsByHisto()

Input: pixa (any depth; colormap OK) minratio (requiring sizes be compatible; < 1.0) factor (subsampling; >= 1) textthresh (threshold for text/photo; use 0 for default) nx, ny (number of subregions to use for histograms; e.g. 3x3) simthresh (threshold for similarity; use 0 for default) &nai (<return> array giving similarity class indices) &scores (<optional return> score matrix as 1-D array of size N^2) &pixd (<optional return> pix of similarity classes) Return: 0 if OK, 1 on error

Notes: (1) This function takes a pixa of cropped photo images and compares each one to the others for similarity. Each image is first tested to see if it is a photo that can be compared by tiled histograms. If so, it is padded to put the centroid in the center of the image, and the histograms are generated. The final step of comparing each histogram with all the others is very fast. (2) An initial filter gives score = 0 if the ratio of widths and heights (smallest / largest) does not exceed a threshold minratio. If set at 1.0, both images must be exactly the same size. A typical value for minratio is 0.9. (3) The comparison score between two images is a value in [0.0 .. 1.0]. If the comparison score >= simthresh, the images are placed in the same similarity class. Default value for simthresh is 0.25. (4) An array of similarity class indices for pix in the input pixa is returned. (5) There are two debugging options: * An optional 2D matrix of scores is returned as a 1D array. A visualization of this is written to a temp file. * An optional pix showing the similarity classes can be returned. Text in each input pix is reproduced. (6) See the notes in pixComparePhotoRegionsByHisto() for details on the implementation.

l_int32 pixBestCorrelation ( PIX *  pix1,
PIX *  pix2,
l_int32  area1,
l_int32  area2,
l_int32  etransx,
l_int32  etransy,
l_int32  maxshift,
l_int32 *  tab8,
l_int32 *  pdelx,
l_int32 *  pdely,
l_float32 *  pscore,
l_int32  debugflag 
)

pixBestCorrelation()

Input: pix1 (1 bpp) pix2 (1 bpp) area1 (number of on pixels in pix1) area2 (number of on pixels in pix2) etransx (estimated x translation of pix2 to align with pix1) etransy (estimated y translation of pix2 to align with pix1) maxshift (max x and y shift of pix2, around the estimated alignment location, relative to pix1) tab8 (<optional> sum tab for ON pixels in byte; can be NULL) &delx (<optional return> best x shift of pix2 relative to pix1) &dely (<optional return> best y shift of pix2 relative to pix1) &score (<optional return> maximum score found; can be NULL) debugflag (<= 0 to skip; positive to generate output. The integer is used to label the debug image.) Return: 0 if OK, 1 on error

Notes: (1) This maximizes the correlation score between two 1 bpp images, by starting with an estimate of the alignment (etransx, etransy) and computing the correlation around this. It optionally returns the shift (delx, dely) that maximizes the correlation score when pix2 is shifted by this amount relative to pix1. (2) Get the centroids of pix1 and pix2, using pixCentroid(), to compute (etransx, etransy). Get the areas using pixCountPixels(). (3) The centroid of pix2 is shifted with respect to the centroid of pix1 by all values between -maxshift and maxshift, and likewise for the y shifts. Therefore, the number of correlations computed is: (2 * maxshift + 1) * (2 * maxshift + 1). Consequently, if pix1 and pix2 are large, you should do this in a coarse-to-fine sequence. See the use of this function in pixCompareWithTranslation().

l_int32 pixCentroid8 ( PIX *  pixs,
l_int32  factor,
l_float32 *  pcx,
l_float32 *  pcy 
)

pixCentroid8()

Input: pixs (8 bpp) factor (subsampling; >= 1) &cx (<return> x value of centroid) &cy (<return> y value of centroid) Return: 0 if OK, 1 on error

Notes: (1) This first does a photometric inversion (black = 255, white = 0). It then finds the centroid of the result. The inversion is done because white is usually background, so the centroid is computed based on the "foreground" gray pixels, and the darker the pixel, the more weight it is given.

l_int32 pixCompareBinary ( PIX *  pix1,
PIX *  pix2,
l_int32  comptype,
l_float32 *  pfract,
PIX **  ppixdiff 
)

pixCompareBinary()

Input: pix1 (1 bpp) pix2 (1 bpp) comptype (L_COMPARE_XOR, L_COMPARE_SUBTRACT) &fract (<return> fraction of pixels that are different) &pixdiff (<optional return> pix of difference) Return: 0 if OK; 1 on error

Notes: (1) The two images are aligned at the UL corner, and do not need to be the same size. (2) If using L_COMPARE_SUBTRACT, pix2 is subtracted from pix1. (3) The total number of pixels is determined by pix1.

l_int32 pixCompareGray ( PIX *  pix1,
PIX *  pix2,
l_int32  comptype,
l_int32  plottype,
l_int32 *  psame,
l_float32 *  pdiff,
l_float32 *  prmsdiff,
PIX **  ppixdiff 
)

pixCompareGray()

Input: pix1 (8 or 16 bpp, not cmapped) pix2 (8 or 16 bpp, not cmapped) comptype (L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF) plottype (gplot plot output type, or 0 for no plot) &same (<optional return> 1 if pixel values are identical) &diff (<optional return> average difference) &rmsdiff (<optional return> rms of difference) &pixdiff (<optional return> pix of difference) Return: 0 if OK; 1 on error

Notes: (1) See pixCompareGrayOrRGB() for details. (2) Use pixCompareGrayOrRGB() if the input pix are colormapped.

l_int32 pixCompareGrayByHisto ( PIX *  pix1,
PIX *  pix2,
BOX *  box1,
BOX *  box2,
l_float32  minratio,
l_int32  maxgray,
l_int32  factor,
l_int32  nx,
l_int32  ny,
l_float32 *  pscore,
l_int32  debugflag 
)

pixCompareGrayByHisto()

Input: pix1, pix2 (any depth; colormap OK) box1, box2 (<optional> region selected from each; can be null) minratio (requiring sizes be compatible; < 1.0) maxgray (max value to keep in histo; >= 200, 255 to keep all) factor (subsampling; >= 1) nx, ny (number of subregions to use for histograms; e.g. 3x3) &score (<return> similarity score of histograms) debugflag (1 for debug output; 0 for no debugging) Return: 0 if OK, 1 on error

Notes: (1) This function compares two grayscale photo regions. It can do it with a single histogram from each region, or with a set of (nx * ny) spatially aligned histograms. For both cases, align the regions using the centroid of the inverse image, and crop to the smallest of the two. (2) An initial filter gives score = 0 if the ratio of widths and heights (smallest / largest) does not exceed a threshold minratio. This must be between 0.5 and 1.0. If set at 1.0, both images must be exactly the same size. A typical value for minratio is 0.9. (3) The lightest values in the histogram can be disregarded. Set maxgray to the lightest value to be kept. For example, to eliminate white (255), set maxgray = 254. maxgray must be >= 200. (4) For an efficient representation of the histogram, normalize using a multiplicative factor so that the number in the maximum bucket is 255. It then takes 256 bytes to store. (5) When comparing the histograms of two regions:

  • Use maxgray = 254 to ignore the white pixels, the number of which may be sensitive to the crop region if the pixels outside that region are white.
  • Use the Earth Mover distance (EMD), with the histograms normalized so that the sum over bins is the same. Further normalize by dividing by 255, so that the result is in [0.0 ... 1.0]. (6) Get a similarity score S = 1.0 - k * D, where k is a constant, say in the range 5-10, and D = normalized EMD; for multiple tiles, take the Min(S) to be the final score. Using aligned tiles gives protection against accidental similarity of the overall grayscale histograms. A small number of aligned tiles works well. (7) With debug on, you get a pdf that shows, for each tile, the images, histograms and score. (8) When to use: (a) Because this function should not be used on text or line graphics, which can give false positive results (i.e., high scores for different images), the input images should be filtered. (b) To filter, first use pixDecideIfText(). If that function says the image is text, do not use it. If the function says it is not text, it still may be line graphics, and in that case, use pixGetGrayHistogramTiled() and grayInterHistogramStats() to determine whether it is photo or line graphics.
l_int32 pixCompareGrayOrRGB ( PIX *  pix1,
PIX *  pix2,
l_int32  comptype,
l_int32  plottype,
l_int32 *  psame,
l_float32 *  pdiff,
l_float32 *  prmsdiff,
PIX **  ppixdiff 
)

pixCompareGrayOrRGB()

Input: pix1 (8 or 16 bpp gray, 32 bpp rgb, or colormapped) pix2 (8 or 16 bpp gray, 32 bpp rgb, or colormapped) comptype (L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF) plottype (gplot plot output type, or 0 for no plot) &same (<optional return> 1 if pixel values are identical) &diff (<optional return> average difference) &rmsdiff (<optional return> rms of difference) &pixdiff (<optional return> pix of difference) Return: 0 if OK; 1 on error

Notes: (1) The two images are aligned at the UL corner, and do not need to be the same size. If they are not the same size, the comparison will be made over overlapping pixels. (2) If there is a colormap, it is removed and the result is either gray or RGB depending on the colormap. (3) If RGB, each component is compared separately. (4) If type is L_COMPARE_ABS_DIFF, pix2 is subtracted from pix1 and the absolute value is taken. (5) If type is L_COMPARE_SUBTRACT, pix2 is subtracted from pix1 and the result is clipped to 0. (6) The plot output types are specified in gplot.h. Use 0 if no difference plot is to be made. (7) If the images are pixelwise identical, no difference plot is made, even if requested. The result (TRUE or FALSE) is optionally returned in the parameter 'same'. (8) The average difference (either subtracting or absolute value) is optionally returned in the parameter 'diff'. (9) The RMS difference is optionally returned in the parameter 'rmsdiff'. For RGB, we return the average of the RMS differences for each of the components.

l_int32 pixComparePhotoRegionsByHisto ( PIX *  pix1,
PIX *  pix2,
BOX *  box1,
BOX *  box2,
l_float32  minratio,
l_int32  factor,
l_int32  nx,
l_int32  ny,
l_float32 *  pscore,
l_int32  debugflag 
)

pixComparePhotoRegionsByHisto()

Input: pix1, pix2 (any depth; colormap OK) box1, box2 (<optional> photo regions from each; can be null) minratio (requiring sizes be compatible; < 1.0) factor (subsampling; >= 1) nx, ny (number of subregions to use for histograms; e.g. 3x3) &score (<return> similarity score of histograms) debugflag (1 for debug output; 0 for no debugging) Return: 0 if OK, 1 on error

Notes: (1) This function compares two grayscale photo regions. If a box is given, the region is clipped; otherwise assume the entire images are photo regions. This is done with a set of (nx * ny) spatially aligned histograms, which are aligned using the centroid of the inverse image. (2) An initial filter gives score = 0 if the ratio of widths and heights (smallest / largest) does not exceed a threshold minratio. This must be between 0.5 and 1.0. If set at 1.0, both images must be exactly the same size. A typical value for minratio is 0.9. (3) Because this function should not be used on text or line graphics, which can give false positive results (i.e., high scores for different images), filter the images using pixGenPhotoHistos(), which returns tiled histograms only if an image is not text and comparison is expected to work with histograms. If either image fails the test, the comparison returns a score of 0.0. (4) The white value counts in the histograms are removed; they are typically pixels that were padded to achieve alignment. (5) For an efficient representation of the histogram, normalize using a multiplicative factor so that the number in the maximum bucket is 255. It then takes 256 bytes to store. (6) When comparing the histograms of two regions, use the Earth Mover distance (EMD), with the histograms normalized so that the sum over bins is the same. Further normalize by dividing by 255, so that the result is in [0.0 ... 1.0]. (7) Get a similarity score S = 1.0 - k * D, where k is a constant, say in the range 5-10, and D = normalized EMD; for multiple tiles, take the Min(S) to be the final score. Using aligned tiles gives protection against accidental similarity of the overall grayscale histograms. A small number of aligned tiles works well. (8) With debug on, you get a pdf that shows, for each tile, the images, histograms and score.

NUMA* pixCompareRankDifference ( PIX *  pix1,
PIX *  pix2,
l_int32  factor 
)

pixCompareRankDifference()

Input: pix1 (8 bpp gray or 32 bpp rgb, or colormapped) pix2 (8 bpp gray or 32 bpp rgb, or colormapped) factor (subsampling factor; use 0 or 1 for no subsampling) Return: narank (numa of rank difference), or null on error

Notes: (1) This answers the question: if the pixel values in each component are compared by absolute difference, for any value of difference, what is the fraction of pixel pairs that have a difference of this magnitude or greater. For a difference of 0, the fraction is 1.0. In this sense, it is a mapping from pixel difference to rank order of difference. (2) The two images are aligned at the UL corner, and do not need to be the same size. If they are not the same size, the comparison will be made over overlapping pixels. (3) If there is a colormap, it is removed and the result is either gray or RGB depending on the colormap. (4) If RGB, pixel differences for each component are aggregated into a single histogram.

l_int32 pixCompareRGB ( PIX *  pix1,
PIX *  pix2,
l_int32  comptype,
l_int32  plottype,
l_int32 *  psame,
l_float32 *  pdiff,
l_float32 *  prmsdiff,
PIX **  ppixdiff 
)

pixCompareRGB()

Input: pix1 (32 bpp rgb) pix2 (32 bpp rgb) comptype (L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF) plottype (gplot plot output type, or 0 for no plot) &same (<optional return> 1 if pixel values are identical) &diff (<optional return> average difference) &rmsdiff (<optional return> rms of difference) &pixdiff (<optional return> pix of difference) Return: 0 if OK; 1 on error

Notes: (1) See pixCompareGrayOrRGB() for details.

l_int32 pixCompareTiled ( PIX *  pix1,
PIX *  pix2,
l_int32  sx,
l_int32  sy,
l_int32  type,
PIX **  ppixdiff 
)

pixCompareTiled()

Input: pix1 (8 bpp or 32 bpp rgb) pix2 (8 bpp or 32 bpp rgb) sx, sy (tile size; must be > 1) type (L_MEAN_ABSVAL or L_ROOT_MEAN_SQUARE) &pixdiff (<return> pix of difference) Return: 0 if OK; 1 on error

Notes: (1) With L_MEAN_ABSVAL, we compute for each tile the average abs value of the pixel component difference between the two (aligned) images. With L_ROOT_MEAN_SQUARE, we compute instead the rms difference over all components. (2) The two input pix must be the same depth. Comparison is made using UL corner alignment. (3) For 32 bpp, the distance between corresponding tiles is found by averaging the measured difference over all three components of each pixel in the tile. (4) The result, pixdiff, contains one pixel for each source tile.

static l_int32 pixCompareTilesByHisto ( PIX *  pix1,
PIX *  pix2,
l_int32  maxgray,
l_int32  factor,
l_int32  nx,
l_int32  ny,
l_float32 *  pscore,
PIXA *  pixadebug 
) [static]

pixCompareTilesByHisto()

Input: pix1, pix2 (8 bpp) maxgray (max value to keep in histo; 255 to keep all) factor (subsampling; >= 1) nx, ny (number of subregions to use for histograms) &score (<return> similarity score of histograms) pixadebug (<optional> use only for debug output) Return: 0 if OK, 1 on error

Notes: (1) This static function is only called from pixCompareGrayByHisto(). The input images have been converted to 8 bpp if necessary, aligned and cropped. (2) The input pixadebug is null unless debug output is requested. (3) See pixCompareGrayByHisto() for details.

l_int32 pixCompareWithTranslation ( PIX *  pix1,
PIX *  pix2,
l_int32  thresh,
l_int32 *  pdelx,
l_int32 *  pdely,
l_float32 *  pscore,
l_int32  debugflag 
)

pixCompareWithTranslation()

Input: pix1, pix2 (any depth; colormap OK) thresh (threshold for converting to 1 bpp) &delx (<return> x translation on pix2 to align with pix1) &dely (<return> y translation on pix2 to align with pix1) &score (<return> correlation score at best alignment) debugflag (1 for debug output; 0 for no debugging) Return: 0 if OK, 1 on error

Notes: (1) This does a coarse-to-fine search for best translational alignment of two images, measured by a scoring function that is the correlation between the fg pixels. (2) The threshold is used if the images aren't 1 bpp. (3) With debug on, you get a pdf that shows, as a grayscale image, the score as a function of shift from the initial estimate, for each of the four levels. The shift is 0 at the center of the image. (4) With debug on, you also get a pdf that shows the difference at the best alignment between the two images, at each of the four levels. The red and green pixels show locations where one image has a fg pixel and the other doesn't. The black pixels are where both images have fg pixels, and white pixels are where neither image has fg pixels.

l_int32 pixCorrelationBinary ( PIX *  pix1,
PIX *  pix2,
l_float32 *  pval 
)

pixCorrelationBinary()

Input: pix1 (1 bpp) pix2 (1 bpp) &val (<return> correlation) Return: 0 if OK; 1 on error

Notes: (1) The correlation is a number between 0.0 and 1.0, based on foreground similarity: $\text{correlation} = \dfrac{|1 \text{ AND } 2|^2}{|1| \cdot |2|}$, where |x| is the count of foreground pixels in image x. If the images are identical, this is 1.0. If they have no fg pixels in common, this is 0.0. If one or both images have no fg pixels, the correlation is 0.0. (2) Typically the two images are of equal size, but this is not enforced. Instead, the UL corners are aligned.

l_int32 pixCropAlignedToCentroid ( PIX *  pix1,
PIX *  pix2,
l_int32  factor,
BOX **  pbox1,
BOX **  pbox2 
)

pixCropAlignedToCentroid()

Input: pix1, pix2 (any depth; colormap OK) factor (subsampling; >= 1) &box1 (<return> crop box for pix1) &box2 (<return> crop box for pix2) Return: 0 if OK, 1 on error

Notes: (1) This finds the maximum crop boxes for two 8 bpp images when the centroids of their photometric inverses are aligned. Black pixels have weight 255; white pixels have weight 0.

l_int32 pixDecideIfPhotoImage ( PIX *  pix,
l_int32  factor,
l_int32  nx,
l_int32  ny,
l_float32  thresh,
NUMAA **  pnaa,
PIXA *  pixadebug 
)

pixDecideIfPhotoImage()

Input: pix (8 bpp, centroid in center) factor (subsampling for histograms; >= 1) nx, ny (number of subregions to use for histograms) thresh (threshold for photo/text; use 0 for default) &naa (<return> array of normalized histograms) pixadebug (<optional> use only for debug output) Return: 0 if OK, 1 on error

Notes: (1) The input image must be 8 bpp (no colormap), and padded with white pixels so the centroid of photo-inverted pixels is at the center of the image. (2) If the pix is not almost certainly a photoimage, the returned histograms (naa) are null. (3) If histograms are generated, the white (255) count is set to 0. This removes all pixel values above 230, including white padding from the centroid matching operation, from consideration. The resulting histograms are then normalized so the maximum count is 255. (4) Default for thresh is 1.3; this seems sufficiently conservative. (5) Use pixadebug == NULL unless debug output is requested.

PIX* pixDisplayDiffBinary ( PIX *  pix1,
PIX *  pix2 
)

pixDisplayDiffBinary()

Input: pix1 (1 bpp) pix2 (1 bpp) Return: pixd (4 bpp cmapped), or null on error

Notes: (1) This gives a color representation of the difference between pix1 and pix2. The color difference depends on the order. The pixels in pixd have 4 colors: * unchanged: black (on), white (off) * on in pix1, off in pix2: red * on in pix2, off in pix1: green (2) This aligns the UL corners of pix1 and pix2, and crops to the overlapping pixels.

l_int32 pixEqual ( PIX *  pix1,
PIX *  pix2,
l_int32 *  psame 
)

pixEqual()

Input: pix1 pix2 &same (<return> 1 if same; 0 if different) Return: 0 if OK; 1 on error

Notes: (1) Equality is defined as having the same pixel values for each respective image pixel. (2) This works on two pix of any depth. If one or both pix have a colormap, the depths can be different and the two pix can still be equal. (3) This ignores the alpha component for 32 bpp images. (4) If both pix have colormaps and the depths are equal, use the pixEqualWithCmap() function, which does a fast comparison if the colormaps are identical and a relatively slow comparison otherwise. (5) In all other cases, any existing colormaps must first be removed before doing pixel comparison. After the colormaps are removed, the resulting two images must have the same depth. The "lowest common denominator" is RGB, but this is only chosen when necessary, or when both have colormaps but different depths. (6) For images without colormaps that are not 32 bpp, all bits in the image part of the data array must be identical.

l_int32 pixEqualWithAlpha ( PIX *  pix1,
PIX *  pix2,
l_int32  use_alpha,
l_int32 *  psame 
)

pixEqualWithAlpha()

Input: pix1 pix2 use_alpha (1 to compare alpha in RGBA; 0 to ignore) &same (<return> 1 if same; 0 if different) Return: 0 if OK; 1 on error

Notes: (1) See notes in pixEqual(). (2) This is more general than pixEqual(), in that for 32 bpp RGBA images, where spp = 4, you can optionally include the alpha component in the comparison.

l_int32 pixEqualWithCmap ( PIX *  pix1,
PIX *  pix2,
l_int32 *  psame 
)

pixEqualWithCmap()

Input: pix1 pix2 &same (<return> 1 if the images have identical content; 0 otherwise) Return: 0 if OK, 1 on error

Notes: (1) This returns same = TRUE if the images have identical content. (2) Both pix must have a colormap, and be of equal size and depth. If these conditions are not satisfied, it is not an error; the returned result is same = FALSE. (3) We then check whether the colormaps are the same; if so, the comparison proceeds 32 bits at a time. (4) If the colormaps are different, the comparison is done by slow brute force.

l_int32 pixGenPhotoHistos ( PIX *  pixs,
BOX *  box,
l_int32  factor,
l_float32  thresh,
l_int32  nx,
l_int32  ny,
NUMAA **  pnaa,
l_int32 *  pw,
l_int32 *  ph,
l_int32  debugflag 
)

pixGenPhotoHistos()

Input: pix (depth > 1 bpp; colormap OK) box (<optional> region to be selected; can be null) factor (subsampling; >= 1) thresh (threshold for photo/text; use 0 for default) nx, ny (number of subregions to use for histograms; e.g. 3x3) &naa (<return> nx * ny 256-entry gray histograms) &w (<return> width of image used to make histograms) &h (<return> height of image used to make histograms) debugflag (1 for debug output; 0 for no debugging) Return: 0 if OK, 1 on error

Notes: (1) This crops and converts to 8 bpp if necessary. It adds a minimal white boundary such that the centroid of the photo-inverted image is in the center. This allows automatic alignment with histograms of other image regions. (2) The white value in the histogram is removed, because of the padding. (3) Use 0 for conservative default (1.3) for thresh. (4) For an efficient representation of the histogram, normalize using a multiplicative factor so that the number in the maximum bucket is 255. It then takes 256 bytes to store. (5) With debug on, you get a pdf that shows, for each tile, the images and histograms.

NUMA* pixGetDifferenceHistogram ( PIX *  pix1,
PIX *  pix2,
l_int32  factor 
)

pixGetDifferenceHistogram()

Input: pix1 (8 bpp gray or 32 bpp rgb, or colormapped) pix2 (8 bpp gray or 32 bpp rgb, or colormapped) factor (subsampling factor; use 0 or 1 for no subsampling) Return: na (Numa of histogram of differences), or null on error

Notes: (1) The two images are aligned at the UL corner, and do not need to be the same size. If they are not the same size, the comparison will be made over overlapping pixels. (2) If there is a colormap, it is removed and the result is either gray or RGB depending on the colormap. (3) If RGB, the maximum difference between pixel components is saved in the histogram.

l_int32 pixGetDifferenceStats ( PIX *  pix1,
PIX *  pix2,
l_int32  factor,
l_int32  mindiff,
l_float32 *  pfractdiff,
l_float32 *  pavediff,
l_int32  printstats 
)

pixGetDifferenceStats()

Input: pix1 (8 bpp gray or 32 bpp rgb, or colormapped) pix2 (8 bpp gray or 32 bpp rgb, or colormapped) factor (subsampling factor; use 0 or 1 for no subsampling) mindiff (minimum pixel difference to be counted; > 0) &fractdiff (<return> fraction of pixels with diff greater than or equal to mindiff) &avediff (<return> average difference of pixels with diff greater than or equal to mindiff, less mindiff) printstats (use 1 to print normalized histogram to stderr) Return: 0 if OK, 1 on error

Notes: (1) This takes a threshold mindiff and describes the difference between two images in terms of two numbers: (a) the fraction of pixels, fractdiff, whose difference equals or exceeds the threshold mindiff, and (b) the average value avediff of the difference in pixel value for the pixels in the set given by (a), after you subtract mindiff. The reason for subtracting mindiff is that you then get a useful measure for the rate of falloff of the distribution for larger differences. For example, if mindiff = 10 and you find that avediff = 2.5, it says that of the pixels with diff > 10, the average of their diffs is just mindiff + 2.5 = 12.5. This is a fast falloff in the histogram with increasing difference. (2) The two images are aligned at the UL corner, and do not need to be the same size. If they are not the same size, the comparison will be made over overlapping pixels. (3) If there is a colormap, it is removed and the result is either gray or RGB depending on the colormap. (4) If RGB, the maximum difference between pixel components is saved in the histogram.

l_int32 pixGetPerceptualDiff ( PIX *  pixs1,
PIX *  pixs2,
l_int32  sampling,
l_int32  dilation,
l_int32  mindiff,
l_float32 *  pfract,
PIX **  ppixdiff1,
PIX **  ppixdiff2 
)

pixGetPerceptualDiff()

Input: pix1 (8 bpp gray or 32 bpp rgb, or colormapped) pix2 (8 bpp gray or 32 bpp rgb, or colormapped) sampling (subsampling factor; use 0 or 1 for no subsampling) dilation (size of grayscale or color Sel; odd) mindiff (minimum pixel difference to be counted; > 0) &fract (<return> fraction of pixels with diff greater than mindiff) &pixdiff1 (<optional return> showing difference (gray or color)) &pixdiff2 (<optional return> showing pixels of sufficient diff) Return: 0 if OK, 1 on error

Notes: (1) This takes 2 pix and determines their perceptual difference, using 2 input parameters: * dilation specifies the amount of grayscale or color dilation to apply to the images, to compensate for a small amount of misregistration. A typical number might be 5, which uses a 5x5 Sel. Grayscale dilation expands lighter pixels into darker pixel regions. * mindiff determines the threshold on the difference in pixel values to be counted -- two pixels are not similar if their difference in value is at least mindiff. For color pixels, we use the maximum component difference. (2) The pixelwise comparison is always done with the UL corners aligned. The sizes of pix1 and pix2 need not be the same, although in practice it can be useful to scale to the same size. (3) If there is a colormap, it is removed and the result is either gray or RGB depending on the colormap. (4) Two optional diff images can be retrieved (typ. for debugging): pixdiff1: the gray or color difference; pixdiff2: thresholded to 1 bpp for pixels exceeding mindiff. (5) The returned value of fract can be compared to some threshold, which is application dependent. (6) This method is in analogy to the two-sided hausdorff transform, except here it is for d > 1. For d == 1 (see pixRankHaustest()), we verify that when one pix1 is dilated, it covers at least a given fraction of the pixels in pix2, and v.v.; in that case, the two pix are sufficiently similar. Here, we do an analogous thing: subtract the dilated pix1 from pix2 to get a 1-sided hausdorff-like transform. Then do it the other way. Take the component-wise max of the two results, and threshold to get the fraction of pixels with a difference below the threshold.

l_int32 pixGetPSNR ( PIX *  pix1,
PIX *  pix2,
l_int32  factor,
l_float32 *  ppsnr 
)

pixGetPSNR()

Input: pix1, pix2 (8 or 32 bpp; no colormap) factor (sampling factor; >= 1) &psnr (<return> power signal/noise ratio difference) Return: 0 if OK, 1 on error

Notes: (1) This computes the power S/N ratio, in dB, for the difference between two images. By convention, the power S/N for a grayscale image is ('log' == log base 10, and 'ln' == log base e): PSNR = 10 * log((255/MSE)^2) = 4.3429 * ln((255/MSE)^2) = -4.3429 * ln((MSE/255)^2), where MSE is the mean squared error. Here are some examples (MSE -> PSNR): 10 -> 28.1; 3 -> 38.6; 1 -> 48.1; 0.1 -> 68.1. (2) If pix1 and pix2 have the same pixel values, the MSE = 0.0 and the PSNR is infinity. For that case, this returns PSNR = 1000, which corresponds to the very small MSE of about 10^(-48).

PIX* pixPadToCenterCentroid ( PIX *  pixs,
l_int32  factor 
)

pixPadToCenterCentroid()

Input: pixs (any depth, colormap OK) factor (subsampling for centroid; >= 1) Return: pixd (padded with white pixels), or NULL on error.

Notes: (1) This adds minimum white padding to an 8 bpp pix, such that the centroid of the photometric inverse is in the center of the resulting image. Thus in computing the centroid, black pixels have weight 255, and white pixels have weight 0.

l_int32 pixTestForSimilarity ( PIX *  pix1,
PIX *  pix2,
l_int32  factor,
l_int32  mindiff,
l_float32  maxfract,
l_float32  maxave,
l_int32 *  psimilar,
l_int32  printstats 
)

pixTestForSimilarity()

Input: pix1 (8 bpp gray or 32 bpp rgb, or colormapped) pix2 (8 bpp gray or 32 bpp rgb, or colormapped) factor (subsampling factor; use 0 or 1 for no subsampling) mindiff (minimum pixel difference to be counted; > 0) maxfract (maximum fraction of pixels allowed to have diff greater than or equal to mindiff) maxave (maximum average difference of pixels allowed for pixels with diff greater than or equal to mindiff, after subtracting mindiff) &similar (<return> 1 if similar, 0 otherwise) printstats (use 1 to print normalized histogram to stderr) Return: 0 if OK, 1 on error

Notes: (1) This takes 2 pix that are the same size and determines using 3 input parameters if they are "similar". The first parameter mindiff establishes a criterion of pixel-to-pixel similarity: two pixels are not similar if their difference in value is at least mindiff. Then maxfract and maxave are thresholds on the number and distribution of dissimilar pixels allowed for the two pix to be similar. If the pix are to be similar, neither threshold can be exceeded. (2) In setting the maxfract and maxave thresholds, you have these options: (a) Base the comparison only on maxfract. Then set maxave = 0.0 or 256.0. (If 0, we always ignore it.) (b) Base the comparison only on maxave. Then set maxfract = 1.0. (c) Base the comparison on both thresholds. (3) Example of values that can be expected at mindiff = 15 when comparing lossless png encoding with jpeg encoding, q=75: (smoothish bg) fractdiff = 0.01, avediff = 2.5 (natural scene) fractdiff = 0.13, avediff = 3.5 To identify these images as 'similar', select maxfract and maxave to be upper bounds of what you expect. (4) See pixGetDifferenceStats() for a discussion of why we subtract mindiff from the computed average diff of the nonsimilar pixels to get the 'avediff' returned by that function. (5) If there is a colormap, it is removed and the result is either gray or RGB depending on the colormap. (6) If RGB, the maximum difference between pixel components is saved in the histogram.

l_int32 pixUsesCmapColor ( PIX *  pixs,
l_int32 *  pcolor 
)

pixUsesCmapColor()

Input: pixs &color (<return>) Return: 0 if OK, 1 on error

Notes: (1) This returns color = TRUE if three things are obtained: (a) the pix has a colormap (b) the colormap has at least one color entry (c) a color entry is actually used (2) It is used in pixEqual() for comparing two images, in a situation where it is required to know if the colormap has color entries that are actually used in the image.


Переменные

const l_float32 TINY = 0.00001 [static]