diff options
Diffstat (limited to 'tesseract/src/textord/ccnontextdetect.cpp')
-rw-r--r-- | tesseract/src/textord/ccnontextdetect.cpp | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/tesseract/src/textord/ccnontextdetect.cpp b/tesseract/src/textord/ccnontextdetect.cpp new file mode 100644 index 00000000..cfbbb95a --- /dev/null +++ b/tesseract/src/textord/ccnontextdetect.cpp @@ -0,0 +1,323 @@ +/////////////////////////////////////////////////////////////////////// +// File: ccnontextdetect.cpp +// Description: Connected-Component-based photo (non-text) detection. +// Author: rays@google.com (Ray Smith) +// +// Copyright 2011 Google Inc. All Rights Reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "ccnontextdetect.h" +#include "imagefind.h" +#include "strokewidth.h" + +namespace tesseract { + +// Max number of neighbour small objects per squared gridsize before a grid +// cell becomes image. +const double kMaxSmallNeighboursPerPix = 1.0 / 32; +// Max number of small blobs a large blob may overlap before it is rejected +// and determined to be image. +const int kMaxLargeOverlapsWithSmall = 3; +// Max number of small blobs a medium blob may overlap before it is rejected +// and determined to be image. Larger than for large blobs as medium blobs +// may be complex Chinese characters. Very large Chinese characters are going +// to overlap more medium blobs than small. +const int kMaxMediumOverlapsWithSmall = 12; +// Max number of normal blobs a large blob may overlap before it is rejected +// and determined to be image. This is set higher to allow for drop caps, which +// may overlap a lot of good text blobs. +const int kMaxLargeOverlapsWithMedium = 12; +// Multiplier of original noise_count used to test for the case of spreading +// noise beyond where it should really be. +const int kOriginalNoiseMultiple = 8; +// Pixel padding for noise blobs when rendering on the image +// mask to encourage them to join together. Make it too big and images +// will fatten out too much and have to be clipped to text. +const int kNoisePadding = 4; +// Fraction of max_noise_count_ to be added to the noise count if there is +// photo mask in the background. +const double kPhotoOffsetFraction = 0.375; +// Min ratio of perimeter^2/16area for a "good" blob in estimating noise +// density. Good blobs are supposed to be highly likely real text. +// We consider a square to have unit ratio, where A=(p/4)^2, hence the factor +// of 16. Digital circles are weird and have a minimum ratio of pi/64, not +// the 1/(4pi) that you would expect. +const double kMinGoodTextPARatio = 1.5; + +CCNonTextDetect::CCNonTextDetect(int gridsize, + const ICOORD& bleft, const ICOORD& tright) + : BlobGrid(gridsize, bleft, tright), + max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix * + gridsize * gridsize)), + noise_density_(nullptr) { + // TODO(rays) break max_noise_count_ out into an area-proportional + // value, as now plus an additive constant for the number of text blobs + // in the 3x3 neighbourhood - maybe 9. +} + +CCNonTextDetect::~CCNonTextDetect() { + delete noise_density_; +} + +// Creates and returns a Pix with the same resolution as the original +// in which 1 (black) pixels represent likely non text (photo, line drawing) +// areas of the page, deleting from the blob_block the blobs that were +// determined to be non-text. +// The photo_map is used to bias the decision towards non-text, rather than +// supplying definite decision. +// The blob_block is the usual result of connected component analysis, +// holding the detected blobs. +// The returned Pix should be PixDestroyed after use. +Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map, + TO_BLOCK* blob_block) { + // Insert the smallest blobs into the grid. + InsertBlobList(&blob_block->small_blobs); + InsertBlobList(&blob_block->noise_blobs); + // Add the medium blobs that don't have a good strokewidth neighbour. + // Those that do go into good_grid as an antidote to spreading beyond the + // real reaches of a noise region. + BlobGrid good_grid(gridsize(), bleft(), tright()); + BLOBNBOX_IT blob_it(&blob_block->blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0; + perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area(); + if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio) + InsertBBox(true, true, blob); + else + good_grid.InsertBBox(true, true, blob); + } + noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid); + good_grid.Clear(); // Not needed any more. + Pix* pix = noise_density_->ThresholdToPix(max_noise_count_); + if (debug) { + pixWrite("junknoisemask.png", pix, IFF_PNG); + } + ScrollView* win = nullptr; + #ifndef GRAPHICS_DISABLED + if (debug) { + win = MakeWindow(0, 400, "Photo Mask Blobs"); + } + #endif // !GRAPHICS_DISABLED + // Large and medium blobs are not text if they overlap with "a lot" of small + // blobs. + MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, + kMaxLargeOverlapsWithSmall, + win, ScrollView::DARK_GREEN, pix); + MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall, + win, ScrollView::WHITE, pix); + // Clear the grid of small blobs and insert the medium blobs. + Clear(); + InsertBlobList(&blob_block->blobs); + MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, + kMaxLargeOverlapsWithMedium, + win, ScrollView::DARK_GREEN, pix); + // Clear again before we start deleting the blobs in the grid. + Clear(); + MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1, + win, ScrollView::CORAL, pix); + MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1, + win, ScrollView::GOLDENROD, pix); + MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1, + win, ScrollView::WHITE, pix); + if (debug) { + #ifndef GRAPHICS_DISABLED + win->Update(); + #endif // !GRAPHICS_DISABLED + pixWrite("junkccphotomask.png", pix, IFF_PNG); + #ifndef GRAPHICS_DISABLED + delete win->AwaitEvent(SVET_DESTROY); + delete win; + #endif // !GRAPHICS_DISABLED + } + return pix; +} + +// Computes and returns the noise_density IntGrid, at the same gridsize as +// this by summing the number of small elements in a 3x3 neighbourhood of +// each grid cell. good_grid is filled with blobs that are considered most +// likely good text, and this is filled with small and medium blobs that are +// more likely non-text. +// The photo_map is used to bias the decision towards non-text, rather than +// supplying definite decision. +IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map, + BlobGrid* good_grid) { + IntGrid* noise_counts = CountCellElements(); + IntGrid* noise_density = noise_counts->NeighbourhoodSum(); + IntGrid* good_counts = good_grid->CountCellElements(); + // Now increase noise density in photo areas, to bias the decision and + // minimize hallucinated text on image, but trim the noise_density where + // there are good blobs and the original count is low in non-photo areas, + // indicating that most of the result came from neighbouring cells. + int height = pixGetHeight(photo_map); + int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction); + for (int y = 0; y < gridheight(); ++y) { + for (int x = 0; x < gridwidth(); ++x) { + int noise = noise_density->GridCellValue(x, y); + if (max_noise_count_ < noise + photo_offset && + noise <= max_noise_count_) { + // Test for photo. + int left = x * gridsize(); + int right = left + gridsize(); + int bottom = height - y * gridsize(); + int top = bottom - gridsize(); + if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right, + &bottom)) { + noise_density->SetGridCell(x, y, noise + photo_offset); + } + } + if (debug && noise > max_noise_count_ && + good_counts->GridCellValue(x, y) > 0) { + tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n", + x * gridsize(), y * gridsize(), + noise_density->GridCellValue(x, y), + good_counts->GridCellValue(x, y), + noise_counts->GridCellValue(x, y), max_noise_count_); + } + if (noise > max_noise_count_ && + good_counts->GridCellValue(x, y) > 0 && + noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <= + max_noise_count_) { + noise_density->SetGridCell(x, y, 0); + } + } + } + delete noise_counts; + delete good_counts; + return noise_density; +} + +// Helper to expand a box in one of the 4 directions by the given pad, +// provided it does not expand into any cell with a zero noise density. +// If that is not possible, try expanding all round by a small constant. +static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density, + int pad) { + TBOX expanded_box(box); + expanded_box.set_right(box.right() + pad); + if (!noise_density.AnyZeroInRect(expanded_box)) + return expanded_box; + expanded_box = box; + expanded_box.set_left(box.left() - pad); + if (!noise_density.AnyZeroInRect(expanded_box)) + return expanded_box; + expanded_box = box; + expanded_box.set_top(box.top() + pad); + if (!noise_density.AnyZeroInRect(expanded_box)) + return expanded_box; + expanded_box = box; + expanded_box.set_bottom(box.bottom() + pad); + if (!noise_density.AnyZeroInRect(expanded_box)) + return expanded_box; + expanded_box = box; + expanded_box.pad(kNoisePadding, kNoisePadding); + if (!noise_density.AnyZeroInRect(expanded_box)) + return expanded_box; + return box; +} + +// Tests each blob in the list to see if it is certain non-text using 2 +// conditions: +// 1. blob overlaps a cell with high value in noise_density_ (previously set +// by ComputeNoiseDensity). +// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This +// condition is disabled with max_blob_overlaps == -1. +// If it does, the blob is declared non-text, and is used to mark up the +// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their +// neighbours reset, as they may now point to deleted data. +// WARNING: The blobs list blobs may be in the *this grid, but they are +// not removed. If any deleted blobs might be in *this, then this must be +// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. +// If the win is not nullptr, deleted blobs are drawn on it in red, and kept +// blobs are drawn on it in ok_color. +void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, + int max_blob_overlaps, + ScrollView* win, + ScrollView::Color ok_color, + Pix* nontext_mask) { + int imageheight = tright().y() - bleft().x(); + BLOBNBOX_IT blob_it(blobs); + BLOBNBOX_LIST dead_blobs; + BLOBNBOX_IT dead_it(&dead_blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + TBOX box = blob->bounding_box(); + if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && + (max_blob_overlaps < 0 || + !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { + blob->ClearNeighbours(); + #ifndef GRAPHICS_DISABLED + if (win != nullptr) + blob->plot(win, ok_color, ok_color); + #endif // !GRAPHICS_DISABLED + } else { + if (noise_density_->AnyZeroInRect(box)) { + // There is a danger that the bounding box may overlap real text, so + // we need to render the outline. + Pix* blob_pix = blob->cblob()->render_outline(); + pixRasterop(nontext_mask, box.left(), imageheight - box.top(), + box.width(), box.height(), PIX_SRC | PIX_DST, + blob_pix, 0, 0); + pixDestroy(&blob_pix); + } else { + if (box.area() < gridsize() * gridsize()) { + // It is a really bad idea to make lots of small components in the + // photo mask, so try to join it to a bigger area by expanding the + // box in a way that does not touch any zero noise density cell. + box = AttemptBoxExpansion(box, *noise_density_, gridsize()); + } + // All overlapped cells are non-zero, so just mark the rectangle. + pixRasterop(nontext_mask, box.left(), imageheight - box.top(), + box.width(), box.height(), PIX_SET, nullptr, 0, 0); + } + #ifndef GRAPHICS_DISABLED + if (win != nullptr) + blob->plot(win, ScrollView::RED, ScrollView::RED); + #endif // !GRAPHICS_DISABLED + // It is safe to delete the cblob now, as it isn't used by the grid + // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the + // dead_blobs list. + // TODO(rays) delete the delete when the BLOBNBOX destructor deletes + // the cblob. + delete blob->cblob(); + dead_it.add_to_end(blob_it.extract()); + } + } +} + +// Returns true if the given blob overlaps more than max_overlaps blobs +// in the current grid. +bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) { + // Search the grid to see what intersects it. + // Setup a Rectangle search for overlapping this blob. + BlobGridSearch rsearch(this); + const TBOX& box = blob->bounding_box(); + rsearch.StartRectSearch(box); + rsearch.SetUniqueMode(true); + BLOBNBOX* neighbour; + int overlap_count = 0; + while (overlap_count <= max_overlaps && + (neighbour = rsearch.NextRectSearch()) != nullptr) { + if (box.major_overlap(neighbour->bounding_box())) { + ++overlap_count; + if (overlap_count > max_overlaps) + return true; + } + } + return false; +} + +} // namespace tesseract. |