24. Бинаризация Sauvola Thresholding из OCRopus

Вернуться к разделу "Реализация проекта BookScanLib".

24. Бинаризация Sauvola Thresholding из OCRopus

Бинаризация Sauvola из проекта OCRopus применяется для преобразования серой (8-битной) растровой картинки в чёрно-белую (1-битная).

Алгоритм Sauvola Thresholding анализирует обрабатываемую картинку и автоматически вычисляет порог бинаризации - индивидуальный для каждого пикселя (т.е это локальная или адаптивная бинаризация). Найденный порог используется для обыкновенной пороговой бинаризации в отношении текущего пикселя.

Главный недостаток этого алгоритма в том, что он идёт под Apache License - что меня совершенно не устраивает - т.к. мне нужно только GPL 2 и выше. Поэтому прийдётся искать такой же алгоритм, но под лицензией GPL 2 и выше.

Я написал простейшую консольную программу для демонстрации работы Sauvola Thresholding. На входе она принимает следующие параметры:

sauvola_ocropus <input_file> <size (int)> <weight (double)>

Size - это размер обрабатывающего окна.

Weight - вес фильтра. Точность - до одной десятой.

На выходе программа выдаёт этот же файл, обработанный этим алгоритмом.

Программа работает только с серыми изображениями.

Всё необходимое для тестирования этой программы (компиляционный проект, готовый экзешник, файл-пример и bat-файлы для тестирования программы) я оформил в небольшой пакет:

Скачать пакет sauvola_ocropus (41 КБ)

(Для работы программы требуется FreeImage dll-библиотека из пакета FreeImage DLL v3.9.2 - см. статью 1. Знакомство с FreeImage).

Рассмотрим исходные коды этой программы:

// -*- C++ -*-

// Copyright 2006-2008 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz 
// or its licensors, as applicable.
// 
// You may not use this file except under the terms of the accompanying license.
// 
// Licensed under the Apache License, Version 2.0 (the "License"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// 
// Project: OCRopus
// File: ocr-binarize-sauvola.cc
// Purpose: An efficient implementation of the Sauvola's document binarization 
//          algorithm based on integral images as described in
//          F. Shafait, D. Keysers, T.M. Breuel. "Efficient Implementation of 
//          Local Adaptive Thresholding Techniques Using Integral Images".
//          Document Recognition and Retrieval XV, San Jose.
//
// Responsible: Faisal Shafait (faisal.shafait@dfki.de)
// Reviewer: 
// Primary Repository: 
// Web Sites: www.iupr.org, www.dfki.de

// An efficient implementation of the Sauvola's document
// binarization algorithm based on integral images.

// This algorithm was taken from the OCRopus sourcecodes
// and adopted for the FreeImage library
//
//	Copyright (C) 2007-2008:
//	monday2000	monday2000@yandex.ru

#include "FreeImage.h"
#include "Utilities.h"

#define MAXVAL 256

// float k = 0.3,"Weighting factor"
// int w = 40,"Local window size. Should always be positive"
// k>=0.05 && k<=0.95;
// w>0 && k<1000;
////////////////////////////////////////////////////////////////////////////////

inline void SetPixel(BYTE *bits, unsigned x, BYTE* value)
{   // this function is simplified from FreeImage_SetPixelIndex
	
	*value ? bits[x >> 3] |= (0x80 >> (x & 0x7)) : bits[x >> 3] &= (0xFF7F >> (x & 0x7));
}

////////////////////////////////////////////////////////////////////////////////

FIBITMAP* ProcessFilter(FIBITMAP* src_dib, int w, double k)
{
	const unsigned width = FreeImage_GetWidth(src_dib);
	
	const unsigned height = FreeImage_GetHeight(src_dib);
	
	unsigned src_pitch = FreeImage_GetPitch(src_dib);
	
	unsigned bpp = FreeImage_GetBPP(src_dib);
	
	FIBITMAP* dst_dib = FreeImage_Allocate(width, height, 1);
	
	// Build a monochrome palette
	RGBQUAD *pal = FreeImage_GetPalette(dst_dib);
	pal[0].rgbRed = pal[0].rgbGreen = pal[0].rgbBlue = 0;
	pal[1].rgbRed = pal[1].rgbGreen = pal[1].rgbBlue = 255;
	
	unsigned dst_pitch = FreeImage_GetPitch(dst_dib);
	
	BYTE* src_bits = (BYTE*)FreeImage_GetBits(src_dib); // The image raster
	
	BYTE* dst_bits = (BYTE*)FreeImage_GetBits(dst_dib); // The image raster
	
	BYTE* lines, *lined;	
	
	unsigned whalf = w>>1; // Half of window size	

	// Calculate the integral image, and integral of the squared image

	int int_size = height*width*sizeof(unsigned)*sizeof(unsigned);

	int ws = width*sizeof(unsigned);
	
	unsigned* p_integral_image = (unsigned*)malloc(int_size);
	unsigned* p_rowsum_image = (unsigned*)malloc(int_size);
	unsigned* p_integral_sqimg = (unsigned*)malloc(int_size);
	unsigned* p_rowsum_sqimg = (unsigned*)malloc(int_size);
	
	memset(p_integral_image,0,int_size);
	memset(p_rowsum_image,0,int_size);
	memset(p_integral_sqimg,0,int_size);
	memset(p_rowsum_sqimg,0,int_size);
	
	int xmin,ymin,xmax,ymax;
	
	double diagsum,idiagsum,diff,sqdiagsum,sqidiagsum,sqdiff,area;
	
	double mean,std,threshold;
	
	unsigned i,j;
	
	for(j=0; j<height; j++)
	{
		lines = src_bits + j * src_pitch;
		
		p_rowsum_image[j*ws] = lines[0];
		p_rowsum_sqimg[j*ws] = lines[0]*lines[0];
	}
	
	for(j=0; j<height; j++)
	{
		lines = src_bits + j * src_pitch;
		
		for(i=1; i<width; i++)
		{
			p_rowsum_image[j*ws+i] = p_rowsum_image[(j-1)*ws+i] + lines[i];
			p_rowsum_sqimg[j*ws+i] = p_rowsum_sqimg[(j-1)*ws+i] + lines[i]*lines[i];
		}
	}
	
	for(i=0; i<width; i++)
	{
		p_integral_image[i] = p_rowsum_image[i];
		p_integral_sqimg[i] = p_rowsum_sqimg[i];
	}
	
	for(j=1; j<height; j++)
	{		
		for(i=0; i<width; i++)
		{
			p_integral_image[j*ws+i] = p_integral_image[j*ws+(i-1)] + p_rowsum_image[j*ws+i];
			p_integral_sqimg[j*ws+i] = p_integral_sqimg[j*ws+(i-1)] + p_rowsum_sqimg[j*ws+i];
		}
	}
	
	//Calculate the mean and standard deviation using the integral image
	
	BYTE val;
	
	for(j=0; j<height; j++)
	{
		lined = dst_bits + j * dst_pitch;
		
		lines = src_bits + j * src_pitch;
		
		for(i=0; i<width; i++)		
		{
			xmin = MAX(0,int(i-whalf));
			xmax = MIN(width-1,i+whalf);
			
			ymin = MAX(0,int(j-whalf));			
			ymax = MIN(height-1,j+whalf);
			
			area = (xmax-xmin+1)*(ymax-ymin+1);
			
			// area can't be 0 here
			// proof (assuming whalf >= 0): 
			// we'll prove that (xmax-xmin+1) > 0,
			// (ymax-ymin+1) is analogous
			// It's the same as to prove: xmax >= xmin
			// width - 1 >= 0         since width > i >= 0
			// i + whalf >= 0               since i >= 0, whalf >= 0
			// i + whalf >= i - whalf       since whalf >= 0
			// width - 1 >= i - whalf since width > i
			// --IM
			
			if(!xmin && !ymin)
			{ // Point at origin
				diff   = p_integral_image[ymax*ws+xmax];
				sqdiff = p_integral_sqimg[ymax*ws+xmax];
			}
			
			else if(!xmin && ymin)
			{ // first column
				//printf("ymin=%d\n",ymin);
				
				diff   = p_integral_image[ymax*ws+xmax] - p_integral_image[(ymin-1)*ws+xmax];
				sqdiff = p_integral_sqimg[ymax*ws+xmax] - p_integral_sqimg[(ymin-1)*ws+xmax];
			}
			
			else if(xmin && !ymin)
			{ // first row
				diff   = p_integral_image[ymax*ws+xmax] - p_integral_image[ymax*ws+(xmin-1)];
				sqdiff = p_integral_sqimg[ymax*ws+xmax] - p_integral_sqimg[ymax*ws+(xmin-1)];
			}
			
			else
			{ // rest of the image
				diagsum    = p_integral_image[ymax*ws+xmax] + p_integral_image[(ymin-1)*ws+(xmin-1)];
				idiagsum   = p_integral_image[ymax*ws+(xmin-1)] + p_integral_image[(ymin-1)*ws+xmax];
				diff       = diagsum - idiagsum;
				sqdiagsum  = p_integral_sqimg[ymax*ws+xmax] + p_integral_sqimg[(ymin-1)*ws+(xmin-1)];
				sqidiagsum = p_integral_sqimg[ymax*ws+(xmin-1)] + p_integral_sqimg[(ymin-1)*ws+xmax];
				sqdiff     = sqdiagsum - sqidiagsum;
			}
			
			mean = diff/area;
			
			std  = sqrt((sqdiff - diff*diff/area)/(area-1));
			
			threshold = mean*(1+k*((std/128)-1));
			
			val = (BYTE) ( ( lines[i] >= threshold ) ? 255 : 0 );			
			
			SetPixel(lined, i, &val);
			
		}
	}
	
	// Copying the DPI...
	
	FreeImage_SetDotsPerMeterX(dst_dib, FreeImage_GetDotsPerMeterX(src_dib));
	
	FreeImage_SetDotsPerMeterY(dst_dib, FreeImage_GetDotsPerMeterY(src_dib));
	
	free(p_integral_image);
	free(p_rowsum_image);
	free(p_integral_sqimg);
	free(p_rowsum_sqimg);
	
	return dst_dib;
}

////////////////////////////////////////////////////////////////////////////////
/**
FreeImage error handler
@param fif Format / Plugin responsible for the error 
@param message Error message
*/
void FreeImageErrorHandler(FREE_IMAGE_FORMAT fif, const char *message) {
	printf("\n*** "); 
	printf("%s Format\n", FreeImage_GetFormatFromFIF(fif));
	printf(message);
	printf(" ***\n");
}

////////////////////////////////////////////////////////////////////////////////

/** Generic image loader

  @param lpszPathName Pointer to the full file name
  @param flag Optional load flag constant
  @return Returns the loaded dib if successful, returns NULL otherwise
*/

FIBITMAP* GenericLoader(const char* lpszPathName, int flag)
{	
	FREE_IMAGE_FORMAT fif = FIF_UNKNOWN;
	// check the file signature and deduce its format
	// (the second argument is currently not used by FreeImage)
	
	fif = FreeImage_GetFileType(lpszPathName, 0);
	
	FIBITMAP* dib;
	
	if(fif == FIF_UNKNOWN)
	{
		// no signature ?
		// try to guess the file format from the file extension
		fif = FreeImage_GetFIFFromFilename(lpszPathName);
	}
	
	// check that the plugin has reading capabilities ...
	if((fif != FIF_UNKNOWN) && FreeImage_FIFSupportsReading(fif))
	{
		// ok, let's load the file
		dib = FreeImage_Load(fif, lpszPathName, flag);
		
		// unless a bad file format, we are done !
		if (!dib)
		{
			printf("%s%s%s\n","File \"", lpszPathName, "\" not found.");
			return NULL;
		}
	}	
	
	return dib;
}

////////////////////////////////////////////////////////////////////////////////

int main(int argc, char *argv[]) {
	
	// call this ONLY when linking with FreeImage as a static library
#ifdef FREEIMAGE_LIB
	FreeImage_Initialise();
#endif // FREEIMAGE_LIB
	
	// initialize your own FreeImage error handler
	
	FreeImage_SetOutputMessage(FreeImageErrorHandler);
	
	if(argc != 4) {
		printf("Usage : sauvola_ocropus <input_file> <size (int)> <weight (double)>\n");
		return 0;
	}
	
	FIBITMAP *dib = GenericLoader(argv[1], 0);

	unsigned size = atoi(argv[2]); // Local window size. Should always be positive
								   // default = 40; w>0 && k<1000
	
	double weight = atof(argv[3]); //Weighting factor
								   // default = 0.3; k>=0.05 && k<=0.95;
	
	if (dib)
	{		
		// bitmap is successfully loaded!
		
		if (FreeImage_GetImageType(dib) == FIT_BITMAP)
		{
			if (FreeImage_GetBPP(dib) == 8)
			{
				FIBITMAP* dst_dib = ProcessFilter(dib, size, weight);
				
				if (dst_dib)
				{					
					// save the filtered bitmap
					const char *output_filename = "filtered.tif";
					
					// first, check the output format from the file name or file extension
					FREE_IMAGE_FORMAT out_fif = FreeImage_GetFIFFromFilename(output_filename);
					
					if(out_fif != FIF_UNKNOWN)
					{
						// then save the file
						FreeImage_Save(out_fif, dst_dib, output_filename, 0);
					}
					
					// free the loaded FIBITMAP
					FreeImage_Unload(dst_dib);					
				}
			}
			
			else
				
				printf("%s\n", "Unsupported color mode.");
		}
		
		else // non-FIT_BITMAP images are not supported.
			
			printf("%s\n", "Unsupported color mode.");
		
		FreeImage_Unload(dib);
	}	 
	
	// call this ONLY when linking with FreeImage as a static library
#ifdef FREEIMAGE_LIB
	FreeImage_DeInitialise();
#endif // FREEIMAGE_LIB
	
	return 0;

Краткое описание алгоритма:

Составляются 2 вспомогательных т.н. "интегральных" изображения. Первое заполняется кумулятивными суммами исходных пикселей, второе - кумулятивными квадратами сумм исходных пикселей.

Проходим в цикле по всем пикселям исходного изображения квадратной апертурой задаваемого размера. На каждом шаге находим 2 "габаритные" (по диагонали) разницы апертуры - по к.суммам и к.кв.сумм. Находим площадь апертуры. Находим среднее по к.сумме. С помощью извлечения корня и составления пропорции пир участии веса находим порог бинаризации для текущего пикселя и преобразуем его в чёрно-белый.

1. OCRopus.

2. Описание алгоритма (PDF ENG 386 КБ)