27. Бинаризация Mixture Modeling Thresholding

Вернуться к разделу "Реализация проекта BookScanLib".

27. Бинаризация Mixture Modeling Thresholding

Бинаризация Mixture Modeling из проекта ImageJ применяется для преобразования серой (8-битной) растровой картинки в чёрно-белую (1-битная).

Алгоритм Mixture Modeling анализирует обрабатываемую картинку и автоматически вычисляет порог бинаризации - единый для всей картинки (т.е это глобальная бинаризация). Найденный порог подаётся на вход обыкновенной пороговой бинаризации (например, в библиотеке FreeImage для этого есть функция FreeImage_Threshold).

Я написал простейшую консольную программу для демонстрации работы Mixture Modeling Thresholding. На входе она принимает следующие параметры:

mix_mod_thres <input_file>

На выходе программа выдаёт этот же файл, обработанный этим алгоритмом.

Программа работает только с серыми изображениями.

Всё необходимое для тестирования этой программы (компиляционный проект, готовый экзешник, файл-пример и bat-файлы для тестирования программы) я оформил в небольшой пакет:

Скачать пакет mix_mod_thres (45 КБ)

(Для работы программы требуется FreeImage dll-библиотека из пакета FreeImage DLL v3.9.2 - см. статью 1. Знакомство с FreeImage).

Рассмотрим исходные коды этой программы:

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
// http://www.gnu.org/copyleft/gpl.html

/*
* Mixture Modeling algorithm
*
* Copyright (c) 2003 by Christopher Mei (christopher.mei@sophia.inria.fr)
*                    and Maxime Dauphin
*
*  This algorithm thresholds the image using a gray-level 
*  histogram Gaussian characterisation.
*  
**/

/*
This algorithm separates the histogram of an image into
two classes using a Gaussian model. It then calculates
the image threshold as the intersection of these two
Gaussians.
This thresholding technique has the advantage of finding
a threshold that is in certain cases closer to real world data.
The Gaussian parameters can also be used to caracterise the
two regions obtained.
The plugin returns a histogram with the two Gaussians,
the parameters obtained (average, standard deviation,
threshold) and the thresholded image.
*/

// This algorithm was taken from the ImageJ sourcecodes
// http://rsb.info.nih.gov/ij/plugins/mixture-modeling.html
// and adopted for the FreeImage library
//
// Copyright (C) 2007-2008:
// monday2000  monday2000@yandex.ru

#include "FreeImage.h"
#include "Utilities.h"

#define INDEX_MAX 253

class MMClass
{
public:	

	int histogram[256];
    /** The index must vary between 1 and 253
	C1 : [0;index]
	C2 : [index+1; 255]
    **/
	int index;
	double mu1, mu2;
	double sigma2_1, sigma2_2;
	double mult1, mult2;
	double twoVariance1, twoVariance2;
	double max1, max2;
	int cardinal1, cardinal2;
	int cardinal;
	
	MMClass(FIBITMAP* src_dib);
	
    double calculateMax(int index);
	
    void setValues();
	
    double gamma1(int i);
	
    double gamma2(int i);	
};

////////////////////////////////////////////////////////////////////////////////

// This class implements a MMClass.

MMClass::MMClass(FIBITMAP* src_dib)
{
	unsigned width = FreeImage_GetWidth(src_dib);
	
	unsigned height = FreeImage_GetHeight(src_dib);
	
	unsigned src_pitch = FreeImage_GetPitch(src_dib);	
	
	BYTE* src_bits = (BYTE*)FreeImage_GetBits(src_dib); // The image raster
	
	BYTE* lines;
	
	
	cardinal = width*height;
	
	// build histogram first	
	memset(histogram,0,256*sizeof(int));
	
	// for each line	
	for ( unsigned y = 0; y < height; y++ )
	{			
		lines = src_bits + y * src_pitch;
		
		// for each pixel
		for ( unsigned x = 0; x < width; x++)
		{
			histogram[lines[x]]++;
		}		
	}
	
	index = 0;	
}

////////////////////////////////////////////////////////////////////////////////

double MMClass::calculateMax(int index)
{
	double sum = histogram[index];
	double num = 1;
	
	if(index-1>=0)
	{
		sum += histogram[index-1];
		num++;
	}
	
	if(index+1<255)
	{
		sum += histogram[index+1];
		num++;
	}
	return sum/num;
}

////////////////////////////////////////////////////////////////////////////////

void MMClass::setValues()
{	
	mu1 = 0; mu2 = 0;
	sigma2_1 = 0; sigma2_2 = 0;
	max1 = 0; max2 = 0;
	cardinal1 = 0; cardinal2 = 0;
	
	int i = 0;
	
	for(i=0; i<=index ; i++)
	{
		cardinal1 +=  histogram[i];
		mu1 +=  i*histogram[i];
	}
    
	for(i=index+1; i<256 ; i++)
	{
		cardinal2 +=  histogram[i];
		mu2 +=  i*histogram[i];
	}
    
	if(cardinal1 == 0)
	{
		mu1 = 0;
		sigma2_1 = 0;
		
	}
	else 
		mu1 /= (double)cardinal1; 
	
	if(cardinal2 == 0)
	{
		mu2 = 0;
		sigma2_2 = 0;
	}
	else 
		mu2 /= (double)cardinal2; 
	
	if( mu1 != 0 )
	{
		for(int i=0; i<=index ; i++) 
			sigma2_1 += histogram[i]*pow(i-mu1,2);
		
		sigma2_1 /= (double)cardinal1;
		
		max1 = calculateMax((int) mu1);
		
		mult1 = (double) max1;
		twoVariance1 = 2*sigma2_1; 
	}
	
	if( mu2 != 0 )
	{
		for(i=index+1; i<256 ; i++)
			sigma2_2 += histogram[i]*pow(i-mu2,2);
		
		sigma2_2 /= (double)cardinal2;
		
		max2 = calculateMax((int) mu2);
		
		mult2 = (double) max2;
		twoVariance2 = 2*sigma2_2; 
	}
}

////////////////////////////////////////////////////////////////////////////////

double MMClass::gamma1(int i) 
{
	if(sigma2_1 == 0) 
		return 0;
	
	return (double)(mult1*exp(-(pow((double)i-mu1,2))/twoVariance1));
}

////////////////////////////////////////////////////////////////////////////////

double MMClass::gamma2(int i)
{
	if(sigma2_2 == 0) 
		return 0;
	
	return (double)(mult2*exp(-(pow((double)i-mu2,2))/twoVariance2));
}

////////////////////////////////////////////////////////////////////////////////

double findThreshold(int mu1, int mu2, MMClass& mmc)
{
	double min = 9999999;
	int threshold = 0;
	
	int differenceGamma;
	
	for(int i=mu1; i<mu2; i++)
	{
		differenceGamma = mmc.gamma1(i) - mmc.gamma2(i);
		
		double val = (double)pow(differenceGamma,2.0);
		
		if(min>val)
		{
			min = val;
			threshold = i;
		}
	}
	return threshold;
}

////////////////////////////////////////////////////////////////////////////////

double calculateError(MMClass& mmc)
{
	double error = 0;
	
	double gamma;
	
	for(int i=0; i<256; i++)
	{
		gamma = mmc.gamma1(i) + mmc.gamma2(i);
		
		error += pow(gamma - mmc.histogram[i],2);
	}
	
	return error/256;
}

////////////////////////////////////////////////////////////////////////////////

inline void SetPixel(BYTE *bits, unsigned x, BYTE* value)
{   // this function is simplified from FreeImage_SetPixelIndex
	
	*value ? bits[x >> 3] |= (0x80 >> (x & 0x7)) : bits[x >> 3] &= (0xFF7F >> (x & 0x7));
}

////////////////////////////////////////////////////////////////////////////////

FIBITMAP* ProcessFilter(FIBITMAP* src_dib)
{
	// get source image size
	unsigned width = FreeImage_GetWidth(src_dib);
	
	unsigned height = FreeImage_GetHeight(src_dib);
	
	unsigned src_pitch = FreeImage_GetPitch(src_dib);
	
	unsigned bpp = FreeImage_GetBPP(src_dib);
	
	unsigned btpp = bpp/8;
	
	FIBITMAP* dst_dib = FreeImage_Allocate(width, height, 1);
	
	// Build a monochrome palette
	RGBQUAD *pal = FreeImage_GetPalette(dst_dib);
	pal[0].rgbRed = pal[0].rgbGreen = pal[0].rgbBlue = 0;
	pal[1].rgbRed = pal[1].rgbGreen = pal[1].rgbBlue = 255;
	
	unsigned dst_pitch = FreeImage_GetPitch(dst_dib);
	
	BYTE* src_bits = (BYTE*)FreeImage_GetBits(src_dib); // The image raster
	
	BYTE* dst_bits = (BYTE*)FreeImage_GetBits(dst_dib); // The image raster	
	
	BYTE* lines, *lined;
	
	
	MMClass mmc(src_dib); // initialize the helper class
	
	int threshold = 0;
	
	double error = 0;
	double errormin = 9999999;
	double mu1 = 0, mu2 = 0;
	
	// Start
	for (int i=0; i<INDEX_MAX; i++) // INDEX_MAX = 253
	{
		mmc.index++;		
		mmc.setValues();
		
		error = calculateError(mmc);
		
		if(error<errormin)
		{
			errormin = error;
			threshold = mmc.index;
			mu1 = mmc.mu1;
			mu2 = mmc.mu2;
		}
	}
	
	mmc.index = threshold;
	mmc.setValues();
	
	printf("Direct threshold = %d\n", threshold);
	
	threshold = (int)findThreshold((int)mu1, (int)mu2, mmc);
	
	printf("Real threshold = %d\n", threshold);
	
	BYTE val;
	
	unsigned x,y;
	
	// for each line
	for ( y = 0; y < height; y++ )
	{
		lined = dst_bits + y * dst_pitch;
		
		lines = src_bits + y * src_pitch;
		
		// for all pixels
		for ( x = 0; x < width; x++)
		{
			val = (BYTE) ( ( lines[x] >= threshold ) ? 255 : 0 );			
			
			SetPixel(lined, x, &val);
		}		
	}
	
	// Copying the DPI...
	
	FreeImage_SetDotsPerMeterX(dst_dib, FreeImage_GetDotsPerMeterX(src_dib));
	
	FreeImage_SetDotsPerMeterY(dst_dib, FreeImage_GetDotsPerMeterY(src_dib));
	
	return dst_dib;
}

////////////////////////////////////////////////////////////////////////////////
/**
FreeImage error handler
@param fif Format / Plugin responsible for the error 
@param message Error message
*/
void FreeImageErrorHandler(FREE_IMAGE_FORMAT fif, const char *message) {
	printf("\n*** "); 
	printf("%s Format\n", FreeImage_GetFormatFromFIF(fif));
	printf(message);
	printf(" ***\n");
}

////////////////////////////////////////////////////////////////////////////////

/** Generic image loader

  @param lpszPathName Pointer to the full file name
  @param flag Optional load flag constant
  @return Returns the loaded dib if successful, returns NULL otherwise
*/

FIBITMAP* GenericLoader(const char* lpszPathName, int flag)
{	
	FREE_IMAGE_FORMAT fif = FIF_UNKNOWN;
	// check the file signature and deduce its format
	// (the second argument is currently not used by FreeImage)
	
	fif = FreeImage_GetFileType(lpszPathName, 0);
	
	FIBITMAP* dib;
	
	if(fif == FIF_UNKNOWN)
	{
		// no signature ?
		// try to guess the file format from the file extension
		fif = FreeImage_GetFIFFromFilename(lpszPathName);
	}
	
	// check that the plugin has reading capabilities ...
	if((fif != FIF_UNKNOWN) && FreeImage_FIFSupportsReading(fif))
	{
		// ok, let's load the file
		dib = FreeImage_Load(fif, lpszPathName, flag);
		
		// unless a bad file format, we are done !
		if (!dib)
		{
			printf("%s%s%s\n","File \"", lpszPathName, "\" not found.");
			return NULL;
		}
	}	
	
	return dib;
}

////////////////////////////////////////////////////////////////////////////////

int main(int argc, char *argv[]) {
	
	// call this ONLY when linking with FreeImage as a static library
#ifdef FREEIMAGE_LIB
	FreeImage_Initialise();
#endif // FREEIMAGE_LIB
	
	// initialize your own FreeImage error handler
	
	FreeImage_SetOutputMessage(FreeImageErrorHandler);
	
	if(argc != 2) {
		printf("Usage : mix_mod_thres <input_file>\n");
		return 0;
	}
	
	FIBITMAP *dib = GenericLoader(argv[1], 0);
	
	if (dib)
	{		
		// bitmap is successfully loaded!
		
		if (FreeImage_GetImageType(dib) == FIT_BITMAP)
		{
			if (FreeImage_GetBPP(dib) == 8)
			{
				FIBITMAP* dst_dib = ProcessFilter(dib);
				
				if (dst_dib)
				{					
					// save the filtered bitmap
					const char *output_filename = "filtered.tif";
					
					// first, check the output format from the file name or file extension
					FREE_IMAGE_FORMAT out_fif = FreeImage_GetFIFFromFilename(output_filename);
					
					if(out_fif != FIF_UNKNOWN)
					{
						// then save the file
						FreeImage_Save(out_fif, dst_dib, output_filename, 0);
					}
					
					// free the loaded FIBITMAP
					FreeImage_Unload(dst_dib);					
				}
			}
			
			else
				
				printf("%s\n", "Unsupported color mode.");
		}
		
		else // non-FIT_BITMAP images are not supported.
			
			printf("%s\n", "Unsupported color mode.");
		
		FreeImage_Unload(dib);
	}	 
	
	// call this ONLY when linking with FreeImage as a static library
#ifdef FREEIMAGE_LIB
	FreeImage_DeInitialise();
#endif // FREEIMAGE_LIB
	
	return 0;

Краткое описание алгоритма:

Используется вспомогательный С++ - класс (просто для удобства и чтобы сделать код компактнее).

Строим гистограмму. Проходим по ней в цикле. На каждом шаге цикла делаем следующее:

- Делим гистограмму на 2 класса (до текущего индекса и после).

- Находим по сложным формулам всевозможные производные коэффициенты, характеризующие оба класса.

- Находим 2 интегральных коэффициента, характеризующих оба класса - на базе ранее полученных коэффициентов.

- Находим минимальную разницу между 2 интегральными коэффициентами и запоминаем текущий номер гистограммы и ещё 2 параметра ("стандартные отклонения") 2 классов.

Затем в цикле проходим от одного стандартного отклонения до второго, находим опять 2 интегральных коэффициента (по текущему номеру отклонения), подсчитываем разницу между ними - и запоминаем её наименьшее значение. Это и есть найденный порог.

Делаем обычную пороговую бинаризацию по найденному порогу.