29. Фильтр Adaptive Wiener

Вернуться к разделу "Реализация проекта BookScanLib".

29. Фильтр Adaptive Wiener

Фильтр Adaptive Wiener из проекта gamera применяется для размытия фона (подавления шума) серой (8-битной) растровой картинки перед бинаризацией.

Я написал простейшую консольную программу для демонстрации работы Adaptive Wiener. На входе она принимает следующие параметры:

wiener <input_file> <region_size (int)> <noise_variance (double)>

region_size - это размер обрабатывающего окна . 5 по умолчанию.

noise_variance - средний уровень шума. -1 по умолчанию.

Точность дробных - до одной десятой.

На выходе программа выдаёт этот же файл, обработанный этим алгоритмом.

Программа работает только с серыми изображениями.

Всё необходимое для тестирования этой программы (компиляционный проект, готовый экзешник, файл-пример и bat-файлы для тестирования программы) я оформил в небольшой пакет:

Скачать пакет wiener (41 КБ)

(Для работы программы требуется FreeImage dll-библиотека из пакета FreeImage DLL v3.9.2 - см. статью 1. Знакомство с FreeImage).

Рассмотрим исходные коды этой программы:

/*
*
* Copyright (C) 2005 John Ashley Burgoyne and Ichiro Fujinaga
*               2007 Uma Kompella and Christoph Dalitz
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
* more details.
* 
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

/*
 Adaptive Wiener filter for de-noising.

 See:
    
 J. Lim. 2001. *Two-Dimensional Signal Processing.* Englewood
 Cliffs: Prentice Hall.

 *region_size* : default = 5

 The size of the region within which to calculate the filter
 coefficients.

 *noise_variance* : default = -1

 Variance of the noise in the image. If negative, estimated
 automatically as the median of local variances.
*/

// This algorithm was taken from the gamera.sf.net sourcecodes
// and adopted for the FreeImage library
//
//	Copyright (C) 2007-2008:
//	monday2000	monday2000@yandex.ru

#include "FreeImage.h"
#include "Utilities.h"

////////////////////////////////////////////////////////////////////////////////

void mean_filter(FIBITMAP* src_dib, int region_size, double* p_mean)
{
	unsigned width = FreeImage_GetWidth(src_dib);
	
	unsigned height = FreeImage_GetHeight(src_dib);
	
	unsigned pitch = FreeImage_GetPitch(src_dib);
	
	unsigned bpp = FreeImage_GetBPP(src_dib);
	
	unsigned x, y, i, j;
	
	int k_index;
	
	BYTE* src_bits = (BYTE*)FreeImage_GetBits(src_dib); // The image raster
	
	BYTE* src_end_row = src_bits + (height-1) * pitch;
	
	int end_col = width - 1;
	
	BYTE* lines, *linek;
	
	int sum;
	
	unsigned area = region_size * region_size;
	
	
	for (y = 0; y < height; y++)
	{
		lines = src_bits + y * pitch;
		
		for (x = 0; x < width; x++)
		{
			sum = 0;
			
			// kernel processing
			for (i = 0; i < region_size; i++)
				for (j = 0; j < region_size; j++)
				{					
					linek = lines + (i-1) * pitch;
					
					if (linek < src_bits) linek = src_bits;
					if (linek > src_end_row) linek = src_end_row;
					
					k_index = x+j-1;
					
					if (k_index < 0) k_index = 0;
					if (k_index > end_col) k_index = end_col;
					
					sum += linek[k_index];
				}
				
				p_mean[y*width+x] = (double)sum / area;			
		}
	}
}

////////////////////////////////////////////////////////////////////////////////

void variance_filter(FIBITMAP* src_dib, int region_size, double* p_mean, double* p_var)
{
	unsigned width = FreeImage_GetWidth(src_dib);
	
	unsigned height = FreeImage_GetHeight(src_dib);
	
	unsigned pitch = FreeImage_GetPitch(src_dib);
	
	unsigned bpp = FreeImage_GetBPP(src_dib);
	
	unsigned y, x, i, j;
	
	int k_index;
	
	BYTE* src_bits = (BYTE*)FreeImage_GetBits(src_dib); // The image raster
	
	BYTE* src_end_row = src_bits + (height-1) * pitch;
	
	int end_col = width - 1;
	
	BYTE* lines, *linek;
	
	int sum;
	
	unsigned area = region_size * region_size;
	
	
	for (y = 0; y < height; y++)
	{
		lines = src_bits + y * pitch;
		
		for (x = 0; x < width; x++)
		{
			sum = 0;
			
			// kernel processing
			for (i = 0; i < region_size; i++)
				for (j = 0; j < region_size; j++)
				{					
					linek = lines + (i-1) * pitch;
					
					if (linek < src_bits) linek = src_bits;
					if (linek > src_end_row) linek = src_end_row;
					
					k_index = x+j-1;
					
					if (k_index < 0) k_index = 0;
					if (k_index > end_col) k_index = end_col;
					
					sum += linek[k_index]*linek[k_index];
				}				
				
				p_var[y*width+x] = (double)sum / area - p_mean[y*width+x] * p_mean[y*width+x];				
		}
	}
}

////////////////////////////////////////////////////////////////////////////////

int compare_colors(const void *elem1, const void *elem2)
{
	// Comparison function for sorting pixels - needed for the standart qsort function	
	
	return (int)(*((double*)elem2) - *((double*)elem1));
}

double median_filter(FIBITMAP* src_dib, double* p_var)
{
	unsigned width = FreeImage_GetWidth(src_dib);
	
	unsigned height = FreeImage_GetHeight(src_dib);	
	
	int int_size = height*width*sizeof(double);
	
	int area = height*width;
	
	double* p_median = (double*)malloc(int_size);
	
	memcpy(p_median,p_var,int_size);
	
	qsort(p_median, area, sizeof(double), compare_colors);	
	
	double res = p_median[(area - 1) / 2];
	
	free(p_median);
	
	return res; // noise_variance
}

////////////////////////////////////////////////////////////////////////////////

FIBITMAP* ProcessFilter(FIBITMAP* src_dib, int region_size,	double noise_variance)
{
	unsigned width = FreeImage_GetWidth(src_dib);
	
	unsigned height = FreeImage_GetHeight(src_dib);
	
	unsigned src_pitch = FreeImage_GetPitch(src_dib);
	
	unsigned bpp = FreeImage_GetBPP(src_dib);
	
	FIBITMAP* dst_dib = FreeImage_Allocate(width, height, bpp);
	
	if(bpp == 8)
	{
		if(FreeImage_GetColorType(src_dib) == FIC_MINISWHITE) {
			// build an inverted greyscale palette
			RGBQUAD *dst_pal = FreeImage_GetPalette(dst_dib);
			for(int i = 0; i < 256; i++) {
				dst_pal[i].rgbRed = dst_pal[i].rgbGreen =
					dst_pal[i].rgbBlue = (BYTE)(255 - i);
			}
		} else {
			// build a greyscale palette
			RGBQUAD *dst_pal = FreeImage_GetPalette(dst_dib);
			for(int i = 0; i < 256; i++) {
				dst_pal[i].rgbRed = dst_pal[i].rgbGreen =
					dst_pal[i].rgbBlue = (BYTE)i;
			}
		}
	}
	
	unsigned dst_pitch = FreeImage_GetPitch(dst_dib);
	
	BYTE* src_bits = (BYTE*)FreeImage_GetBits(src_dib); // The image raster
	
	BYTE* dst_bits = (BYTE*)FreeImage_GetBits(dst_dib); // The image raster
	
	BYTE* lines, *lined;
	
	unsigned x, y;
	
	BYTE mean;
	
	double variance;
	
	int int_size = height*width*sizeof(double);
	
	double* p_mean = (double*)malloc(int_size);
	double* p_var = (double*)malloc(int_size);
	
	memset(p_mean,0,int_size);
	memset(p_var,0,int_size);
	
	mean_filter(src_dib, region_size, p_mean);	
	
	variance_filter(src_dib, region_size, p_mean, p_var);	
	
    // Compute noise variance if needed.
    if (noise_variance < 0) noise_variance = median_filter(src_dib, p_var);	
	
	
    for (y = 0; y < height; y++)
	{
		lines = src_bits + y * src_pitch;
		
		lined = dst_bits + y * dst_pitch;
		
        for (x = 0; x < width; x++)
		{
			mean = p_mean[y*width+x];
			
			variance = p_var[y*width+x];
			
            // The estimate of noise variance will never be perfect, but in
            // theory, it would be impossible for any region to have a local
            // variance less than it. The following check eliminates that
            // theoretical impossibility and has a side benefit of preventing
            // division by zero.
            if (variance < noise_variance)
				
				lined[x] = mean;
			
			else
			{
                double multiplier = (variance - noise_variance) / variance;
                
				double value = (double)lines[x];
                
				lined[x] = (BYTE)(mean + multiplier * (value - mean));
            }
        }
    }
	
	// Copying the DPI...
	
	FreeImage_SetDotsPerMeterX(dst_dib, FreeImage_GetDotsPerMeterX(src_dib));
	
	FreeImage_SetDotsPerMeterY(dst_dib, FreeImage_GetDotsPerMeterY(src_dib));
	
	free(p_mean);
	
	free(p_var);
	
	return dst_dib;
}

////////////////////////////////////////////////////////////////////////////////
/**
FreeImage error handler
@param fif Format / Plugin responsible for the error 
@param message Error message
*/
void FreeImageErrorHandler(FREE_IMAGE_FORMAT fif, const char *message) {
	printf("\n*** "); 
	printf("%s Format\n", FreeImage_GetFormatFromFIF(fif));
	printf(message);
	printf(" ***\n");
}

////////////////////////////////////////////////////////////////////////////////

/** Generic image loader

  @param lpszPathName Pointer to the full file name
  @param flag Optional load flag constant
  @return Returns the loaded dib if successful, returns NULL otherwise
*/

FIBITMAP* GenericLoader(const char* lpszPathName, int flag)
{	
	FREE_IMAGE_FORMAT fif = FIF_UNKNOWN;
	// check the file signature and deduce its format
	// (the second argument is currently not used by FreeImage)
	
	fif = FreeImage_GetFileType(lpszPathName, 0);
	
	FIBITMAP* dib;
	
	if(fif == FIF_UNKNOWN)
	{
		// no signature ?
		// try to guess the file format from the file extension
		fif = FreeImage_GetFIFFromFilename(lpszPathName);
	}
	
	// check that the plugin has reading capabilities ...
	if((fif != FIF_UNKNOWN) && FreeImage_FIFSupportsReading(fif))
	{
		// ok, let's load the file
		dib = FreeImage_Load(fif, lpszPathName, flag);
		
		// unless a bad file format, we are done !
		if (!dib)
		{
			printf("%s%s%s\n","File \"", lpszPathName, "\" not found.");
			return NULL;
		}
	}	
	
	return dib;
}

////////////////////////////////////////////////////////////////////////////////

int main(int argc, char *argv[]) {
	
	// call this ONLY when linking with FreeImage as a static library
#ifdef FREEIMAGE_LIB
	FreeImage_Initialise();
#endif // FREEIMAGE_LIB
	
	// initialize your own FreeImage error handler
	
	FreeImage_SetOutputMessage(FreeImageErrorHandler);
	
	if(argc != 4) {
		printf("Usage : wiener <input_file> <region_size (int)> <noise_variance (double)>\n");
		return 0;
	}
	
	FIBITMAP *dib = GenericLoader(argv[1], 0);
	
	int region_size = atoi(argv[2]);
	
	double noise_variance = atof(argv[3]);
	
	if (dib)
	{		
		// bitmap is successfully loaded!
		
		if (FreeImage_GetImageType(dib) == FIT_BITMAP)
		{
			if (FreeImage_GetBPP(dib) == 8)
			{
				FIBITMAP* dst_dib = ProcessFilter(dib, region_size, noise_variance);
				
				if (dst_dib)
				{					
					// save the filtered bitmap
					const char *output_filename = "filtered.tif";
					
					// first, check the output format from the file name or file extension
					FREE_IMAGE_FORMAT out_fif = FreeImage_GetFIFFromFilename(output_filename);
					
					if(out_fif != FIF_UNKNOWN)
					{
						// then save the file
						FreeImage_Save(out_fif, dst_dib, output_filename, 0);
					}
					
					// free the loaded FIBITMAP
					FreeImage_Unload(dst_dib);					
				}
			}
			
			else
				
				printf("%s\n", "Unsupported color mode.");
		}
		
		else // non-FIT_BITMAP images are not supported.
			
			printf("%s\n", "Unsupported color mode.");
		
		FreeImage_Unload(dib);
	}	 
	
	// call this ONLY when linking with FreeImage as a static library
#ifdef FREEIMAGE_LIB
	FreeImage_DeInitialise();
#endif // FREEIMAGE_LIB
	
	return 0;
}

Краткое описание алгоритма:

Составляются 3 вспомогательных изображения. Первое заполняется так: квадратная апертура пробегает по всем пикселям, суммирует апертуру, делит на площадь и вставляет в результат. Получается усреднённое изображение.

Второе заполняется так: квадратная апертура пробегает по всем пикселям, суммирует квадраты в апертуре, делит на площадь и вычитает квадрат соответствующего пикселя из первого вспомогательного изображения и вставляет в результат. Получается отклонённое изображение.

Второе копируется в третье. Третье сортируется по возрастанию. Берём точку из середины (т.е. получаем медиану отклонения) и удаляем третье изображение.

Затем в основном цикле проходим по всем пикселям. Если отклонение данного пикселя меньше медианы - то просто переносим данный пиксель в результат, если больше - то вычисляем новый пиксель по формуле с участием текущего пикселя, текущего усреднения, текущего отклонения и медианы - и вставляем его в результат.

Чисто визуально можно увидеть, что этот алгоритм размывает фон за пределами некоей "зоны отчуждения" вокруг каждой буквы - что даёт 100% гарантию не-затрагивания букв при размытии.