From f3f97b6223ef71428d21ae5bc07af2aed694554e Mon Sep 17 00:00:00 2001 From: chrox Date: Tue, 16 Oct 2012 10:02:38 +0800 Subject: [PATCH] add pdf page reflow Conflicts: pdfreader.lua --- Makefile | 6 +- k2pdfopt.c | 6338 ++++++++++++++++++++++++++++++++++++++++++++++++++++ k2pdfopt.h | 33 + pdf.c | 93 + 4 files changed, 6469 insertions(+), 1 deletion(-) create mode 100644 k2pdfopt.c create mode 100644 k2pdfopt.h diff --git a/Makefile b/Makefile index eed248c57..9f2c372fd 100644 --- a/Makefile +++ b/Makefile @@ -112,13 +112,14 @@ POPENNSLIB := $(POPENNSDIR)/libpopen_noshell.a all: kpdfview VERSION?=$(shell git describe HEAD) -kpdfview: kpdfview.o einkfb.o pdf.o blitbuffer.o drawcontext.o input.o $(POPENNSLIB) util.o ft.o lfs.o mupdfimg.o $(MUPDFLIBS) $(THIRDPARTYLIBS) $(LUALIB) djvu.o $(DJVULIBS) cre.o $(CRENGINELIBS) pic.o pic_jpeg.o +kpdfview: kpdfview.o einkfb.o pdf.o k2pdfopt.o blitbuffer.o drawcontext.o input.o $(POPENNSLIB) util.o ft.o lfs.o mupdfimg.o $(MUPDFLIBS) $(THIRDPARTYLIBS) $(LUALIB) djvu.o $(DJVULIBS) cre.o $(CRENGINELIBS) pic.o pic_jpeg.o echo $(VERSION) > git-rev $(CC) \ $(CFLAGS) \ kpdfview.o \ einkfb.o \ pdf.o \ + k2pdfopt.o \ blitbuffer.o \ drawcontext.o \ input.o \ @@ -155,6 +156,9 @@ ft.o: %.o: %.c $(THIRDPARTYLIBS) kpdfview.o pdf.o blitbuffer.o util.o drawcontext.o einkfb.o input.o mupdfimg.o: %.o: %.c $(CC) -c $(KPDFREADER_CFLAGS) $(EMU_CFLAGS) -I$(LFSDIR)/src $< -o $@ +k2pdfopt.o: %.o: %.c + $(CC) -c -I$(MUPDFDIR)/ $(CFLAGS) $< -o $@ + djvu.o: %.o: %.c $(CC) -c $(KPDFREADER_CFLAGS) -I$(DJVUDIR)/ $< -o $@ diff --git a/k2pdfopt.c b/k2pdfopt.c new file mode 100644 index 000000000..d982d6f9f --- /dev/null +++ b/k2pdfopt.c @@ -0,0 +1,6338 @@ +/* + ** k2pdfopt.c K2pdfopt optimizes PDF/DJVU files for mobile e-readers + ** (e.g. the Kindle) and smartphones. It works well on + ** multi-column PDF/DJVU files. K2pdfopt is freeware. 
+ ** + ** Copyright (C) 2012 http://willus.com + ** + ** This program is free software: you can redistribute it and/or modify + ** it under the terms of the GNU Affero General Public License as + ** published by the Free Software Foundation, either version 3 of the + ** License, or (at your option) any later version. + ** + ** This program is distributed in the hope that it will be useful, + ** but WITHOUT ANY WARRANTY; without even the implied warranty of + ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + ** GNU Affero General Public License for more details. + ** + ** You should have received a copy of the GNU Affero General Public License + ** along with this program. If not, see . + ** + /* + ** WILLUSDEBUGX flags: + ** 1 = Generic + ** 2 = breakinfo row analysis + ** 4 = word wrapping + ** 8 = word wrapping II + ** 16 = hyphens + ** 32 = OCR + ** + */ +// #define WILLUSDEBUGX 32 +// #define WILLUSDEBUG +#include "k2pdfopt.h" +#include +#include +#include +#include +#include +#include + +#define HAVE_MUPDF + +#define VERSION "v1.51" +#define GRAYLEVEL(r,g,b) ((int)(((r)*0.3+(g)*0.59+(b)*0.11)*1.002)) +#if (defined(WIN32) || defined(WIN64)) +#define TTEXT_BOLD ANSI_WHITE +#define TTEXT_NORMAL ANSI_NORMAL +#define TTEXT_BOLD2 ANSI_YELLOW +#define TTEXT_INPUT ANSI_GREEN +#define TTEXT_WARN ANSI_RED +#define TTEXT_HEADER ANSI_CYAN +#define TTEXT_MAGENTA ANSI_MAGENTA +#else +#define TTEXT_BOLD "\x1b[0m\x1b[34m" +#define TTEXT_NORMAL "\x1b[0m" +#define TTEXT_BOLD2 "\x1b[0m\x1b[33m" +#define TTEXT_INPUT "\x1b[0m\x1b[32m" +#define TTEXT_WARN "\x1b[0m\x1b[31m" +#define TTEXT_HEADER "\x1b[0m\x1b[36m" +#define TTEXT_MAGENTA "\x1b[0m\x1b[35m" +#endif + +#ifndef __ANSI_H__ +#define ANSI_RED "\x1b[1m\x1b[31m" +#define ANSI_GREEN "\x1b[1m\x1b[32m" +#define ANSI_YELLOW "\x1b[1m\x1b[33m" +#define ANSI_BROWN "\x1b[0m\x1b[33m" +#define ANSI_BLUE "\x1b[1m\x1b[34m" +#define ANSI_MAGENTA "\x1b[1m\x1b[35m" +#define ANSI_CYAN "\x1b[1m\x1b[36m" +#define ANSI_WHITE 
"\x1b[1m\x1b[37m" +#define ANSI_NORMAL "\x1b[0m\x1b[37m" +#define ANSI_SAVE_CURSOR "\x1b[s" +#define ANSI_RESTORE_CURSOR "\x1b[u" +#define ANSI_CLEAR_TO_END "\x1b[K" +#define ANSI_BEGIN_LINE "\x1b[80D" +#define ANSI_UP_ONE_LINE "\x1b[1A" +#define ANSI_HOME "\x1b[2J\x1b[0;0;H" +#define __ANSI_H__ +#endif + +/* bmp.c */ +#define WILLUSBITMAP_TYPE_NATIVE 0 +#define WILLUSBITMAP_TYPE_WIN32 1 + +#ifdef PI +#undef PI +#endif +/* + ** Constants from the front of the CRC standard math tables + ** (Accuracy = 50 digits) + */ +/* The 50 digits cause problems with MPW's Mr. C on the Macintosh, */ +/* so I've truncated to 20 digits. */ +/* + #define PI 3.14159265358979323846264338327950288419716939937511 + #define SQRT2 1.41421356237309504880168872420969807856967187537695 + #define SQRT3 1.73205080756887729352744634150587236694280525381039 + #define LOG10E 0.43429448190325182765112891891660508229439700580367 + */ +#define PI 3.1415926535897932384 +#define SQRT2 1.4142135623730950488 +#define SQRT3 1.7320508075688772935 +#define LOG10E 0.4342944819032518276 +#define DBPERNEP (20.*LOG10E) + +#define SRC_TYPE_PDF 1 +#define SRC_TYPE_DJVU 2 +#define SRC_TYPE_OTHER 3 + +/* DATA STRUCTURES */ + +typedef struct { + int page; /* Source page */ + double rot_deg; /* Source rotation (happens first) */ + double x0, y0; /* x0,y0, in points, of lower left point on rectangle */ + double w, h; /* width and height of rectangle in points */ + double scale; /* Scale rectangle by this factor on destination page */ + double x1, y1; /* (x,y) position of lower left point on destination page, in points */ +} PDFBOX; + +typedef struct { + PDFBOX *box; + int n; + int na; +} PDFBOXES; + +typedef struct { + int pageno; /* Source page number */ + double page_rot_deg; /* Source page rotation */ + PDFBOXES boxes; +} PAGEINFO; + +typedef struct { + int ch; /* Hyphen starting point -- < 0 for no hyphen */ + int c2; /* End of end region if hyphen is erased */ + int r1; /* Top of hyphen */ + int r2; /* Bottom 
of hyphen */ +} HYPHENINFO; + +typedef struct { + int c1, c2; /* Left and right columns */ + int r1, r2; /* Top and bottom of region in pixels */ + int rowbase; /* Baseline of row */ + int gap; /* Gap to next region in pixels */ + int rowheight; /* text + gap */ + int capheight; + int h5050; + int lcheight; + HYPHENINFO hyphen; +} TEXTROW; + +typedef struct { + TEXTROW *textrow; + int rhmean_pixels; /* Mean row height (text) */ + int centered; /* Is this set of rows centered? */ + int n, na; +} BREAKINFO; + +typedef struct { + int red[256]; + int green[256]; + int blue[256]; + unsigned char *data; /* Top to bottom in native type, bottom to */ + /* top in Win32 type. */ + int width; /* Width of image in pixels */ + int height; /* Height of image in pixels */ + int bpp; /* Bits per pixel (only 8 or 24 allowed) */ + int size_allocated; + int type; /* See defines above for WILLUSBITMAP_TYPE_... */ +} WILLUSBITMAP; + +typedef struct { + int r1, r2; /* row position from top of bmp, inclusive */ + int c1, c2; /* column positions, inclusive */ + int rowbase; /* Baseline of text row */ + int capheight; /* capital letter height */ + int h5050; + int lcheight; /* lower-case letter height */ + int bgcolor; /* 0 - 255 */ + HYPHENINFO hyphen; + WILLUSBITMAP *bmp; + WILLUSBITMAP *bmp8; + WILLUSBITMAP *marked; +} BMPREGION; + +typedef struct { + WILLUSBITMAP bmp; + int rows; + int published_pages; + int bgcolor; + int fit_to_page; + int wordcount; + char debugfolder[256]; +} MASTERINFO; + +static int verbose = 0; +static int debug = 0; + +#define DEFAULT_WIDTH 600 +#define DEFAULT_HEIGHT 800 +#define MIN_REGION_WIDTH_INCHES 1.0 +#define SRCROT_AUTO -999. +#define SRCROT_AUTOEP -998. + +/* + ** Blank Area Threshold Widths--average black pixel width, in inches, that + ** prevents a region from being determined as "blank" or clear. 
+ */ +static double gtc_in = .005; // detecting gap between columns +static double gtr_in = .006; // detecting gap between rows +static double gtw_in = .0015; // detecting gap between words +// static double gtm_in=.005; // detecting margins for trimming +static int src_left_to_right = 1; +static int src_whitethresh = -1; +static int dst_dpi = 167; +static int fit_columns = 1; +static int src_dpi = 300; +static int dst_width = DEFAULT_WIDTH; /* Full device width in pixels */ +static int dst_height = DEFAULT_HEIGHT; +static int dst_userwidth = DEFAULT_WIDTH; +static int dst_userheight = DEFAULT_HEIGHT; +static int dst_justify = -1; // 0 = left, 1 = center +static int dst_figure_justify = -1; // -1 = same as dst_justify. 0=left 1=center 2=right +static double dst_min_figure_height_in = 0.75; +static int dst_fulljustify = -1; // 0 = no, 1 = yes +static int dst_color = 0; +static int dst_landscape = 0; +static double dst_mar = 0.02; +static double dst_martop = -1.0; +static double dst_marbot = -1.0; +static double dst_marleft = -1.0; +static double dst_marright = -1.0; +static double min_column_gap_inches = 0.1; +static double max_column_gap_inches = 1.5; // max gap between columns +static double min_column_height_inches = 1.5; +static double mar_top = -1.0; +static double mar_bot = -1.0; +static double mar_left = -1.0; +static double mar_right = -1.0; +static double max_region_width_inches = 3.6; /* Max viewable width (device width minus margins) */ +static int max_columns = 2; +static double column_gap_range = 0.33; +static double column_offset_max = 0.2; +static double column_row_gap_height_in = 1. 
/ 72.; +static int text_wrap = 1; +static double word_spacing = 0.375; +static double display_width_inches = 3.6; /* Device width = dst_width / dst_dpi */ +static int column_fitted = 0; +static double lm_org, bm_org, tm_org, rm_org, dpi_org; +static double contrast_max = 2.0; +static int show_marked_source = 0; +static double defect_size_pts = 1.0; +static double max_vertical_gap_inches = 0.25; +static double vertical_multiplier = 1.0; +static double vertical_line_spacing = -1.2; +static double vertical_break_threshold = 1.75; +static int erase_vertical_lines = 0; +static int k2_hyphen_detect = 1; +static int dst_fit_to_page = 0; +/* + ** Undocumented cmd-line args + */ +static double no_wrap_ar_limit = 0.2; /* -arlim */ +static double no_wrap_height_limit_inches = 0.55; /* -whmax */ +static double little_piece_threshold_inches = 0.5; /* -rwmin */ +/* + ** Keeping track of vertical gaps + */ +static double last_scale_factor_internal = -1.0; +/* indicates desired vert. gap before next region is added. */ +static int last_rowbase_internal; /* Pixels between last text row baseline and current end */ +/* of destination bitmap. */ +static int beginning_gap_internal = -1; +static int last_h5050_internal = -1; +static int just_flushed_internal = 0; +static int gap_override_internal; /* If > 0, apply this gap in wrapbmp_flush() and then reset. 
*/ + +void adjust_params_init(void); +void set_region_widths(void); +static void mark_source_page(BMPREGION *region, int caller_id, int mark_flags); +static void fit_column_to_screen(double column_width_inches); +static void restore_output_dpi(void); +void adjust_contrast(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey, int *white); +static int bmpregion_row_black_count(BMPREGION *region, int r0); +static void bmpregion_row_histogram(BMPREGION *region); +static int bmpregion_find_multicolumn_divider(BMPREGION *region, + int *row_black_count, BMPREGION *pageregion, int *npr, int *colcount, + int *rowcount); +static int bmpregion_column_height_and_gap_test(BMPREGION *column, + BMPREGION *region, int r1, int r2, int cmid, int *colcount, + int *rowcount); +static int bmpregion_is_clear(BMPREGION *region, int *row_is_clear, + double gt_in); +void bmpregion_multicolumn_add(BMPREGION *region, MASTERINFO *masterinfo, + int level, PAGEINFO *pageinfo, int colgap0_pixels); +static void bmpregion_vertically_break(BMPREGION *region, + MASTERINFO *masterinfo, int allow_text_wrapping, double force_scale, + int *colcount, int *rowcount, PAGEINFO *pageinfo, int colgap_pixels, + int ncols); +static void bmpregion_add(BMPREGION *region, BREAKINFO *breakinfo, + MASTERINFO *masterinfo, int allow_text_wrapping, int trim_flags, + int allow_vertical_breaks, double force_scale, int justify_flags, + int caller_id, int *colcount, int *rowcount, PAGEINFO *pageinfo, + int mark_flags, int rowbase_delta); +static void dst_add_gap_src_pixels(char *caller, MASTERINFO *masterinfo, + int pixels); +static void dst_add_gap(MASTERINFO *masterinfo, double inches); +static void bmp_src_to_dst(MASTERINFO *masterinfo, WILLUSBITMAP *src, + int justification_flags, int whitethresh, int nocr, int dpi); +static void bmp_fully_justify(WILLUSBITMAP *jbmp, WILLUSBITMAP *src, int nocr, + int whitethresh, int just); +#ifdef HAVE_OCR +static void ocrwords_fill_in(OCRWORDS *words,WILLUSBITMAP *src,int whitethresh,int dpi); 
+#endif +static void bmpregion_trim_margins(BMPREGION *region, int *colcount0, + int *rowcount0, int flags); +static void bmpregion_hyphen_detect(BMPREGION *region); +#if (WILLUSDEBUGX & 6) +static void breakinfo_echo(BREAKINFO *bi); +#endif +#if (defined(WILLUSDEBUGX) || defined(WILLUSDEBUG)) +static void bmpregion_write(BMPREGION *region,char *filename); +#endif +static int height2_calc(int *rc, int n); +static void trim_to(int *count, int *i1, int i2, double gaplen); +static void bmpregion_analyze_justification_and_line_spacing(BMPREGION *region, + BREAKINFO *breakinfo, MASTERINFO *masterinfo, int *colcount, + int *rowcount, PAGEINFO *pageinfo, int allow_text_wrapping, + double force_scale); +static int bmpregion_is_centered(BMPREGION *region, BREAKINFO *breakinfo, + int i1, int i2, int *textheight); +static double median_val(double *x, int n); +static void bmpregion_find_vertical_breaks(BMPREGION *region, + BREAKINFO *breakinfo, int *colcount, int *rowcount, double apsize_in); +static void textrow_assign_bmpregion(TEXTROW *textrow, BMPREGION *region); +static void breakinfo_compute_row_gaps(BREAKINFO *breakinfo, int r2); +static void breakinfo_compute_col_gaps(BREAKINFO *breakinfo, int c2); +static void breakinfo_remove_small_col_gaps(BREAKINFO *breakinfo, int lcheight, + double mingap); +static void breakinfo_remove_small_rows(BREAKINFO *breakinfo, double fracrh, + double fracgap, BMPREGION *region, int *colcount, int *rowcount); +static void breakinfo_alloc(int index, BREAKINFO *breakinfo, int nrows); +static void breakinfo_free(int index, BREAKINFO *breakinfo); +static void breakinfo_sort_by_gap(BREAKINFO *breakinfo); +static void breakinfo_sort_by_row_position(BREAKINFO *breakinfo); +static void bmpregion_one_row_find_breaks(BMPREGION *region, + BREAKINFO *breakinfo, int *colcount, int *rowcount, int add_to_dbase); +void wrapbmp_init(void); +static int wrapbmp_ends_in_hyphen(void); +static void wrapbmp_set_color(int is_color); +static void 
wrapbmp_free(void); +static void wrapbmp_set_maxgap(int value); +static int wrapbmp_width(void); +static int wrapbmp_remaining(void); +static void wrapbmp_add(BMPREGION *region, int gap, int line_spacing, int rbase, + int gio, int justification_flags); +static void wrapbmp_flush(MASTERINFO *masterinfo, int allow_full_justify, + PAGEINFO *pageinfo, int use_bgi); +static void wrapbmp_hyphen_erase(void); +static void bmpregion_one_row_wrap_and_add(BMPREGION *region, + BREAKINFO *breakinfo, int index, int i0, int i1, MASTERINFO *masterinfo, + int justflags, int *colcount, int *rowcount, PAGEINFO *pageinfo, + int rheight, int mean_row_gap, int rowbase, int marking_flags, int pi); +static void white_margins(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey); +static void get_white_margins(BMPREGION *region); +/* Bitmap orientation detection functions */ +static double bitmap_orientation(WILLUSBITMAP *bmp); +static double bmp_inflections_vertical(WILLUSBITMAP *srcgrey, int ndivisions, + int delta, int *wthresh); +static double bmp_inflections_horizontal(WILLUSBITMAP *srcgrey, int ndivisions, + int delta, int *wthresh); +static int inflection_count(double *x, int n, int delta, int *wthresh); +static void pdfboxes_init(PDFBOXES *boxes); +static void pdfboxes_free(PDFBOXES *boxes); +/* + static void pdfboxes_add_box(PDFBOXES *boxes,PDFBOX *box); + static void pdfboxes_delete(PDFBOXES *boxes,int n); + */ +static void word_gaps_add(BREAKINFO *breakinfo, int lcheight, + double *median_gap); +static void bmp_detect_vertical_lines(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp, + double dpi, double minwidth_in, double maxwidth_in, double minheight_in, + double anglemax_deg, int white_thresh); +static int vert_line_erase(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp, + WILLUSBITMAP *tmp, int row0, int col0, double tanthx, + double minheight_in, double minwidth_in, double maxwidth_in, + int white_thresh); +static void willus_dmem_alloc_warn(int index, void **ptr, int size, + char *funcname, int exitcode); 
+static void willus_dmem_free(int index, double **ptr, char *funcname); +static int willus_mem_alloc_warn(void **ptr, int size, char *name, int exitcode); +static void willus_mem_free(double **ptr, char *name); +static void sortd(double *x, int n); +static void sorti(int *x, int n); +static void bmp_init(WILLUSBITMAP *bmap); +static int bmp_alloc(WILLUSBITMAP *bmap); +static void bmp_free(WILLUSBITMAP *bmap); +static int bmp_copy(WILLUSBITMAP *dest, WILLUSBITMAP *src); +static void bmp_fill(WILLUSBITMAP *bmp,int r,int g,int b); +static int bmp_bytewidth(WILLUSBITMAP *bmp); +static unsigned char *bmp_rowptr_from_top(WILLUSBITMAP *bmp, int row); +static void bmp_more_rows(WILLUSBITMAP *bmp, double ratio, int pixval); +static int bmp_is_grayscale(WILLUSBITMAP *bmp); +static int bmp_resample(WILLUSBITMAP *dest, WILLUSBITMAP *src, double x1, + double y1, double x2, double y2, int newwidth, int newheight); +static void bmp_contrast_adjust(WILLUSBITMAP *dest,WILLUSBITMAP *src,double contrast); +static void bmp_convert_to_greyscale_ex(WILLUSBITMAP *dst, WILLUSBITMAP *src); +static int bmpmupdf_pixmap_to_bmp(WILLUSBITMAP *bmp, fz_context *ctx, + fz_pixmap *pixmap); + +static MASTERINFO _masterinfo, *masterinfo; +static WILLUSBITMAP _bmp, *bmp; +static int master_bmp_inited = 0; +static int master_bmp_width = 0; +static int master_bmp_height = 0; + +void k2pdfopt_mupdf_reflow_bmp(fz_context *ctx, fz_pixmap *pix, double rot_deg) { + PAGEINFO _pageinfo, *pageinfo; + WILLUSBITMAP _src, *src; + WILLUSBITMAP _srcgrey, *srcgrey; + int i, status, white, pw, np, src_type, or_detect, orep_detect, + second_time_through; + int pagecount, pagestep, pages_done, is_gray, dpi; + double size, area_ratio, bormean; + + masterinfo = &_masterinfo; + masterinfo->debugfolder[0] = '\0'; + second_time_through = 0; + white = src_whitethresh; /* Will be set by adjust_contrast() or set to src_whitethresh */ + dpi = src_dpi; + adjust_params_init(); + set_region_widths(); + + bmp = &_bmp; + src = &_src; 
+ srcgrey = &_srcgrey; + + if (master_bmp_inited == 0) { + bmp_init(&masterinfo->bmp); + master_bmp_inited = 1; + } + // free last used master bmp + bmp_free(&masterinfo->bmp); + + bmp_init(&masterinfo->bmp); + bmp_init(src); + bmp_init(srcgrey); + + wrapbmp_init(); + + int ii; + masterinfo->bmp.bpp = 8; + for (ii = 0; ii < 256; ii++) + masterinfo->bmp.red[ii] = masterinfo->bmp.blue[ii] = + masterinfo->bmp.green[ii] = ii; + masterinfo->rows = 0; + masterinfo->bmp.width = dst_width; + area_ratio = 8.5 * 11.0 * dst_dpi * dst_dpi / (dst_width * dst_height); + masterinfo->bmp.height = dst_height * area_ratio * 1.5; + bmp_alloc(&masterinfo->bmp); + bmp_fill(&masterinfo->bmp, 255, 255, 255); + + BMPREGION region; + + status = bmpmupdf_pixmap_to_bmp(src, ctx, pix); + bmp_copy(srcgrey, src); + adjust_contrast(src, srcgrey, &white); + white_margins(src, srcgrey); + + region.r1 = 0; + region.r2 = srcgrey->height - 1; + region.c1 = 0; + region.c2 = srcgrey->width - 1; + region.bgcolor = white; + region.bmp = src; + region.bmp8 = srcgrey; + + masterinfo->bgcolor = white; + masterinfo->fit_to_page = dst_fit_to_page; + /* Check to see if master bitmap might need more room */ + bmpregion_multicolumn_add(®ion, masterinfo, 1, pageinfo, + pages_done == 0. ? 0. 
: (int) (0.25 * src_dpi + .5)); + + master_bmp_width = masterinfo->bmp.width; + master_bmp_height = masterinfo->rows; + + bmp_free(srcgrey); + bmp_free(src); +} + +void k2pdfopt_mupdf_rfbmp_size(int *width, int *height) { + *width = master_bmp_width; + *height = master_bmp_height; +} + +void k2pdfopt_mupdf_rfbmp_ptr(unsigned char** bmp_ptr_ptr) { + *bmp_ptr_ptr = masterinfo->bmp.data; +} + +/* ansi.c */ +#define MAXSIZE 8000 + +static int ansi_on=1; +static char ansi_buffer[MAXSIZE]; + +int avprintf(FILE *f, char *fmt, va_list args) + +{ + int status; + { + if (!ansi_on) { + status = vsprintf(ansi_buffer, fmt, args); + ansi_parse(f, ansi_buffer); + } else + status = vfprintf(f, fmt, args); + } + return (status); +} + +int aprintf(char *fmt, ...) + +{ + va_list args; + int status; + + va_start(args, fmt); + status = avprintf(stdout, fmt, args); + va_end(args); + return (status); +} + +/* + ** Ensure that max_region_width_inches will be > MIN_REGION_WIDTH_INCHES + ** + ** Should only be called once, after all params are set. + ** + */ +void adjust_params_init(void) + +{ + if (dst_landscape) { + dst_width = dst_userheight; + dst_height = dst_userwidth; + } else { + dst_width = dst_userwidth; + dst_height = dst_userheight; + } + if (dst_mar < 0.) + dst_mar = 0.02; + if (dst_martop < 0.) + dst_martop = dst_mar; + if (dst_marbot < 0.) + dst_marbot = dst_mar; + if (dst_marleft < 0.) + dst_marleft = dst_mar; + if (dst_marright < 0.) + dst_marright = dst_mar; + if ((double) dst_width / dst_dpi - dst_marleft + - dst_marright< MIN_REGION_WIDTH_INCHES) { + int olddpi; + olddpi = dst_dpi; + dst_dpi = (int) ((double) dst_width + / (MIN_REGION_WIDTH_INCHES + dst_marleft + dst_marright)); + aprintf( + TTEXT_BOLD2 "Output DPI of %d is too large. Reduced to %d." 
TTEXT_NORMAL "\n\n", + olddpi, dst_dpi); + } +} + +void set_region_widths(void) + +{ + max_region_width_inches = display_width_inches = (double) dst_width + / dst_dpi; + max_region_width_inches -= (dst_marleft + dst_marright); + /* This is ensured by adjust_dst_dpi() as of v1.17 */ + /* + if (max_region_width_inches < MIN_REGION_WIDTH_INCHES) + max_region_width_inches = MIN_REGION_WIDTH_INCHES; + */ +} + +/* + ** Process full source page bitmap into rectangular regions and add + ** to the destination bitmap. Start by looking for columns. + ** + ** level = recursion level. First call = 1, then 2, ... + ** + */ +void bmpregion_multicolumn_add(BMPREGION *region, MASTERINFO *masterinfo, + int level, PAGEINFO *pageinfo, int colgap0_pixels) + +{ + static char *funcname = "bmpregion_multicolumn_add"; + int *row_black_count; + int r2, rh, r0, cgr, maxlevel; + BMPREGION *srcregion, _srcregion; + BMPREGION *newregion, _newregion; + BMPREGION *pageregion; + double minh; + int ipr, npr, na; + int *colcount, *rowcount; + + willus_dmem_alloc_warn(1, (void **) &colcount, + sizeof(int) * (region->c2 + 1), funcname, 10); + willus_dmem_alloc_warn(2, (void **) &rowcount, + sizeof(int) * (region->r2 + 1), funcname, 10); + maxlevel = max_columns / 2; + if (debug) + printf("@bmpregion_multicolumn_add (%d,%d) - (%d,%d) lev=%d\n", + region->c1, region->r1, region->c2, region->r2, level); + newregion = &_newregion; + (*newregion) = (*region); + /* Establish colcount, rowcount arrays */ + bmpregion_trim_margins(newregion, colcount, rowcount, 0xf); + (*newregion) = (*region); + srcregion = &_srcregion; + (*srcregion) = (*region); + /* How many page regions do we need? 
*/ + minh = min_column_height_inches; + if (minh < .01) + minh = .1; + na = (srcregion->r2 - srcregion->r1 + 1) / src_dpi / minh; + if (na < 1) + na = 1; + na += 16; + /* Allocate page regions */ + willus_dmem_alloc_warn(3, (void **) &pageregion, sizeof(BMPREGION) * na, + funcname, 10); +#ifdef COMMENT + mindr=src_dpi*.045; /* src->height/250; */ + if (mindr<1) + mindr=1; +#endif +// white=250; +// for (i=0;iwidth;i++) +// colcount[i]=0; + if (debug) + bmpregion_row_histogram(region); + + /* + ** Store information about which rows are mostly clear for future + ** processing (saves processing time). + */ + willus_dmem_alloc_warn(4, (void **) &row_black_count, + region->bmp8->height * sizeof(int), funcname, 10); + for (cgr = 0, r0 = 0; r0 < region->bmp8->height; r0++) { + row_black_count[r0] = bmpregion_row_black_count(region, r0); + if (row_black_count[r0] == 0) + cgr++; + /* + int dr; + dr=mindr; + if (r0+dr>region->bmp8->height) + dr=region->bmp8->height-r0; + if ((row_is_clear[r0]=bmpregion_row_mostly_white(region,r0,dr))!=0) + cgr++; + */ +// printf("row_is_clear[%d]=%d\n",r0,row_is_clear[r0]); + } + if (verbose) + printf("%d clear rows.\n", cgr); + + if (max_columns == 1) { + pageregion[0] = (*srcregion); + /* Set c1 negative to indicate full span */ + pageregion[0].c1 = -1 - pageregion[0].c1; + npr = 1; + } else + /* Find all column dividers in source region and store sequentially in pageregion[] array */ + for (npr = 0, rh = 0; srcregion->r1 <= srcregion->r2; srcregion->r1 += + rh) { + static char *ierr = + TTEXT_WARN "\n\aInternal error--not enough allocated regions.\n" + "Please inform the developer at willus.com.\n\n" TTEXT_NORMAL; + if (npr >= na - 3) { + aprintf("%s", ierr); + break; + } + rh = bmpregion_find_multicolumn_divider(srcregion, row_black_count, + pageregion, &npr, colcount, rowcount); + if (verbose) + printf("rh=%d/%d\n", rh, region->r2 - region->r1 + 1); + } + + /* Process page regions by column */ + if (debug) + printf("Page regions: %d\n", 
npr); + r2 = -1; + for (ipr = 0; ipr < npr;) { + int r20, jpr, colnum, colgap_pixels; + + for (colnum = 1; colnum <= 2; colnum++) { + if (debug) { + printf("ipr = %d of %d...\n", ipr, npr); + printf("COLUMN %d...\n", colnum); + } + r20 = r2; + for (jpr = ipr; jpr < npr; jpr += 2) { + /* If we get to a page region that spans the entire source, stop */ + if (pageregion[jpr].c1 < 0) + break; + /* See if we should suspend this column and start displaying the next one */ + if (jpr > ipr) { + double cpdiff, cdiv1, cdiv2, rowgap1_in, rowgap2_in; + + if (column_offset_max < 0.) + break; + /* Did column divider move too much? */ + cdiv1 = (pageregion[jpr].c2 + pageregion[jpr + 1].c1) / 2.; + cdiv2 = (pageregion[jpr - 2].c2 + pageregion[jpr - 1].c1) + / 2.; + cpdiff = fabs( + (double) (cdiv1 - cdiv2) + / (srcregion->c2 - srcregion->c1 + 1)); + if (cpdiff > column_offset_max) + break; + /* Is gap between this column region and next column region too big? */ + rowgap1_in = (double) (pageregion[jpr].r1 + - pageregion[jpr - 2].r2) / src_dpi; + rowgap2_in = (double) (pageregion[jpr + 1].r1 + - pageregion[jpr - 1].r2) / src_dpi; + if (rowgap1_in > 0.28 && rowgap2_in > 0.28) + break; + } + (*newregion) = pageregion[ + src_left_to_right ? + jpr + colnum - 1 : jpr + (2 - colnum)]; + /* Preserve vertical gap between this region and last region */ + if (r20 >= 0 && newregion->r1 - r20 >= 0) + colgap_pixels = newregion->r1 - r20; + else + colgap_pixels = colgap0_pixels; + if (level < maxlevel) + bmpregion_multicolumn_add(newregion, masterinfo, level + 1, + pageinfo, colgap_pixels); + else { + bmpregion_vertically_break(newregion, masterinfo, text_wrap, + fit_columns ? 
-2.0 : -1.0, colcount, rowcount, + pageinfo, colgap_pixels, 2 * level); + } + r20 = newregion->r2; + } + if (r20 > r2) + r2 = r20; + if (jpr == ipr) + break; + } + if (jpr < npr && pageregion[jpr].c1 < 0) { + if (debug) + printf("SINGLE COLUMN REGION...\n"); + (*newregion) = pageregion[jpr]; + newregion->c1 = -1 - newregion->c1; + /* dst_add_gap_src_pixels("Col level",masterinfo,newregion->r1-r2); */ + colgap_pixels = newregion->r1 - r2; + bmpregion_vertically_break(newregion, masterinfo, text_wrap, + (fit_columns && (level > 1)) ? -2.0 : -1.0, colcount, + rowcount, pageinfo, colgap_pixels, level); + r2 = newregion->r2; + jpr++; + } + ipr = jpr; + } + willus_dmem_free(4, (double **) &row_black_count, funcname); + willus_dmem_free(3, (double **) &pageregion, funcname); + willus_dmem_free(2, (double **) &rowcount, funcname); + willus_dmem_free(1, (double **) &colcount, funcname); +} + +static void fit_column_to_screen(double column_width_inches) + +{ + double text_width_pixels, lm_pixels, rm_pixels, tm_pixels, bm_pixels; + + if (!column_fitted) { + dpi_org = dst_dpi; + lm_org = dst_marleft; + rm_org = dst_marright; + tm_org = dst_martop; + bm_org = dst_marbot; + } + text_width_pixels = max_region_width_inches * dst_dpi; + lm_pixels = dst_marleft * dst_dpi; + rm_pixels = dst_marright * dst_dpi; + tm_pixels = dst_martop * dst_dpi; + bm_pixels = dst_marbot * dst_dpi; + dst_dpi = text_width_pixels / column_width_inches; + dst_marleft = lm_pixels / dst_dpi; + dst_marright = rm_pixels / dst_dpi; + dst_martop = tm_pixels / dst_dpi; + dst_marbot = bm_pixels / dst_dpi; + set_region_widths(); + column_fitted = 1; +} + +static void restore_output_dpi(void) + +{ + if (column_fitted) { + dst_dpi = dpi_org; + dst_marleft = lm_org; + dst_marright = rm_org; + dst_martop = tm_org; + dst_marbot = bm_org; + set_region_widths(); + } + column_fitted = 0; +} + +void adjust_contrast(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey, int *white) + +{ + int i, j, tries, wc, tc, hist[256]; + double 
contrast, rat0; + WILLUSBITMAP *dst, _dst; + + if (debug && verbose) + printf("\nAt adjust_contrast.\n"); + if ((*white) <= 0) + (*white) = 192; + /* If contrast_max negative, use it as fixed contrast adjustment. */ + if (contrast_max < 0.) { + bmp_contrast_adjust(srcgrey, srcgrey, -contrast_max); + if (dst_color && fabs(contrast_max + 1.0) > 1e-4) + bmp_contrast_adjust(src, src, -contrast_max); + return; + } + dst = &_dst; + bmp_init(dst); + wc = 0; /* Avoid compiler warning */ + tc = srcgrey->width * srcgrey->height; + rat0 = 0.5; /* Avoid compiler warning */ + for (contrast = 1.0, tries = 0; contrast < contrast_max + .01; tries++) { + if (fabs(contrast - 1.0) > 1e-4) + bmp_contrast_adjust(dst, srcgrey, contrast); + else + bmp_copy(dst, srcgrey); + /*Get bitmap histogram */ + for (i = 0; i < 256; i++) + hist[i] = 0; + for (j = 0; j < dst->height; j++) { + unsigned char *p; + p = bmp_rowptr_from_top(dst, j); + for (i = 0; i < dst->width; i++, p++) + hist[p[0]]++; + } + if (tries == 0) { + int h1; + for (h1 = 0, j = (*white); j < 256; j++) + h1 += hist[j]; + rat0 = (double) h1 / tc; + if (debug && verbose) + printf(" rat0 = rat[%d-255]=%.4f\n", (*white), rat0); + } + + /* Find white ratio */ + /* + for (wc=hist[254],j=253;j>=252;j--) + if (hist[j]>wc1) + wc1=hist[j]; + */ + for (wc = 0, j = 252; j <= 255; j++) + wc += hist[j]; + /* + if ((double)wc/tc >= rat0*0.7 && (double)hist[255]/wc > 0.995) + break; + */ + if (debug && verbose) + printf(" %2d. Contrast=%7.2f, rat[252-255]/rat0=%.4f\n", + tries + 1, contrast, (double) wc / tc / rat0); + if ((double) wc / tc >= rat0 * 0.94) + break; + contrast *= 1.05; + } + if (debug) + printf("Contrast=%7.2f, rat[252-255]/rat0=%.4f\n", contrast, + (double) wc / tc / rat0); + /* + bmp_write(dst,"outc.png",stdout,100); + wfile_written_info("outc.png",stdout); + exit(10); + */ + bmp_copy(srcgrey, dst); + /* Maybe don't adjust the contrast for the color bitmap? 
*/ + if (dst_color && fabs(contrast - 1.0) > 1e-4) + bmp_contrast_adjust(src, src, contrast); + bmp_free(dst); +} + +static int bmpregion_row_black_count(BMPREGION *region, int r0) + +{ + unsigned char *p; + int i, nc, c; + + p = bmp_rowptr_from_top(region->bmp8, r0) + region->c1; + nc = region->c2 - region->c1 + 1; + for (c = i = 0; i < nc; i++, p++) + if (p[0] < region->bgcolor) + c++; + return (c); +} + +/* + ** Returns height of region found and divider position in (*divider_column). + ** (*divider_column) is absolute position on source bitmap. + ** + */ +static int bmpregion_find_multicolumn_divider(BMPREGION *region, + int *row_black_count, BMPREGION *pageregion, int *npr, int *colcount, + int *rowcount) + +{ + int itop, i, dm, middle, divider_column, min_height_pixels, mhp2, + min_col_gap_pixels; + BMPREGION _newregion, *newregion, column[2]; + BREAKINFO *breakinfo, _breakinfo; + int *rowmin, *rowmax; + static char *funcname = "bmpregion_find_multicolumn_divider"; + + if (debug) + printf("@bmpregion_find_multicolumn_divider(%d,%d)-(%d,%d)\n", + region->c1, region->r1, region->c2, region->r2); + breakinfo = &_breakinfo; + breakinfo->textrow = NULL; + breakinfo_alloc(101, breakinfo, region->r2 - region->r1 + 1); + bmpregion_find_vertical_breaks(region, breakinfo, colcount, rowcount, + column_row_gap_height_in); + /* + { + printf("region (%d,%d)-(%d,%d) has %d breaks:\n",region->c1,region->r1,region->c2,region->r2,breakinfo->n); + for (i=0;in;i++) + printf(" Rows %d - %d\n",breakinfo->textrow[i].r1,breakinfo->textrow[i].r2); + } + */ + newregion = &_newregion; + (*newregion) = (*region); + min_height_pixels = min_column_height_inches * src_dpi; /* src->height/15; */ + mhp2 = min_height_pixels - 1; + if (mhp2 < 0) + mhp2 = 0; + dm = 1 + (region->c2 - region->c1 + 1) * column_gap_range / 2.; + middle = (region->c2 - region->c1 + 1) / 2; + min_col_gap_pixels = (int) (min_column_gap_inches * src_dpi + .5); + if (verbose) { + printf("(dm=%d, width=%d, 
min_gap=%d)\n", dm, + region->c2 - region->c1 + 1, min_col_gap_pixels); + printf("Checking regions (r1=%d, r2=%d, minrh=%d)..", region->r1, + region->r2, min_height_pixels); + fflush(stdout); + } + breakinfo_sort_by_row_position(breakinfo); + willus_dmem_alloc_warn(5, (void **) &rowmin, + (region->c2 + 10) * 2 * sizeof(int), funcname, 10); + rowmax = &rowmin[region->c2 + 10]; + for (i = 0; i < region->c2 + 2; i++) { + rowmin[i] = region->r2 + 2; + rowmax[i] = -1; + } + + /* Start with top-most and bottom-most regions, look for column dividers */ + for (itop = 0; + itop < breakinfo->n + && breakinfo->textrow[itop].r1 + < region->r2 + 1 - min_height_pixels; itop++) { + int ibottom; + + for (ibottom = breakinfo->n - 1; + ibottom >= itop + && breakinfo->textrow[ibottom].r2 + - breakinfo->textrow[itop].r1 + >= min_height_pixels; ibottom--) { + /* + ** Look for vertical shaft of clear space that clearly demarcates + ** two columns + */ + for (i = 0; i < dm; i++) { + int foundgap, ii, c1, c2, iiopt, status; + + newregion->c1 = region->c1 + middle - i; + /* If we've effectively already checked this shaft, move on */ + if (itop >= rowmin[newregion->c1] + && ibottom <= rowmax[newregion->c1]) + continue; + newregion->c2 = newregion->c1 + min_col_gap_pixels - 1; + newregion->r1 = breakinfo->textrow[itop].r1; + newregion->r2 = breakinfo->textrow[ibottom].r2; + foundgap = bmpregion_is_clear(newregion, row_black_count, + gtc_in); + if (!foundgap && i > 0) { + newregion->c1 = region->c1 + middle + i; + newregion->c2 = newregion->c1 + min_col_gap_pixels - 1; + foundgap = bmpregion_is_clear(newregion, row_black_count, + gtc_in); + } + if (!foundgap) + continue; + /* Found a gap, but look for a better gap nearby */ + c1 = newregion->c1; + c2 = newregion->c2; + for (iiopt = 0, ii = -min_col_gap_pixels; + ii <= min_col_gap_pixels; ii++) { + int newgap; + newregion->c1 = c1 + ii; + newregion->c2 = c2 + ii; + newgap = bmpregion_is_clear(newregion, row_black_count, + gtc_in); + if (newgap 
> 0 && newgap < foundgap) { + iiopt = ii; + foundgap = newgap; + if (newgap == 1) + break; + } + } + newregion->c1 = c1 + iiopt; + /* If we've effectively already checked this shaft, move on */ + if (itop >= rowmin[newregion->c1] + && ibottom <= rowmax[newregion->c1]) + continue; + newregion->c2 = c2 + iiopt; + divider_column = newregion->c1 + min_col_gap_pixels / 2; + status = bmpregion_column_height_and_gap_test(column, region, + breakinfo->textrow[itop].r1, + breakinfo->textrow[ibottom].r2, divider_column, + colcount, rowcount); + /* If fails column height or gap test, mark as bad */ + if (status) { + if (itop < rowmin[newregion->c1]) + rowmin[newregion->c1] = itop; + if (ibottom > rowmax[newregion->c1]) + rowmax[newregion->c1] = ibottom; + } + /* If right column too short, stop looking */ + if (status & 2) + break; + if (!status) { + int colheight; + + /* printf(" GOT COLUMN DIVIDER AT x=%d.\n",(*divider_column)); */ + if (verbose) { + printf("\n GOOD REGION: col gap=(%d,%d) - (%d,%d)\n" + " r1=%d, r2=%d\n", + newregion->c1, newregion->r1, newregion->c2, + newregion->r2, breakinfo->textrow[itop].r1, + breakinfo->textrow[ibottom].r2); + } + if (itop > 0) { + /* add 1-column region */ + pageregion[(*npr)] = (*region); + pageregion[(*npr)].r2 = breakinfo->textrow[itop - 1].r2; + if (pageregion[(*npr)].r2 + > pageregion[(*npr)].bmp8->height - 1) + pageregion[(*npr)].r2 = + pageregion[(*npr)].bmp8->height - 1; + bmpregion_trim_margins(&pageregion[(*npr)], colcount, + rowcount, 0xf); + /* Special flag to indicate full-width region */ + pageregion[(*npr)].c1 = -1 - pageregion[(*npr)].c1; + (*npr) = (*npr) + 1; + } + pageregion[(*npr)] = column[0]; + (*npr) = (*npr) + 1; + pageregion[(*npr)] = column[1]; + (*npr) = (*npr) + 1; + colheight = breakinfo->textrow[ibottom].r2 - region->r1 + 1; + breakinfo_free(101, breakinfo); + /* + printf("Returning %d divider column = %d - %d\n",region->r2-region->r1+1,newregion->c1,newregion->c2); + */ + return (colheight); + } + } + } 
+ } + if (verbose) + printf("NO GOOD REGION FOUND.\n"); + pageregion[(*npr)] = (*region); + bmpregion_trim_margins(&pageregion[(*npr)], colcount, rowcount, 0xf); + /* Special flag to indicate full-width region */ + pageregion[(*npr)].c1 = -1 - pageregion[(*npr)].c1; + (*npr) = (*npr) + 1; + /* (*divider_column)=region->c2+1; */ + willus_dmem_free(5, (double **) &rowmin, funcname); + breakinfo_free(101, breakinfo); + /* + printf("Returning %d\n",region->r2-region->r1+1); + */ + return (region->r2 - region->r1 + 1); +} + +/* + ** 1 = column 1 too short + ** 2 = column 2 too short + ** 3 = both too short + ** 0 = both okay + ** Both columns must pass height requirement. + ** + ** Also, if gap between columns > max_column_gap_inches, fails test. (8-31-12) + ** + */ +static int bmpregion_column_height_and_gap_test(BMPREGION *column, + BMPREGION *region, int r1, int r2, int cmid, int *colcount, + int *rowcount) + +{ + int min_height_pixels, status; + + status = 0; + min_height_pixels = min_column_height_inches * src_dpi; + column[0] = (*region); + column[0].r1 = r1; + column[0].r2 = r2; + column[0].c2 = cmid - 1; + bmpregion_trim_margins(&column[0], colcount, rowcount, 0xf); + /* + printf(" COL1: pix=%d (%d - %d)\n",newregion->r2-newregion->r1+1,newregion->r1,newregion->r2); + */ + if (column[0].r2 - column[0].r1 + 1 < min_height_pixels) + status |= 1; + column[1] = (*region); + column[1].r1 = r1; + column[1].r2 = r2; + column[1].c1 = cmid; + column[1].c2 = region->c2; + bmpregion_trim_margins(&column[1], colcount, rowcount, 0xf); + /* + printf(" COL2: pix=%d (%d - %d)\n",newregion->r2-newregion->r1+1,newregion->r1,newregion->r2); + */ + if (column[1].r2 - column[1].r1 + 1 < min_height_pixels) + status |= 2; + /* Make sure gap between columns is not too large */ + if (max_column_gap_inches >= 0. + && column[1].c1 - column[0].c2 - 1 + > max_column_gap_inches * src_dpi) + status |= 4; + return (status); +} + +/* + ** Return 0 if there are dark pixels in the region. 
NZ otherwise. + */ +static int bmpregion_is_clear(BMPREGION *region, int *row_black_count, + double gt_in) + +{ + int r, c, nc, pt; + + /* + ** row_black_count[] doesn't necessarily match up to this particular region's columns. + ** So if row_black_count[] == 0, the row is clear, otherwise it has to be counted. + ** because the columns are a subset. + */ + /* nr=region->r2-region->r1+1; */ + nc = region->c2 - region->c1 + 1; + pt = (int) (gt_in * src_dpi * nc + .5); + if (pt < 0) + pt = 0; + for (c = 0, r = region->r1; r <= region->r2; r++) { + if (r < 0 || r >= region->bmp8->height) + continue; + if (row_black_count[r] == 0) + continue; + c += bmpregion_row_black_count(region, r); + if (c > pt) + return (0); + } + /* + printf("(%d,%d)-(%d,%d): c=%d, pt=%d (gt_in=%g)\n", + region->c1,region->r1,region->c2,region->r2,c,pt,gt_in); + */ + return (1 + (int) 10 * c / pt); +} + +static void bmpregion_row_histogram(BMPREGION *region) + +{ + static char *funcname = "bmpregion_row_histogram"; + WILLUSBITMAP *src; + FILE *out; + static int *rowcount; + static int *hist; + int i, j, nn; + + willus_dmem_alloc_warn(6, (void **) &rowcount, + (region->r2 - region->r1 + 1) * sizeof(int), funcname, 10); + willus_dmem_alloc_warn(7, (void **) &hist, + (region->c2 - region->c1 + 1) * sizeof(int), funcname, 10); + src = region->bmp8; + for (j = region->r1; j <= region->r2; j++) { + unsigned char *p; + p = bmp_rowptr_from_top(src, j) + region->c1; + rowcount[j - region->r1] = 0; + for (i = region->c1; i <= region->c2; i++, p++) + if (p[0] < region->bgcolor) + rowcount[j - region->r1]++; + } + for (i = region->c1; i <= region->c2; i++) + hist[i - region->c1] = 0; + for (i = region->r1; i <= region->r2; i++) + hist[rowcount[i - region->r1]]++; + for (i = region->c2 - region->c1 + 1; i >= 0; i--) + if (hist[i] > 0) + break; + nn = i; + out = fopen("hist.ep", "w"); + for (i = 0; i <= nn; i++) + fprintf(out, "%5d %5d\n", i, hist[i]); + fclose(out); + out = fopen("rowcount.ep", "w"); + for (i 
= 0; i < region->r2 - region->r1 + 1; i++) + fprintf(out, "%5d %5d\n", i, rowcount[i]); + fclose(out); + willus_dmem_free(7, (double **) &hist, funcname); + willus_dmem_free(6, (double **) &rowcount, funcname); +} + +/* + ** Mark the region + ** mark_flags & 1 : Mark top + ** mark_flags & 2 : Mark bottom + ** mark_flags & 4 : Mark left + ** mark_flags & 8 : Mark right + ** + */ +static void mark_source_page(BMPREGION *region0, int caller_id, int mark_flags) + +{ + static int display_order = 0; + int i, n, nn, fontsize, r, g, b, shownum; + char num[16]; + BMPREGION *region, _region; + BMPREGION *clip, _clip; + + if (!show_marked_source) + return; + + if (region0 == NULL) { + display_order = 0; + return; + } + + region = &_region; + (*region) = (*region0); + + /* Clip the region w/ignored margins */ + clip = &_clip; + clip->bmp = region0->bmp; + get_white_margins(clip); + if (region->c1 < clip->c1) + region->c1 = clip->c1; + if (region->c2 > clip->c2) + region->c2 = clip->c2; + if (region->r1 < clip->r1) + region->r1 = clip->r1; + if (region->r2 > clip->r2) + region->r2 = clip->r2; + if (region->r2 <= region->r1 || region->c2 <= region->c1) + return; + + /* printf("@mark_source_page(display_order=%d)\n",display_order); */ + if (caller_id == 1) { + display_order++; + shownum = 1; + n = (int) (src_dpi / 60. + 0.5); + if (n < 5) + n = 5; + r = 255; + g = b = 0; + } else if (caller_id == 2) { + shownum = 0; + n = 2; + r = 0; + g = 0; + b = 255; + } else if (caller_id == 3) { + shownum = 0; + n = (int) (src_dpi / 80. 
+ 0.5); + if (n < 4) + n = 4; + r = 0; + g = 255; + b = 0; + } else if (caller_id == 4) { + shownum = 0; + n = 2; + r = 255; + g = 0; + b = 255; + } else { + shownum = 0; + n = 2; + r = 140; + g = 140; + b = 140; + } + if (n < 2) + n = 2; + nn = (region->c2 + 1 - region->c1) / 2; + if (n > nn) + n = nn; + nn = (region->r2 + 1 - region->r1) / 2; + if (n > nn) + n = nn; + if (n < 1) + n = 1; + for (i = 0; i < n; i++) { + int j; + unsigned char *p; + if (mark_flags & 1) { + p = bmp_rowptr_from_top(region->marked, region->r1 + i) + + region->c1 * 3; + for (j = region->c1; j <= region->c2; j++, p += 3) { + p[0] = r; + p[1] = g; + p[2] = b; + } + } + if (mark_flags & 2) { + p = bmp_rowptr_from_top(region->marked, region->r2 - i) + + region->c1 * 3; + for (j = region->c1; j <= region->c2; j++, p += 3) { + p[0] = r; + p[1] = g; + p[2] = b; + } + } + if (mark_flags & 16) /* rowbase */ + { + p = bmp_rowptr_from_top(region->marked, region->rowbase - i) + + region->c1 * 3; + for (j = region->c1; j <= region->c2; j++, p += 3) { + p[0] = r; + p[1] = g; + p[2] = b; + } + } + if (mark_flags & 4) + for (j = region->r1; j <= region->r2; j++) { + p = bmp_rowptr_from_top(region->marked, j) + + (region->c1 + i) * 3; + p[0] = r; + p[1] = g; + p[2] = b; + } + if (mark_flags & 8) + for (j = region->r1; j <= region->r2; j++) { + p = bmp_rowptr_from_top(region->marked, j) + + (region->c2 - i) * 3; + p[0] = r; + p[1] = g; + p[2] = b; + } + } + if (!shownum) + return; + fontsize = region->c2 - region->c1 + 1; + if (fontsize > region->r2 - region->r1 + 1) + fontsize = region->r2 - region->r1 + 1; + fontsize /= 2; + if (fontsize > src_dpi) + fontsize = src_dpi; + if (fontsize < 5) + return; + fontrender_set_typeface("helvetica-bold"); + fontrender_set_fgcolor(r, g, b); + fontrender_set_bgcolor(255, 255, 255); + fontrender_set_pixel_size(fontsize); + fontrender_set_justification(4); + fontrender_set_or(1); + sprintf(num, "%d", display_order); + fontrender_render(region->marked, (double) 
(region->c1 + region->c2) / 2., + (double) (region->marked->height - ((region->r1 + region->r2) / 2.)), + num, 0, NULL); + /* printf(" done mark_source_page.\n"); */ +} + +/* + ** Input: A generic rectangular region from the source file. It will not + ** be checked for multiple columns, but the text may be wrapped + ** (controlled by allow_text_wrapping input). + ** + ** force_scale == -2 : Use same scale for entire column--fit to device + ** + ** This function looks for vertical gaps in the region and breaks it at + ** the widest ones (if there are significantly wider ones). + ** + */ +static void bmpregion_vertically_break(BMPREGION *region, + MASTERINFO *masterinfo, int allow_text_wrapping, double force_scale, + int *colcount, int *rowcount, PAGEINFO *pageinfo, int colgap_pixels, + int ncols) + +{ + static int ncols_last = -1; + int regcount, i, i1, biggap, revert, trim_flags, allow_vertical_breaks; + int justification_flags, caller_id, marking_flags, rbdelta; + // int trim_left_and_right; + BMPREGION *bregion, _bregion; + BREAKINFO *breakinfo, _breakinfo; + double region_width_inches, region_height_inches; + +#if (WILLUSDEBUGX & 1) + printf("\n\n@bmpregion_vertically_break. colgap_pixels=%d\n\n",colgap_pixels); +#endif + trim_flags = 0xf; + allow_vertical_breaks = 1; + justification_flags = 0x8f; /* Don't know region justification status yet. Use user settings. */ + rbdelta = -1; + breakinfo = &_breakinfo; + breakinfo->textrow = NULL; + breakinfo_alloc(102, breakinfo, region->r2 - region->r1 + 1); + bmpregion_find_vertical_breaks(region, breakinfo, colcount, rowcount, -1.0); + /* Should there be a check for breakinfo->n==0 here? */ + /* Don't think it breaks anything to let it go. 
-- 6-11-12 */ +#if (WILLUSDEBUGX & 2) + breakinfo_echo(breakinfo); +#endif + breakinfo_remove_small_rows(breakinfo, 0.25, 0.5, region, colcount, + rowcount); +#if (WILLUSDEBUGX & 2) + breakinfo_echo(breakinfo); +#endif + breakinfo->centered = bmpregion_is_centered(region, breakinfo, 0, + breakinfo->n - 1, NULL); +#if (WILLUSDEBUGX & 2) + breakinfo_echo(breakinfo); +#endif + /* + newregion=&_newregion; + for (i=0;in;i++) + { + (*newregion)=(*region); + newregion->r1=breakinfo->textrow[i].r1; + newregion->r2=breakinfo->textrow[i].r2; + bmpregion_add(newregion,breakinfo,masterinfo,allow_text_wrapping,force_scale,0,1, + colcount,rowcount,pageinfo,0,0xf); + } + breakinfo_free(breakinfo); + return; + */ + /* + if (!vertical_breaks) + { + caller_id=100; + marking_flags=0; + bmpregion_add(region,breakinfo,masterinfo,allow_text_wrapping,trim_flags, + allow_vertical_breaks,force_scale,justification_flags, + caller_id,colcount,rowcount,pageinfo,marking_flags,rbdelta); + breakinfo_free(breakinfo); + return; + } + */ + /* Red, numbered region */ + mark_source_page(region, 1, 0xf); + bregion = &_bregion; + if (debug) { + if (!allow_text_wrapping) + printf( + "@bmpregion_vertically_break (no break) (%d,%d) - (%d,%d) (scale=%g)\n", + region->c1, region->r1, region->c2, region->r2, + force_scale); + else + printf( + "@bmpregion_vertically_break (allow break) (%d,%d) - (%d,%d) (scale=%g)\n", + region->c1, region->r1, region->c2, region->r2, + force_scale); + } + /* + ** Tag blank rows and columns + */ + if (vertical_break_threshold < 0. 
|| breakinfo->n < 6) + biggap = -1.; + else { + int gap_median; + /* + int rowheight_median; + + breakinfo_sort_by_rowheight(breakinfo); + rowheight_median = breakinfo->textrow[breakinfo->n/2].rowheight; + */ +#ifdef WILLUSDEBUG + for (i=0;in;i++) + printf(" gap[%d]=%d\n",i,breakinfo->textrow[i].gap); +#endif + breakinfo_sort_by_gap(breakinfo); + gap_median = breakinfo->textrow[breakinfo->n / 2].gap; +#ifdef WILLUSDEBUG + printf(" median=%d\n",gap_median); +#endif + biggap = gap_median * vertical_break_threshold; + breakinfo_sort_by_row_position(breakinfo); + } +#ifdef WILLUSDEBUG + printf(" biggap=%d\n",biggap); +#endif + region_width_inches = (double) (region->c2 - region->c1 + 1) / src_dpi; + region_height_inches = (double) (region->r2 - region->r1 + 1) / src_dpi; + /* + trim_left_and_right = 1; + if (region_width_inches <= max_region_width_inches) + trim_left_and_right = 0; + */ + /* + printf("force_scale=%g, rwi = %g, rwi/mrwi = %g, rhi = %g\n", + force_scale, + region_width_inches, + region_width_inches / max_region_width_inches, + region_height_inches); + */ + if (force_scale < -1.5 && region_width_inches > MIN_REGION_WIDTH_INCHES + && region_width_inches / max_region_width_inches < 1.25 + && region_height_inches > 0.5) { + revert = 1; + force_scale = -1.0; + fit_column_to_screen(region_width_inches); + // trim_left_and_right = 0; + allow_text_wrapping = 0; + } else + revert = 0; + /* Add the regions (broken vertically) */ + caller_id = 1; + /* + if (trim_left_and_right) + trim_flags=0xf; + else + trim_flags=0xc; + */ + trim_flags = 0xf; + for (regcount = i1 = i = 0; i1 < breakinfo->n; i++) { + int i2; + + i2 = i < breakinfo->n ? i : breakinfo->n - 1; + if (i >= breakinfo->n + || (biggap > 0. 
&& breakinfo->textrow[i2].gap >= biggap)) { + int j, c1, c2, nc, nowrap; + double regwidth, ar1, rh1; + +// printf("CALLER 1: i1=%d, i2=%d (breakinfo->n=%d)\n",i1,i2,breakinfo->n); + (*bregion) = (*region); + bregion->r1 = breakinfo->textrow[i1].r1; + bregion->r2 = breakinfo->textrow[i2].r2; + c1 = breakinfo->textrow[i1].c1; + c2 = breakinfo->textrow[i1].c2; + nc = c2 - c1 + 1; + if (nc <= 0) + nc = 1; + rh1 = (double) (breakinfo->textrow[i1].r2 + - breakinfo->textrow[i1].r1 + 1) / src_dpi; + ar1 = (double) (breakinfo->textrow[i1].r2 + - breakinfo->textrow[i1].r1 + 1) / nc; + for (j = i1 + 1; j <= i2; j++) { + if (c1 > breakinfo->textrow[j].c1) + c1 = breakinfo->textrow[j].c1; + if (c2 < breakinfo->textrow[j].c2) + c2 = breakinfo->textrow[j].c2; + } + regwidth = (double) (c2 - c1 + 1) / src_dpi; + marking_flags = (i1 == 0 ? 0 : 1) + | (i2 == breakinfo->n - 1 ? 0 : 2); + /* Green */ + mark_source_page(bregion, 3, marking_flags); + nowrap = ((regwidth <= max_region_width_inches + && allow_text_wrapping < 2) + || (ar1 > no_wrap_ar_limit + && rh1 > no_wrap_height_limit_inches)); + /* + ** If between regions, or if the next region isn't going to be + ** wrapped, or if the next region starts a different number of + ** columns than before, then "flush and gap." + */ + if (regcount > 0 || just_flushed_internal || nowrap + || (ncols_last > 0 && ncols_last != ncols)) { + int gap; +#ifdef WILLUSDEBUG + printf("wrapflush1\n"); +#endif + if (!just_flushed_internal) + wrapbmp_flush(masterinfo, 0, pageinfo, 0); + gap = regcount == 0 ? + colgap_pixels : breakinfo->textrow[i1 - 1].gap; + if (regcount == 0 && beginning_gap_internal > 0) { + if (last_h5050_internal > 0) { + if (fabs( + 1. 
+ - (double) breakinfo->textrow[i1].h5050 + / last_h5050_internal) > .1) + dst_add_gap_src_pixels("Col/Page break", masterinfo, + colgap_pixels); + last_h5050_internal = -1; + } + gap = beginning_gap_internal; + beginning_gap_internal = -1; + } + dst_add_gap_src_pixels("Vert break", masterinfo, gap); + } else { + if (regcount == 0 && beginning_gap_internal < 0) + beginning_gap_internal = colgap_pixels; + } + bmpregion_add(bregion, breakinfo, masterinfo, allow_text_wrapping, + trim_flags, allow_vertical_breaks, force_scale, + justification_flags, caller_id, colcount, rowcount, + pageinfo, marking_flags, rbdelta); + regcount++; + i1 = i2 + 1; + } + } + ncols_last = ncols; + if (revert) + restore_output_dpi(); + breakinfo_free(102, breakinfo); +} + +/* + ** + ** MAIN BITMAP REGION ADDING FUNCTION + ** + ** NOTE: This function calls itself recursively! + ** + ** Input: A generic rectangular region from the source file. It will not + ** be checked for multiple columns, but the text may be wrapped + ** (controlled by allow_text_wrapping input). + ** + ** First, excess margins are trimmed off of the region. + ** + ** Then, if the resulting trimmed region is wider than the max desirable width + ** and allow_text_wrapping is non-zero, then the + ** bmpregion_analyze_justification_and_line_spacing() function is called. + ** Otherwise the region is scaled to fit and added to the master set of pages. 
+ ** + ** justification_flags + ** Bits 6-7: 0 = document is not fully justified + ** 1 = document is fully justified + ** 2 = don't know document justification yet + ** Bits 4-5: 0 = Use user settings + ** 1 = fully justify + ** 2 = do not fully justify + ** Bits 2-3: 0 = document is left justified + ** 1 = document is centered + ** 2 = document is right justified + ** 3 = don't know document justification yet + ** Bits 0-1: 0 = left justify document + ** 1 = center document + ** 2 = right justify document + ** 3 = Use user settings + ** + ** force_scale = -2.0 : Fit column width to display width + ** force_scale = -1.0 : Use output dpi unless the region doesn't fit. + ** In that case, scale it down until it fits. + ** force_scale > 0.0 : Scale region by force_scale. + ** + ** mark_flags & 1 : Mark top + ** mark_flags & 2 : Mark bottom + ** mark_flags & 4 : Mark left + ** mark_flags & 8 : Mark right + ** + ** trim_flags & 0x80 : Do NOT re-trim no matter what. + ** + */ +static void bmpregion_add(BMPREGION *region, BREAKINFO *breakinfo, + MASTERINFO *masterinfo, int allow_text_wrapping, int trim_flags, + int allow_vertical_breaks, double force_scale, int justification_flags, + int caller_id, int *colcount, int *rowcount, PAGEINFO *pageinfo, + int mark_flags, int rowbase_delta) + +{ + int w, wmax, i, nc, nr, h, bpp, tall_region; + double region_width_inches; + WILLUSBITMAP *bmp, _bmp; + BMPREGION *newregion, _newregion; + + newregion = &_newregion; + (*newregion) = (*region); +#if (WILLUSDEBUGX & 1) + printf("@bmpregion_add (%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2); + printf(" trimflags = %X\n",trim_flags); +#endif + if (debug) { + if (!allow_text_wrapping) + printf("@bmpregion_add (no break) (%d,%d) - (%d,%d) (scale=%g)\n", + region->c1, region->r1, region->c2, region->r2, + force_scale); + else + printf( + "@bmpregion_add (allow break) (%d,%d) - (%d,%d) (scale=%g)\n", + region->c1, region->r1, region->c2, region->r2, + force_scale); + } + 
/* + ** Tag blank rows and columns and trim the blank margins off + ** trimflags = 0xf for all margin trim. + ** trimflags = 0xc for just top and bottom margins. + */ + bmpregion_trim_margins(newregion, colcount, rowcount, trim_flags); +#if (WILLUSDEBUGX & 1) + printf(" After trim: (%d,%d) - (%d,%d)\n",newregion->c1,newregion->r1,newregion->c2,newregion->r2); +#endif + nc = newregion->c2 - newregion->c1 + 1; + nr = newregion->r2 - newregion->r1 + 1; +// printf("nc=%d, nr=%d\n",nc,nr); + if (verbose) { + printf(" row range adjusted to %d - %d\n", newregion->r1, + newregion->r2); + printf(" col range adjusted to %d - %d\n", newregion->c1, + newregion->c2); + } + if (nc <= 5 || nr <= 1) + return; + region_width_inches = (double) nc / src_dpi; +// printf("regwidth = %g in\n",region_width_inches); + /* Use untrimmed region left/right if possible */ + if (caller_id == 1 && region_width_inches <= max_region_width_inches) { + int trimleft, trimright; + int maxpix, dpix; + + maxpix = (int) (max_region_width_inches * src_dpi + .5); +#if (WILLUSDEBUGX & 1) + printf(" Trimming. 
C's = %4d %4d %4d %4d\n",region->c1,newregion->c1,newregion->c2,region->c2); + printf(" maxpix = %d, regwidth = %d\n",maxpix,region->c2-region->c1+1); +#endif + if (maxpix > (region->c2 - region->c1 + 1)) + maxpix = region->c2 - region->c1 + 1; +// printf(" maxpix = %d\n",maxpix); + dpix = (region->c2 - region->c1 + 1 - maxpix) / 2; +// printf(" dpix = %d\n",dpix); + trimright = region->c2 - newregion->c2; + trimleft = newregion->c1 - region->c1; + if (trimleft < trimright) { + if (trimleft > dpix) + newregion->c1 = region->c1 + dpix; + newregion->c2 = newregion->c1 + maxpix - 1; + } else { + if (trimright > dpix) + newregion->c2 = region->c2 - dpix; + newregion->c1 = newregion->c2 - maxpix + 1; + } + if (newregion->c1 < region->c1) + newregion->c1 = region->c1; + if (newregion->c2 > region->c2) + newregion->c2 = region->c2; + nc = newregion->c2 - newregion->c1 + 1; +#if (WILLUSDEBUGX & 1) + printf(" Post Trim. C's = %4d %4d %4d %4d\n",region->c1,newregion->c1,newregion->c2,region->c2); +#endif + region_width_inches = (double) nc / src_dpi; + } + + /* + ** Try breaking the region into smaller horizontal pieces (wrap text lines) + */ + /* + printf("allow_text_wrapping=%d, region_width_inches=%g, max_region_width_inches=%g\n", + allow_text_wrapping,region_width_inches,max_region_width_inches); + */ + /* New in v1.50, if allow_text_wrapping==2, unwrap short lines. 
*/ + if (allow_text_wrapping == 2 + || (allow_text_wrapping == 1 + && region_width_inches > max_region_width_inches)) { + bmpregion_analyze_justification_and_line_spacing(newregion, breakinfo, + masterinfo, colcount, rowcount, pageinfo, 1, force_scale); + return; + } + + /* + ** If allowed, re-submit each vertical region individually + */ + if (allow_vertical_breaks) { + bmpregion_analyze_justification_and_line_spacing(newregion, breakinfo, + masterinfo, colcount, rowcount, pageinfo, 0, force_scale); + return; + } + + /* AT THIS POINT, BITMAP IS NOT TO BE BROKEN UP HORIZONTALLY OR VERTICALLY */ + /* (IT CAN STILL BE FULLY JUSTIFIED IF ALLOWED.) */ + + /* + ** Scale region to fit the destination device width and add to the master bitmap. + ** + ** + ** Start by copying source region to new bitmap + ** + */ +// printf("c1=%d\n",newregion->c1); + /* Is it a figure? */ + tall_region = (double) (newregion->r2 - newregion->r1 + 1) / src_dpi + >= dst_min_figure_height_in; + /* Re-trim left and right? */ + if ((trim_flags & 0x80) == 0) { + /* If tall region and figure justification turned on ... */ + if ((tall_region && dst_figure_justify >= 0) + /* ... or if centered region ... 
*/ + || ((trim_flags & 3) != 3 + && ((justification_flags & 3) == 1 + || ((justification_flags & 3) == 3 + && (dst_justify == 1 + || (dst_justify < 0 + && (justification_flags + & 0xc) == 4)))))) { + bmpregion_trim_margins(newregion, colcount, rowcount, 0x3); + nc = newregion->c2 - newregion->c1 + 1; + region_width_inches = (double) nc / src_dpi; + } + } +#if (WILLUSDEBUGX & 1) + aprintf("atomic region: " ANSI_CYAN "%.2f x %.2f in" ANSI_NORMAL " c1=%d, (%d x %d) (rbdel=%d) just=0x%02X\n", + (double)(newregion->c2-newregion->c1+1)/src_dpi, + (double)(newregion->r2-newregion->r1+1)/src_dpi, + newregion->c1, + (newregion->c2-newregion->c1+1), + (newregion->r2-newregion->r1+1), + rowbase_delta,justification_flags); +#endif + /* Copy atomic region into bmp */ + bmp = &_bmp; + bmp_init(bmp); + bmp->width = nc; + bmp->height = nr; + if (dst_color) + bmp->bpp = 24; + else { + bmp->bpp = 8; + for (i = 0; i < 256; i++) + bmp->red[i] = bmp->blue[i] = bmp->green[i] = i; + } + bmp_alloc(bmp); + bpp = dst_color ? 3 : 1; +// printf("r1=%d, r2=%d\n",newregion->r1,newregion->r2); + for (i = newregion->r1; i <= newregion->r2; i++) { + unsigned char *psrc, *pdst; + + pdst = bmp_rowptr_from_top(bmp, i - newregion->r1); + psrc = bmp_rowptr_from_top(dst_color ? newregion->bmp : newregion->bmp8, + i) + bpp * newregion->c1; + memcpy(pdst, psrc, nc * bpp); + } + /* + ** Now scale to appropriate destination size. + ** + ** force_scale is used to maintain uniform scaling so that + ** most of the regions are scaled at the same value. + ** + ** force_scale = -2.0 : Fit column width to display width + ** force_scale = -1.0 : Use output dpi unless the region doesn't fit. + ** In that case, scale it down until it fits. + ** force_scale > 0.0 : Scale region by force_scale. + ** + */ + /* Max viewable pixel width on device screen */ + wmax = (int) (masterinfo->bmp.width - (dst_marleft + dst_marright) * dst_dpi + + 0.5); + if (force_scale > 0.) 
+ w = (int) (force_scale * bmp->width + 0.5); + else { + if (region_width_inches < max_region_width_inches) + w = (int) (region_width_inches * dst_dpi + .5); + else + w = wmax; + } + /* Special processing for tall regions (likely figures) */ + if (tall_region && w < wmax && dst_fit_to_page != 0) { + if (dst_fit_to_page < 0) + w = wmax; + else { + w = (int) (w * (1. + (double) dst_fit_to_page / 100.) + 0.5); + if (w > wmax) + w = wmax; + } + } + h = (int) (((double) w / bmp->width) * bmp->height + .5); + + /* + ** If scaled dimensions are finite, add to master bitmap. + */ + if (w > 0 && h > 0) { + WILLUSBITMAP *tmp, _tmp; + int nocr; + + last_scale_factor_internal = (double) w / bmp->width; +#ifdef HAVE_OCR + if (dst_ocr) + { + nocr=(int)((double)bmp->width/w+0.5); + if (nocr < 1) + nocr=1; + if (nocr > 10) + nocr=10; + w *= nocr; + h *= nocr; + } + else +#endif + nocr = 1; + tmp = &_tmp; + bmp_init(tmp); + bmp_resample(tmp, bmp, (double) 0., (double) 0., (double) bmp->width, + (double) bmp->height, w, h); + bmp_free(bmp); + /* + { + static int nn=0; + char filename[256]; + sprintf(filename,"xxx%02d.png",nn++); + bmp_write(tmp,filename,stdout,100); + } + */ + /* + ** Add scaled bitmap to destination. 
+ */ + /* Allocate more rows if necessary */ + while (masterinfo->rows + tmp->height / nocr > masterinfo->bmp.height) + bmp_more_rows(&masterinfo->bmp, 1.4, 255); + /* Check special justification for tall regions */ + if (tall_region && dst_figure_justify >= 0) + justification_flags = dst_figure_justify; + bmp_src_to_dst(masterinfo, tmp, justification_flags, region->bgcolor, + nocr, (int) ((double) src_dpi * tmp->width / bmp->width + .5)); + bmp_free(tmp); + } + + /* Store delta to base of text row (used by wrapbmp_flush()) */ + last_rowbase_internal = rowbase_delta; + /* .05 was .072 in v1.35 */ + /* dst_add_gap(&masterinfo->bmp,&masterinfo->rows,0.05); */ + /* + if (revert) + restore_output_dpi(); + */ +} + +static void dst_add_gap_src_pixels(char *caller, MASTERINFO *masterinfo, + int pixels) + +{ + double gap_inches; + + /* + aprintf("%s " ANSI_GREEN "dst_add" ANSI_NORMAL " %.3f in (%d pix)\n",caller,(double)pixels/src_dpi,pixels); + */ + if (last_scale_factor_internal < 0.) + gap_inches = (double) pixels / src_dpi; + else + gap_inches = (double) pixels * last_scale_factor_internal / dst_dpi; + gap_inches *= vertical_multiplier; + if (gap_inches > max_vertical_gap_inches) + gap_inches = max_vertical_gap_inches; + dst_add_gap(masterinfo, gap_inches); +} + +static void dst_add_gap(MASTERINFO *masterinfo, double inches) + +{ + int n, bw; + unsigned char *p; + + n = (int) (inches * dst_dpi + .5); + if (n < 1) + n = 1; + while (masterinfo->rows + n > masterinfo->bmp.height) + bmp_more_rows(&masterinfo->bmp, 1.4, 255); + bw = bmp_bytewidth(&masterinfo->bmp) * n; + p = bmp_rowptr_from_top(&masterinfo->bmp, masterinfo->rows); + memset(p, 255, bw); + masterinfo->rows += n; +} + +/* + ** + ** Add already-scaled source bmp to destination bmp. + ** Source bmp may be narrower than destination--if so, it may be fully justifed. + ** dst = destination bitmap + ** src = source bitmap + ** dst and src bpp must match! 
+ ** All rows of src are applied to masterinfo->bmp starting at row masterinfo->rows + ** Full justification is done if requested. + ** + */ +static void bmp_src_to_dst(MASTERINFO *masterinfo, WILLUSBITMAP *src, + int justification_flags, int whitethresh, int nocr, int dpi) + +{ + WILLUSBITMAP *src1, _src1; + WILLUSBITMAP *tmp; +#ifdef HAVE_OCR + WILLUSBITMAP _tmp; + OCRWORDS _words,*words; +#endif + int dw, dw2; + int i, srcbytespp, srcbytewidth, go_full; + int destwidth, destx0, just; + + if (src->width <= 0 || src->height <= 0) + return; + /* + printf("@bmp_src_to_dst. dst->bpp=%d, src->bpp=%d, src=%d x %d\n",masterinfo->bmp.bpp,src->bpp,src->width,src->height); + */ + /* + { + static int count=0; + static char filename[256]; + + printf(" @bmp_src_to_dst...\n"); + sprintf(filename,"src%05d.png",count++); + bmp_write(src,filename,stdout,100); + } + */ + /* + if (fulljust && dst_fulljustify) + printf("srcbytespp=%d, srcbytewidth=%d, destwidth=%d, destx0=%d, destbytewidth=%d\n", + srcbytespp,srcbytewidth,destwidth,destx0,dstbytewidth); + */ + + /* Determine what justification to use */ + /* Left? */ + if ((justification_flags & 3) == 0 /* Mandatory left just */ + || ((justification_flags & 3) == 3 /* Use user settings */ + && (dst_justify == 0 + || (dst_justify < 0 + && (justification_flags & 0xc) == 0)))) + just = 0; + else if ((justification_flags & 3) == 2 + || ((justification_flags & 3) == 3 + && (dst_justify == 2 + || (dst_justify < 0 + && (justification_flags & 0xc) == 8)))) + just = 2; + else + just = 1; + + /* Full justification? 
*/ + destwidth = (int) (masterinfo->bmp.width + - (dst_marleft + dst_marright) * dst_dpi + .5); + go_full = (destwidth * nocr > src->width + && (((justification_flags & 0x30) == 0x10) + || ((justification_flags & 0x30) == 0 // Use user settings + && (dst_fulljustify == 1 + || (dst_fulljustify < 0 + && (justification_flags & 0xc0) + == 0x40))))); + + /* Put fully justified text into src1 bitmap */ + if (go_full) { + src1 = &_src1; + bmp_init(src1); + bmp_fully_justify(src1, src, nocr * destwidth, whitethresh, just); + } else + src1 = src; + +#if (WILLUSDEBUGX & 1) + printf("@bmp_src_to_dst: jflags=0x%02X just=%d, go_full=%d\n",justification_flags,just,go_full); + printf(" destx0=%d, destwidth=%d, src->width=%d\n",destx0,destwidth,src->width); +#endif +#ifdef HAVE_OCR + if (dst_ocr) + { + /* Run OCR on the bitmap */ + words=&_words; + ocrwords_init(words); + ocrwords_fill_in(words,src1,whitethresh,dpi); + /* Scale bitmap and word positions to destination size */ + if (nocr>1) + { + tmp=&_tmp; + bmp_init(tmp); + bmp_integer_resample(tmp,src1,nocr); + ocrwords_int_scale(words,nocr); + } + else + tmp=src1; + } + else +#endif + tmp = src1; + /* + printf("writing...\n"); + ocrwords_box(words,tmp); + bmp_write(tmp,"out.png",stdout,100); + exit(10); + */ + destx0 = (int) (dst_marleft * dst_dpi + .5); + if (just == 0) + dw = destx0; + else if (just == 1) + dw = destx0 + (destwidth - tmp->width) / 2; + else + dw = destx0 + destwidth - tmp->width; + if (dw < 0) + dw = 0; + /* Add OCR words to destination list */ +#ifdef HAVE_OCR + if (dst_ocr) + { + ocrwords_offset(words,dw,masterinfo->rows); + ocrwords_concatenate(dst_ocrwords,words); + ocrwords_free(words); + } +#endif + + /* Add tmp bitmap to dst */ + srcbytespp = tmp->bpp == 24 ? 
3 : 1; + srcbytewidth = tmp->width * srcbytespp; + dw2 = masterinfo->bmp.width - tmp->width - dw; + dw *= srcbytespp; + dw2 *= srcbytespp; + for (i = 0; i < tmp->height; i++, masterinfo->rows++) { + unsigned char *pdst, *psrc; + + psrc = bmp_rowptr_from_top(tmp, i); + pdst = bmp_rowptr_from_top(&masterinfo->bmp, masterinfo->rows); + memset(pdst, 255, dw); + pdst += dw; + memcpy(pdst, psrc, srcbytewidth); + pdst += srcbytewidth; + memset(pdst, 255, dw2); + } + +#ifdef HAVE_OCR + if (dst_ocr && nocr>1) + bmp_free(tmp); +#endif + if (go_full) + bmp_free(src1); +} + +/* + ** Spread words out in src and put into jbmp at scaling nocr + ** In case the text can't be expanded enough, + ** just=0 (left justify), 1 (center), 2 (right justify) + */ +static void bmp_fully_justify(WILLUSBITMAP *jbmp, WILLUSBITMAP *src, + int jbmpwidth, int whitethresh, int just) + +{ + BMPREGION srcregion; + BREAKINFO *colbreaks, _colbreaks; + WILLUSBITMAP gray; + int *gappos, *gapsize; + int i, srcbytespp, srcbytewidth, jbmpbytewidth, newwidth, destx0, ng; + static char *funcname = "bmp_fully_justify"; + + /* + { + char filename[256]; + count++; + sprintf(filename,"out%03d.png",count); + bmp_write(src,filename,stdout,100); + } + */ + /* Init/allocate destination bitmap */ + jbmp->width = jbmpwidth; + jbmp->height = src->height; + jbmp->bpp = src->bpp; + if (jbmp->bpp == 8) + for (i = 0; i < 256; i++) + jbmp->red[i] = jbmp->green[i] = jbmp->blue[i] = i; + bmp_alloc(jbmp); + + /* Find breaks in the text row */ + colbreaks = &_colbreaks; + colbreaks->textrow = NULL; + srcregion.bgcolor = whitethresh; + srcregion.c1 = 0; + srcregion.c2 = src->width - 1; + srcregion.r1 = 0; + srcregion.r2 = src->height - 1; + srcbytespp = src->bpp == 24 ? 
3 : 1; + if (srcbytespp == 3) { + srcregion.bmp = src; + srcregion.bmp8 = &gray; + bmp_init(srcregion.bmp8); + bmp_convert_to_greyscale_ex(srcregion.bmp8, src); + } else { + srcregion.bmp = src; + srcregion.bmp8 = src; + } + breakinfo_alloc(103, colbreaks, src->width); + { + int *colcount, *rowcount; + + colcount = rowcount = NULL; + willus_dmem_alloc_warn(8, (void **) &colcount, + sizeof(int) * (src->width + src->height), funcname, 10); + rowcount = &colcount[src->width]; + bmpregion_one_row_find_breaks(&srcregion, colbreaks, colcount, rowcount, + 1); + willus_dmem_free(8, (double **) &colcount, funcname); + } + if (srcbytespp == 3) + bmp_free(srcregion.bmp8); + ng = colbreaks->n - 1; + gappos = NULL; + if (ng > 0) { + int maxsize, ms2, mingap, j; + + willus_dmem_alloc_warn(9, (void **) &gappos, (2 * sizeof(int)) * ng, + funcname, 10); + gapsize = &gappos[ng]; + for (i = 0; i < ng; i++) { + gappos[i] = colbreaks->textrow[i].c2 + 1; + gapsize[i] = colbreaks->textrow[i].gap; + } + + /* Take only the largest group of gaps */ + for (maxsize = i = 0; i < ng; i++) + if (maxsize < gapsize[i]) + maxsize = gapsize[i]; + mingap = srcregion.lcheight * word_spacing; + if (mingap < 2) + mingap = 2; + if (maxsize > mingap) + maxsize = mingap; + ms2 = maxsize / 2; + for (i = j = 0; i < ng; i++) + if (gapsize[i] > ms2) { + if (j != i) { + gapsize[j] = gapsize[i]; + gappos[j] = gappos[i]; + } + j++; + } + ng = j; + + /* Figure out total pixel expansion */ + newwidth = src->width * 1.25; + if (newwidth > jbmp->width) + newwidth = jbmp->width; + } else + newwidth = src->width; + breakinfo_free(103, colbreaks); + + /* Starting column in destination bitmap */ + if (just == 1) + destx0 = (jbmp->width - newwidth) / 2; + else if (just == 2) + destx0 = (jbmp->width - newwidth); + else + destx0 = 0; + + jbmpbytewidth = bmp_bytewidth(jbmp); + srcbytewidth = bmp_bytewidth(src); + + /* Clear entire fully justified bitmap */ + memset(bmp_rowptr_from_top(jbmp, 0), 255, jbmpbytewidth * 
jbmp->height); + + /* Spread out source pieces to fully justify them */ + for (i = 0; i <= ng; i++) { + int j, dx0, dx, sx0; + unsigned char *pdst, *psrc; + + dx = i < ng ? + (i > 0 ? gappos[i] - gappos[i - 1] : gappos[i] + 1) : + (i > 0 ? src->width - (gappos[i - 1] + 1) : src->width); + dx *= srcbytespp; + sx0 = i == 0 ? 0 : (gappos[i - 1] + 1); + dx0 = destx0 + sx0 + (i == 0 ? 0 : (newwidth - src->width) * i / ng); + psrc = bmp_rowptr_from_top(src, 0) + sx0 * srcbytespp; + pdst = bmp_rowptr_from_top(jbmp, 0) + dx0 * srcbytespp; + for (j = 0; j < src->height; j++, pdst += jbmpbytewidth, psrc += + srcbytewidth) + memcpy(pdst, psrc, dx); + } + if (gappos != NULL) + willus_dmem_free(9, (double **) &gappos, funcname); +} + +/* + ** flags&1 : trim c1 + ** flags&2 : trim c2 + ** flags&4 : trim r1 + ** flags&8 : trim r2 + ** flags&16 : Find rowbase, font size, etc. + ** + ** Row base is where row dist crosses 50% on r2 side. + ** Font size is where row dist crosses 5% on other side (r1 side). + ** Lowercase font size is where row dist crosses 50% on r1 side. 
+ ** + ** For 12 pt font: + ** Single spacing is 14.66 pts (Calibri), 13.82 pts (Times), 13.81 pts (Arial) + ** Size of cap letter is 7.7 pts (Calibri), 8.1 pts (Times), 8.7 pts (Arial) + ** Size of small letter is 5.7 pts (Calibri), 5.6 pts (Times), 6.5 pts (Arial) + ** Mean line spacing = 1.15 - 1.22 (~1.16) + ** Mean cap height = 0.68 + ** Mean small letter height = 0.49 + ** + */ +static void bmpregion_trim_margins(BMPREGION *region, int *colcount0, + int *rowcount0, int flags) + +{ + int i, j, n; /* ,r1,r2,dr1,dr2,dr,vtrim,vspace; */ + int *colcount, *rowcount; + static char *funcname = "bmpregion_trim_margins"; + + /* To detect a hyphen, we need to trim and calc text base row */ + if (flags & 32) + flags |= 0x1f; + if (colcount0 == NULL) + willus_dmem_alloc_warn(10, (void **) &colcount, + sizeof(int) * (region->c2 + 1), funcname, 10); + else + colcount = colcount0; + if (rowcount0 == NULL) + willus_dmem_alloc_warn(11, (void **) &rowcount, + sizeof(int) * (region->r2 + 1), funcname, 10); + else + rowcount = rowcount0; + n = region->c2 - region->c1 + 1; + /* + printf("Trim: reg=(%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2); + if (region->c2+1 > cca || region->r2+1 > rca) + { + printf("A ha 0!\n"); + exit(10); + } + */ + memset(colcount, 0, (region->c2 + 1) * sizeof(int)); + memset(rowcount, 0, (region->r2 + 1) * sizeof(int)); + for (j = region->r1; j <= region->r2; j++) { + unsigned char *p; + p = bmp_rowptr_from_top(region->bmp8, j) + region->c1; + for (i = 0; i < n; i++, p++) + if (p[0] < region->bgcolor) { + rowcount[j]++; + colcount[i + region->c1]++; + } + } + /* + ** Trim excess margins + */ + if (flags & 1) + trim_to(colcount, ®ion->c1, region->c2, + src_left_to_right ? 2.0 : 4.0); + if (flags & 2) + trim_to(colcount, ®ion->c2, region->c1, + src_left_to_right ? 
4.0 : 2.0); + if (colcount0 == NULL) + willus_dmem_free(10, (double **) &colcount, funcname); + if (flags & 4) + trim_to(rowcount, ®ion->r1, region->r2, 4.0); + if (flags & 8) + trim_to(rowcount, ®ion->r2, region->r1, 4.0); + if (flags & 16) { + int maxcount, mc2, h2; + double f; + + maxcount = 0; + for (i = region->r1; i <= region->r2; i++) + if (rowcount[i] > maxcount) + maxcount = rowcount[i]; + mc2 = maxcount / 2; + for (i = region->r2; i >= region->r1; i--) + if (rowcount[i] > mc2) + break; + region->rowbase = i; + for (i = region->r1; i <= region->r2; i++) + if (rowcount[i] > mc2) + break; + region->h5050 = region->lcheight = region->rowbase - i + 1; + mc2 = maxcount / 20; + for (i = region->r1; i <= region->r2; i++) + if (rowcount[i] > mc2) + break; + region->capheight = region->rowbase - i + 1; + /* + ** Sanity check capheight and lcheight + */ + h2 = height2_calc(&rowcount[region->r1], region->r2 - region->r1 + 1); +#if (WILLUSDEBUGX & 8) + if (region->c2-region->c1 > 1500) + printf("reg %d x %d (%d,%d) - (%d,%d) h2=%d ch/h2=%g\n",region->c2-region->c1+1,region->r2-region->r1+1,region->c1,region->r1,region->c2,region->r2,h2,(double)region->capheight/h2); +#endif + if (region->capheight < h2 * 0.75) + region->capheight = h2; + f = (double) region->lcheight / region->capheight; + if (f < 0.55) + region->lcheight = (int) (0.72 * region->capheight + .5); + else if (f > 0.85) + region->lcheight = (int) (0.72 * region->capheight + .5); +#if (WILLUSDEBUGX & 8) + if (region->c2-region->c1 > 1500) + printf(" lcheight final = %d\n",region->lcheight); +#endif +#if (WILLUSDEBUGX & 10) + if (region->c2-region->c1 > 1500 && region->r2-region->r1 < 100) + { + static int append=0; + FILE *f; + int i; + f=fopen("textrows.ep",append==0?"w":"a"); + append=1; + for (i=region->r1;i<=region->r2;i++) + fprintf(f,"%d %g\n",region->rowbase-i,(double)rowcount[i]/maxcount); + fprintf(f,"//nc\n"); + fclose(f); + } +#endif + } else { + region->h5050 = region->r2 - region->r1 + 1; + 
region->capheight = 0.68 * (region->r2 - region->r1 + 1); + region->lcheight = 0.5 * (region->r2 - region->r1 + 1); + region->rowbase = region->r2; + } +#if (WILLUSDEBUGX & 2) + printf("trim:\n reg->c1=%d, reg->c2=%d\n",region->c1,region->c2); + printf(" reg->r1=%d, reg->r2=%d, reg->rowbase=%d\n\n",region->r1,region->r2,region->rowbase); +#endif + if (rowcount0 == NULL) + willus_dmem_free(11, (double **) &rowcount, funcname); +} + +/* + ** Does region end in a hyphen? If so, fill in HYPHENINFO structure. + */ +static void bmpregion_hyphen_detect(BMPREGION *region) + +{ + int i, j; /* ,r1,r2,dr1,dr2,dr,vtrim,vspace; */ + int width; + int *r0, *r1, *r2, *r3; + int rmin, rmax, rowbytes, nrmid, rsum; + int cstart, cend, cdir; + unsigned char *p; + static char *funcname = "bmpregion_hyphen_detect"; + +#if (WILLUSDEBUGX & 16) + static int count=0; + char pngfile[256]; + FILE *out; + + count++; + printf("@bmpregion_hyphen_detect count=%d\n",count); + sprintf(pngfile,"word%04d.png",count); + bmpregion_write(region,pngfile); + sprintf(pngfile,"word%04d.txt",count); + out=fopen(pngfile,"w"); + fprintf(out,"c1=%d, c2=%d, r1=%d, r2=%d\n",region->c1,region->c2,region->r1,region->r2); + fprintf(out,"lcheight=%d\n",region->lcheight); +#endif + + region->hyphen.ch = -1; + region->hyphen.c2 = -1; + if (!k2_hyphen_detect) + return; + width = region->c2 - region->c1 + 1; + if (width < 2) + return; + willus_dmem_alloc_warn(27, (void **) &r0, sizeof(int) * 4 * width, funcname, + 10); + r1 = &r0[width]; + r2 = &r1[width]; + r3 = &r2[width]; + for (i = 0; i < width; i++) + r0[i] = r1[i] = r2[i] = r3[i] = -1; + rmin = region->rowbase - region->capheight - region->lcheight * .04; + if (rmin < region->r1) + rmin = region->r1; + rmax = region->rowbase + region->lcheight * .04; + if (rmax > region->r2) + rmax = region->r2; + rowbytes = bmp_bytewidth(region->bmp8); + p = bmp_rowptr_from_top(region->bmp8, 0); + nrmid = rsum = 0; + if (src_left_to_right) { + cstart = region->c2; + cend = 
region->c1 - 1; + cdir = -1; + } else { + cstart = region->c1; + cend = region->c2 + 1; + cdir = 1; + } +#if (WILLUSDEBUGX & 16) + fprintf(out," j r0 r1 r2 r3\n"); +#endif + for (j = cstart; j != cend; j += cdir) { + int r, rmid, dr, drmax; + +// printf("j=%d\n",j); + rmid = (rmin + rmax) / 2; +// printf(" rmid=%d\n",rmid); + drmax = region->r2 + 1 - rmid > rmid - region->r1 + 1 ? + region->r2 + 1 - rmid : rmid - region->r1 + 1; + /* Find dark region closest to center line */ + for (dr = 0; dr < drmax; dr++) { + if (rmid + dr <= region->r2 + && p[(rmid + dr) * rowbytes + j] < region->bgcolor) + break; + if (rmid - dr >= region->r1 + && p[(rmid - dr) * rowbytes + j] < region->bgcolor) { + dr = -dr; + break; + } + } +#if (WILLUSDEBUGX & 16) + fprintf(out," dr=%d/%d, rmid+dr=%d, rmin=%d, rmax=%d, nrmid=%d\n",dr,drmax,rmid+dr,rmin,rmax,nrmid); +#endif + /* No dark detected or mark is outside hyphen region? */ + /* Termination criterion #1 */ + if (dr >= drmax + || (nrmid > 2 && (double) nrmid / region->lcheight > .1 + && (rmid + dr < rmin || rmid + dr > rmax))) { + if (region->hyphen.ch >= 0 && dr >= drmax) + continue; + if (nrmid > 2 && (double) nrmid / region->lcheight > .35) { + region->hyphen.ch = j - cdir; + region->hyphen.r1 = rmin; + region->hyphen.r2 = rmax; + } + if (dr < drmax) { + region->hyphen.c2 = j; + break; + } + continue; + } + if (region->hyphen.ch >= 0) { + region->hyphen.c2 = j; + break; + } + nrmid++; + rmid += dr; + /* Dark spot is outside expected hyphen area */ + /* + if (rmidrmax) + { + if (nrmid>0) + break; + continue; + } + */ + for (r = rmid; r >= region->r1; r--) + if (p[r * rowbytes + j] >= region->bgcolor) + break; + r1[j - region->c1] = r + 1; + r0[j - region->c1] = -1; + if (r >= region->r1) { + for (; r >= region->r1; r--) + if (p[r * rowbytes + j] < region->bgcolor) + break; + if (r >= region->r1) + r0[j - region->c1] = r; + } + for (r = rmid; r <= region->r2; r++) + if (p[r * rowbytes + j] >= region->bgcolor) + break; + r2[j - 
region->c1] = r - 1; + r3[j - region->c1] = -1; + if (r <= region->r2) { + for (; r <= region->r2; r++) + if (p[r * rowbytes + j] < region->bgcolor) + break; + if (r <= region->r2) + r3[j - region->c1] = r; + } +#if (WILLUSDEBUGX & 16) + fprintf(out," %4d %4d %4d %4d %4d\n",j,r0[j-region->c1],r1[j-region->c1],r2[j-region->c1],r3[j-region->c1]); +#endif + if (region->hyphen.c2 < 0 + && (r0[j - region->c1] >= 0 || r3[j - region->c1] >= 0)) + region->hyphen.c2 = j; + /* Termination criterion #2 */ + if (nrmid > 2 && (double) nrmid / region->lcheight > .35 + && (r1[j - region->c1] > rmax || r2[j - region->c1] < rmin)) { + region->hyphen.ch = j - cdir; + region->hyphen.r1 = rmin; + region->hyphen.r2 = rmax; + if (region->hyphen.c2 < 0) + region->hyphen.c2 = j; + break; + } + // rc=(r1[j-region->c1]+r2[j-region->c1])/2; + /* DQ possible hyphen if r1/r2 out of range */ + if (nrmid > 1) { + /* Too far away from last values? */ + if ((double) (rmin - r1[j - region->c1]) / region->lcheight > .1 + || (double) (r2[j - region->c1] - rmax) / region->lcheight + > .1) + break; + if ((double) nrmid / region->lcheight > .1 && nrmid > 1) { + if ((double) fabs(rmin - r1[j - region->c1]) / region->lcheight + > .1 + || (double) (rmax - r2[j - region->c1]) + / region->lcheight > .1) + break; + } + } + if (nrmid == 1 || r1[j - region->c1] < rmin) + rmin = r1[j - region->c1]; + if (nrmid == 1 || r2[j - region->c1] > rmax) + rmax = r2[j - region->c1]; + if ((double) nrmid / region->lcheight > .1 && nrmid > 1) { + double rmean; + + /* Can't be too thick */ + if ((double) (rmax - rmin) / region->lcheight > .55 + || (double) (rmax - rmin) / region->lcheight < .08) + break; + /* Must be reasonably well centered above baseline */ + rmean = (double) (rmax + rmin) / 2; + if ((double) (region->rowbase - rmean) / region->lcheight < 0.35 + || (double) (region->rowbase - rmean) / region->lcheight + > 0.85) + break; + if ((double) (region->rowbase - rmax) / region->lcheight < 0.2 + || (double) 
(region->rowbase - rmin) / region->lcheight + > 0.92) + break; + } + } +#if (WILLUSDEBUGX & 16) + fprintf(out," ch=%d, c2=%d, r1=%d, r2=%d\n",region->hyphen.ch,region->hyphen.c2,region->hyphen.r1,region->hyphen.r2); + fclose(out); +#endif + /* More sanity checks--better to miss a hyphen than falsely detect it. */ + if (region->hyphen.ch >= 0) { + double ar; + /* If it's only a hyphen, then it's probably actually a dash--don't detect it. */ + if (region->hyphen.c2 < 0) + region->hyphen.ch = -1; + /* Check aspect ratio */ + ar = (double) (region->hyphen.r2 - region->hyphen.r1) / nrmid; + if (ar < 0.08 || ar > 0.75) + region->hyphen.ch = -1; + } + willus_dmem_free(27, (double **) &r0, funcname); +#if (WILLUSDEBUGX & 16) + if (region->hyphen.ch>=0) + printf("\n\n GOT HYPHEN.\n\n"); + printf(" Exiting bmpregion_hyphen_detect\n"); +#endif +} + +#if (defined(WILLUSDEBUGX) || defined(WILLUSDEBUG)) +static void bmpregion_write(BMPREGION *region,char *filename) + +{ + int i,bpp; + WILLUSBITMAP *bmp,_bmp; + + bmp=&_bmp; + bmp_init(bmp); + bmp->width=region->c2-region->c1+1; + bmp->height=region->r2-region->r1+1; + bmp->bpp=region->bmp->bpp; + bpp=bmp->bpp==8?1:3; + bmp_alloc(bmp); + for (i=0;i<256;i++) + bmp->red[i]=bmp->green[i]=bmp->blue[i]=i; + for (i=0;iheight;i++) + { + unsigned char *s,*d; + s=bmp_rowptr_from_top(region->bmp,region->r1+i)+region->c1*bpp; + d=bmp_rowptr_from_top(bmp,i); + memcpy(d,s,bmp->width*bpp); + } + bmp_write(bmp,filename,stdout,97); + bmp_free(bmp); +} +#endif + +#if (WILLUSDEBUGX & 6) +static void breakinfo_echo(BREAKINFO *breakinfo) + +{ + int i; + printf("@breakinfo_echo...\n"); + for (i=0;in;i++) + printf(" %2d. r1=%4d, rowbase=%4d, r2=%4d, c1=%4d, c2=%4d\n", + i+1,breakinfo->textrow[i].r1, + breakinfo->textrow[i].rowbase, + breakinfo->textrow[i].r2, + breakinfo->textrow[i].c1, + breakinfo->textrow[i].c2); +} +#endif + +/* + ** Calculate weighted height of a rectangular region. 
+ ** This weighted height is intended to be close to the height of + ** a capital letter, or the height of the majority of the region. + ** + */ +static int height2_calc(int *rc, int n) + +{ + int i, thresh, i1, h2; + int *c; + static char *funcname = "height2_calc"; +#if (WILLUSDEBUGX & 8) + int cmax; +#endif + + if (n <= 0) + return (1); + willus_dmem_alloc_warn(12, (void **) &c, sizeof(int) * n, funcname, 10); + memcpy(c, rc, n * sizeof(int)); + sorti(c, n); +#if (WILLUSDEBUGX & 8) + cmax=c[n-1]; +#endif + for (i = 0; i < n - 1 && c[i] == 0; i++) + ; + thresh = c[(i + n) / 3]; + willus_dmem_free(12, (double **) &c, funcname); + for (i = 0; i < n - 1; i++) + if (rc[i] >= thresh) + break; + i1 = i; + for (i = n - 1; i > i1; i--) + if (rc[i] >= thresh) + break; +#if (WILLUSDEBUGX & 8) +// printf("thresh = %g, i1=%d, i2=%d\n",(double)thresh/cmax,i1,i); +#endif + h2 = i - i1 + 1; /* Guaranteed to be >=1 */ + return (h2); +} + +static void trim_to(int *count, int *i1, int i2, double gaplen) + +{ + int del, dcount, igaplen, clevel, dlevel, defect_start, last_defect; + + igaplen = (int) (gaplen * src_dpi / 72.); + if (igaplen < 1) + igaplen = 1; + /* clevel=(int)(defect_size_pts*src_dpi/72./3.); */ + clevel = 0; + dlevel = (int) (pow(defect_size_pts * src_dpi / 72., 2.) * PI / 4. + .5); + del = i2 > (*i1) ? 
1 : -1; + defect_start = -1; + last_defect = -1; + dcount = 0; + for (; (*i1) != i2; (*i1) = (*i1) + del) { + if (count[(*i1)] <= clevel) { + dcount = 0; /* Reset defect size */ + continue; + } + /* Mark found */ + if (dcount == 0) { + if (defect_start >= 0) + last_defect = defect_start; + defect_start = (*i1); + } + dcount += count[(*i1)]; + if (dcount >= dlevel) { + if (last_defect >= 0 && abs(defect_start - last_defect) <= igaplen) + (*i1) = last_defect; + else + (*i1) = defect_start; + return; + } + } + if (defect_start < 0) + return; + if (last_defect < 0) { + (*i1) = defect_start; + return; + } + if (abs(defect_start - last_defect) <= igaplen) + (*i1) = last_defect; + else + (*i1) = defect_start; +} + +/* + ** A region that needs its line spacing and justification analyzed. + ** + ** The region may be wider than the max desirable region width. + ** + ** Input: breakinfo should be valid row-break information for the region. + ** + ** Calls bmpregion_one_row_wrap_and_add() for each text row from the + ** breakinfo structure that is within the region. 
+ ** + */ +static void bmpregion_analyze_justification_and_line_spacing(BMPREGION *region, + BREAKINFO *breakinfo, MASTERINFO *masterinfo, int *colcount, + int *rowcount, PAGEINFO *pageinfo, int allow_text_wrapping, + double force_scale) + +{ + int i, i1, i2, ntr, mean_row_gap, maxgap, line_spacing, nls, nch; + BMPREGION *newregion, _newregion; + double *id, *c1, *c2, *ch, *lch, *ls; + int *just, *indented, *short_line; + double capheight, lcheight, fontsize; + int textheight, ragged_right, src_line_spacing; + static char *funcname = "bmpregion_analyze_justification_and_line_spacing"; + +#if (WILLUSDEBUGX & 1) + printf("@bmpregion_analyze_justification_and_line_spacing"); + printf(" (%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2); + printf(" centering = %d\n",breakinfo->centered); +#endif +#if (WILLUSDEBUGX & 2) + breakinfo_echo(breakinfo); +#endif + + /* Locate the vertical part indices in the breakinfo structure */ + newregion = &_newregion; + breakinfo_sort_by_row_position(breakinfo); + for (i = 0; i < breakinfo->n; i++) { + TEXTROW *textrow; + textrow = &breakinfo->textrow[i]; + if ((textrow->r1 + textrow->r2) / 2 >= region->r1) + break; + } + if (i >= breakinfo->n) + return; + i1 = i; + for (; i < breakinfo->n; i++) { + TEXTROW *textrow; + textrow = &breakinfo->textrow[i]; + if ((textrow->r1 + textrow->r2) / 2 > region->r2) + break; + } + i2 = i - 1; + if (i2 < i1) + return; + ntr = i2 - i1 + 1; +#if (WILLUSDEBUGX & 1) + printf(" i1=%d, i2=%d, ntr=%d\n",i1,i2,ntr); +#endif + + willus_dmem_alloc_warn(13, (void **) &c1, sizeof(double) * 6 * ntr, + funcname, 10); + willus_dmem_alloc_warn(14, (void **) &just, sizeof(int) * 3 * ntr, funcname, + 10); + c2 = &c1[ntr]; + ch = &c2[ntr]; + lch = &ch[ntr]; + ls = &lch[ntr]; + id = &ls[ntr]; + indented = &just[ntr]; + short_line = &indented[ntr]; + for (i = 0; i < ntr; i++) + id[i] = i; + + /* Find baselines / font size */ + capheight = lcheight = 0.; + maxgap = -1; + for (nch = nls = 0, i = i1; i <= i2; 
i++) { + TEXTROW *textrow; + double ar, rh; + int marking_flags; + + textrow = &breakinfo->textrow[i]; + c1[i - i1] = (double) textrow->c1; + c2[i - i1] = (double) textrow->c2; + if (i < i2 && maxgap < textrow->gap) { + maxgap = textrow->gap; + if (maxgap < 2) + maxgap = 2; + } + if (textrow->c2 < textrow->c1) + ar = 100.; + else + ar = (double) (textrow->r2 - textrow->r1 + 1) + / (double) (textrow->c2 - textrow->c1 + 1); + rh = (double) (textrow->r2 - textrow->r1 + 1) / src_dpi; + if (i < i2 && ar <= no_wrap_ar_limit + && rh <= no_wrap_height_limit_inches) + ls[nls++] = breakinfo->textrow[i + 1].r1 - textrow->r1; + if (ar <= no_wrap_ar_limit && rh <= no_wrap_height_limit_inches) { + ch[nch] = textrow->capheight; + lch[nch] = textrow->lcheight; + nch++; + } + + /* Mark region w/gray, mark rowbase also */ + marking_flags = (i == i1 ? 0 : 1) | (i == i2 ? 0 : 2); + if (i < i2 || textrow->r2 - textrow->rowbase > 1) + marking_flags |= 0x10; + (*newregion) = (*region); + newregion->r1 = textrow->r1; + newregion->r2 = textrow->r2; + newregion->c1 = textrow->c1; + newregion->c2 = textrow->c2; + newregion->rowbase = textrow->rowbase; + mark_source_page(newregion, 5, marking_flags); +#if (WILLUSDEBUGX & 1) + printf(" Row %2d: (%4d,%4d) - (%4d,%4d) rowbase=%4d, lch=%d, h5050=%d, rh=%d\n",i-i1+1,textrow->c1,textrow->r1,textrow->c2,textrow->r2,textrow->rowbase,textrow->lcheight,textrow->h5050,textrow->rowheight); +#endif + } + wrapbmp_set_maxgap(maxgap); + if (nch < 1) + capheight = lcheight = 2; // Err on the side of too small + else { + capheight = median_val(ch, nch); + lcheight = median_val(lch, nch); + } +// printf("capheight = %g, lcheight = %g\n",capheight,lcheight); + bmpregion_is_centered(region, breakinfo, i1, i2, &textheight); + /* + ** For 12 pt font: + ** Single spacing is 14.66 pts (Calibri), 13.82 pts (Times), 13.81 pts (Arial) + ** Size of cap letter is 7.7 pts (Calibri), 8.1 pts (Times), 8.7 pts (Arial) + ** Size of small letter is 5.7 pts (Calibri), 5.6 pts 
(Times), 6.5 pts (Arial) + ** Mean line spacing = 1.15 - 1.22 (~1.16) + ** Mean cap height = 0.68 + ** Mean small letter height = 0.49 + */ + fontsize = (capheight + lcheight) / 1.17; +// printf("font size = %g pts.\n",(fontsize/src_dpi)*72.); + /* + ** Set line spacing for this region + */ + if (nls > 0) + src_line_spacing = median_val(ls, nls); + else + src_line_spacing = fontsize * 1.2; + if (vertical_line_spacing < 0 + && src_line_spacing + <= fabs(vertical_line_spacing) * fontsize * 1.16) + line_spacing = src_line_spacing; + else + line_spacing = fabs(vertical_line_spacing) * fontsize * 1.16; +#if (WILLUSDEBUGX & 1) + printf(" font size = %.2f pts = %d pixels\n",(fontsize/src_dpi)*72.,(int)(fontsize+.5)); + printf(" src_line_spacing = %d, line_spacing = %d\n",src_line_spacing,line_spacing); +#endif + /* + if (ntr==1) + rheight= (int)((breakinfo->textrow[i1].r2 - breakinfo->textrow[i1].r1)*1.25+.5); + else + rheight = (int)((double)(breakinfo->textrow[i2].rowbase - breakinfo->textrow[i1].rowbase)/(ntr-1)+.5); + */ + mean_row_gap = line_spacing - textheight; + if (mean_row_gap <= 1) + mean_row_gap = 1; + + /* Try to figure out if we have a ragged right edge */ + if (ntr < 3) + ragged_right = 1; + else { + int flushcount; + + if (src_left_to_right) { + for (flushcount = i = 0; i < ntr; i++) { +#if (WILLUSDEBUGX & 1) + printf(" flush_factors[%d] = %g (<.5), %g in (<.1)\n", + i,(double)(region->c2-c2[i])/textheight,(double)(region->c2-c2[i])/src_dpi); +#endif + if ((double) (region->c2 - c2[i]) / textheight < 0.5 + && (double) (region->c2 - c2[i]) / src_dpi < 0.1) + flushcount++; + } + } else { + for (flushcount = i = 0; i < ntr; i++) { +#if (WILLUSDEBUGX & 1) + printf(" flush_factors[%d] = %g (<.5), %g in (<.1)\n", + i,(double)(c1[i]-region->c1)/textheight,(double)(c1[i]-region->c1)/src_dpi); +#endif + if ((double) (c1[i] - region->c1) / textheight < 0.5 + && (double) (c1[i] - region->c1) / src_dpi < 0.1) + flushcount++; + } + } + ragged_right = (flushcount <= ntr 
/ 2); + /* + if (src_left_to_right) + { + sortxyd(c2,id,ntr); + del = region->c2 - c2[ntr-1-ntr/3]; + sortxyd(id,c2,ntr); + } + else + { + sortxyd(c1,id,ntr); + del = c1[ntr/3] - region->c1; + sortxyd(id,c1,ntr); + } + del /= textheight; + printf("del=%g\n",del); + ragged_right = (del > 0.5); + */ + } +#if (WILLUSDEBUGX & 1) + printf("ragged_right=%d\n",ragged_right); +#endif + + /* Store justification and other info line by line */ + for (i = i1; i <= i2; i++) { + double indent1, del; + double i1f, ilfi, i2f, ilf, ifmin, dif; + int centered; + + TEXTROW *textrow; + textrow = &breakinfo->textrow[i]; + i1f = (double) (c1[i - i1] - region->c1) + / (region->c2 - region->c1 + 1); + i2f = (double) (region->c2 - c2[i - i1]) + / (region->c2 - region->c1 + 1); + ilf = src_left_to_right ? i1f : i2f; + ilfi = ilf * (region->c2 - region->c1 + 1) / src_dpi; /* Indent in inches */ + ifmin = i1f < i2f ? i1f : i2f; + dif = fabs(i1f - i2f); + if (ifmin < .01) + ifmin = 0.01; + if (src_left_to_right) + indent1 = (double) (c1[i - i1] - region->c1) / textheight; + else + indent1 = (double) (region->c2 - c2[i - i1]) / textheight; +// printf(" row %2d: indent1=%g\n",i-i1,indent1); + if (!breakinfo->centered) { + indented[i - i1] = (indent1 > 0.5 && ilfi < 1.2 && ilf < .25); + centered = + (!indented[i - i1] && indent1 > 1.0 && dif / ifmin < 0.5); + } else { + centered = (dif < 0.1 || dif / ifmin < 0.5); + indented[i - i1] = (indent1 > 0.5 && ilfi < 1.2 && ilf < .25 + && !centered); + } +#if (WILLUSDEBUGX & 1) + printf("Indent %d: %d. indent1=%g, ilf=%g, centered=%d\n",i-i1+1,indented[i-i1],indent1,ilf,centered); + printf(" indent1=%g, i1f=%g, i2f=%g\n",indent1,i1f,i2f); +#endif + if (centered) + just[i - i1] = 4; + else { + /* + ** The .01 favors left justification over right justification in + ** close cases. + */ + if (src_left_to_right) + just[i - i1] = indented[i - i1] || (i1f < i2f + .01) ? 0 : 8; + else + just[i - i1] = indented[i - i1] || (i2f < i1f + .01) ? 
8 : 0; + } + if (src_left_to_right) + del = (double) (region->c2 - textrow->c2); + else + del = (double) (textrow->c1 - region->c1); + /* Should we keep wrapping after this line? */ + if (!ragged_right) + short_line[i - i1] = (del / textheight > 0.5); + else + short_line[i - i1] = (del / (region->c2 - region->c1) > 0.25); + /* If this row is a bigger/smaller row (font) than the next row, don't wrap. */ + if (!short_line[i - i1] && i < i2) { + TEXTROW *t1; + t1 = &breakinfo->textrow[i + 1]; + if ((textrow->h5050 > t1->h5050 * 1.5 + || textrow->h5050 * 1.5 < t1->h5050) + && (i == 0 + || (i > 0 + && (textrow->rowheight > t1->rowheight * 1.5 + || textrow->rowheight * 1.5 + < t1->rowheight)))) + short_line[i - i1] = 1; + } + if (!ragged_right) + just[i - i1] |= 0x40; +#if (WILLUSDEBUGX & 1) + printf(" just[%d]=0x%02X, shortline[%d]=%d\n",i-i1,just[i-i1],i-i1,short_line[i-i1]); + printf(" textrow->c2=%d, region->c2=%d, del=%g, textheight=%d\n",textrow->c2,region->c2,del,textheight); +#endif + /* If short line, it should still be fully justified if it is wrapped. 
*/ + /* + if (short_line[i-i1]) + just[i-i1] = (just[i-i1]&0xf)|0x60; + */ + } + /* + { + double mean1,mean2,stdev1,stdev2; + array_mean(c1,ntr,&mean1,&stdev1); + array_mean(c2,ntr,&mean2,&stdev2); + printf("Mean c1, c2 = %g, %g; stddevs = %g, %g\n",mean1,mean2,stdev1,stdev2); + printf("textheight = %d, line_spacing = %d\n",textheight,line_spacing); + } + */ + for (i = i1; i <= i2; i++) { + TEXTROW *textrow; + int justflags, trimflags, centered, marking_flags, gap; + +#if (WILLUSDEBUGX & 1) + aprintf("Row " ANSI_YELLOW "%d of %d" ANSI_NORMAL " (wrap=%d)\n",i-i1+1,i2-i1+1,allow_text_wrapping); +#endif + textrow = &breakinfo->textrow[i]; + (*newregion) = (*region); + newregion->r1 = textrow->r1; + newregion->r2 = textrow->r2; + + /* The |3 tells it to use the user settings for left/right/center */ + justflags = just[i - i1] | 0x3; + centered = ((justflags & 0xc) == 4); +#if (WILLUSDEBUGX & 1) + printf(" justflags[%d]=0x%2X, centered=%d, indented=%d\n",i-i1,justflags,centered,indented[i-i1]); +#endif + if (allow_text_wrapping) { + /* If this line is indented or if the justification has changed, */ + /* then start a new line. */ + if (centered || indented[i - i1] + || (i > i1 + && (just[i - i1] & 0xc) != (just[i - i1 - 1] & 0xc))) { +#ifdef WILLUSDEBUG + printf("wrapflush4\n"); +#endif + wrapbmp_flush(masterinfo, 0, pageinfo, 1); + } +#ifdef WILLUSDEBUG + printf(" c1=%d, c2=%d\n",newregion->c1,newregion->c2); +#endif + marking_flags = 0xc | (i == i1 ? 0 : 1) | (i == i2 ? 
0 : 2); + bmpregion_one_row_wrap_and_add(newregion, breakinfo, i, i1, i2, + masterinfo, justflags, colcount, rowcount, pageinfo, + line_spacing, mean_row_gap, textrow->rowbase, marking_flags, + indented[i - i1]); + if (centered || short_line[i - i1]) { +#ifdef WILLUSDEBUG + printf("wrapflush5\n"); +#endif + wrapbmp_flush(masterinfo, 0, pageinfo, 2); + } + continue; + } +#ifdef WILLUSDEBUG + printf("wrapflush5a\n"); +#endif + wrapbmp_flush(masterinfo, 0, pageinfo, 1); + /* If default justifications, ignore all analysis and just center it. */ + if (dst_justify < 0 && dst_fulljustify < 0) { + newregion->c1 = region->c1; + newregion->c2 = region->c2; + justflags = 0xad; /* Force centered region, no justification */ + trimflags = 0x80; + } else + trimflags = 0; + /* No wrapping: text wrap, trim flags, vert breaks, fscale, just */ + bmpregion_add(newregion, breakinfo, masterinfo, 0, trimflags, 0, + force_scale, justflags, 5, colcount, rowcount, pageinfo, 0, + textrow->r2 - textrow->rowbase); + if (vertical_line_spacing < 0) { + int gap1; + gap1 = line_spacing - (textrow->r2 - textrow->r1 + 1); + if (i < i2) + gap = textrow->gap > gap1 ? gap1 : textrow->gap; + else { + gap = textrow->rowheight + - (textrow->rowbase + last_rowbase_internal); + if (gap < mean_row_gap / 2.) + gap = mean_row_gap; + } + } else { + gap = line_spacing - (textrow->r2 - textrow->r1 + 1); + if (gap < mean_row_gap / 2.) 
+ gap = mean_row_gap; + } + if (i < i2) + dst_add_gap_src_pixels("No-wrap line", masterinfo, gap); + else { + last_h5050_internal = textrow->h5050; + beginning_gap_internal = gap; + } + } + willus_dmem_free(14, (double **) &just, funcname); + willus_dmem_free(13, (double **) &c1, funcname); +#ifdef WILLUSDEBUG + printf("Done wrap_and_add.\n"); +#endif +} + +static int bmpregion_is_centered(BMPREGION *region, BREAKINFO *breakinfo, + int i1, int i2, int *th) + +{ + int j, i, cc, n1, ntr; + int textheight; + +#if (WILLUSDEBUGX & 1) + printf("@bmpregion_is_centered: region=(%d,%d) - (%d,%d)\n",region->c1,region->r1,region->c2,region->r2); + printf(" nrows = %d\n",i2-i1+1); +#endif + ntr = i2 - i1 + 1; + for (j = 0; j < 3; j++) { + for (n1 = textheight = 0, i = i1; i <= i2; i++) { + TEXTROW *textrow; + double ar, rh; + + textrow = &breakinfo->textrow[i]; + if (textrow->c2 < textrow->c1) + ar = 100.; + else + ar = (double) (textrow->r2 - textrow->r1 + 1) + / (double) (textrow->c2 - textrow->c1 + 1); + rh = (double) (textrow->r2 - textrow->r1 + 1) / src_dpi; + if (j == 2 || (j >= 1 && rh <= no_wrap_height_limit_inches) + || (j == 0 && rh <= no_wrap_height_limit_inches + && ar <= no_wrap_ar_limit)) { + textheight += textrow->rowbase - textrow->r1 + 1; + n1++; + } + } + if (n1 > 0) + break; + } + textheight = (int) ((double) textheight / n1 + .5); + if (th != NULL) { + (*th) = textheight; +#if (WILLUSDEBUGX & 1) + printf(" textheight assigned (%d)\n",textheight); +#endif + return (breakinfo->centered); + } + + /* + ** Does region appear to be centered? 
+ */ + for (cc = 0, i = i1; i <= i2; i++) { + double indent1, indent2; + +#if (WILLUSDEBUGX & 1) + printf(" tr[%d].c1,c2 = %d, %d\n",i,breakinfo->textrow[i].c1,breakinfo->textrow[i].c2); +#endif + indent1 = (double) (breakinfo->textrow[i].c1 - region->c1) / textheight; + indent2 = (double) (region->c2 - breakinfo->textrow[i].c2) / textheight; +#if (WILLUSDEBUGX & 1) + printf(" tr[%d].indent1,2 = %g, %g\n",i,indent1,indent2); +#endif + /* If only one line and it spans the entire region, call it centered */ + /* Sometimes this won't be the right thing to to. */ + if (i1 == i2 && indent1 < .5 && indent2 < .5) { +#if (WILLUSDEBUGX & 1) + printf(" One line default to bigger region (%s).\n",breakinfo->centered?"not centered":"centered"); +#endif + return (1); + } + if (fabs(indent1 - indent2) > 1.5) { +#if (WILLUSDEBUGX & 1) + printf(" Region not centered.\n"); +#endif + return (0); + } + if (indent1 > 1.0) + cc++; + } +#if (WILLUSDEBUGX & 1) + printf("Region centering: i=%d, i2=%d, cc=%d, ntr=%d\n",i,i2,cc,ntr); +#endif + if (cc > ntr / 2) { +#if (WILLUSDEBUGX & 1) + printf(" Region is centered (enough obviously centered lines).\n"); +#endif + return (1); + } +#if (WILLUSDEBUGX & 1) + printf(" Not centered (not enough obviously centered lines).\n"); +#endif + return (0); +} + +/* array.c */ +/* + ** + ** Compute mean and standard deviation + ** + */ +double array_mean(double *a, int n, double *mean, double *stddev) + +{ + int i; + double sum, avg, sum_sq; + + if (n < 1) + return (0.); + for (sum = sum_sq = i = 0; i < n; i++) + sum += a[i]; + avg = sum / n; + if (mean != NULL) + (*mean) = avg; + if (stddev != NULL) { + double sum_sq; + + for (sum_sq = i = 0; i < n; i++) + sum_sq += (a[i] - avg) * (a[i] - avg); + (*stddev) = sqrt(sum_sq / n); + } + return (avg); +} + +/* + ** CAUTION: This function re-orders the x[] array! 
/*
 ** Robust "median" estimate: the mean of the middle part of the values.
 **
 ** CAUTION: This function re-orders the x[] array!  (x[] is handed to
 ** sortd() whenever n >= 4.)
 **
 **     n <  4 : mean of all values, x[] untouched
 **     n == 4 : mean of x[1..2] after sorting
 **     n == 5 : mean of x[1..3] after sorting
 **     n >= 6 : mean of the n/3 values starting at (n - n/3)/2 after sorting
 */
static double median_val(double *x, int n)

{
    int first, count;

    if (n < 4)
        return (array_mean(x, n, NULL, NULL));
    sortd(x, n);
    switch (n) {
    case 4:
        first = 1;
        count = 2;
        break;
    case 5:
        first = 1;
        count = 3;
        break;
    default:
        count = n / 3;
        first = (n - count) / 2;
        break;
    }
    return (array_mean(&x[first], count, NULL, NULL));
}
+ .5); + if (aperturemax < 2) + aperturemax = 2; + aperture = (int) (src_dpi * apsize_in + .5); + /* + for (i=region->r1;i<=region->r2;i++) + printf("rowcount[%d]=%d\n",i,rowcount[i]); + */ + breakinfo->rhmean_pixels = 0; // Mean text row height + ntr = 0; // Number of text rows + /* Fill rowthresh[] array */ + for (dtrc = 0, i = region->r1; i <= region->r2; i++) { + int ii, i1, i2, sum, pt; + + if (apsize_in < 0.) { + aperture = (int) (dtrc / 13.7 + .5); + if (aperture > aperturemax) + aperture = aperturemax; + if (aperture < 2) + aperture = 2; + } + i1 = i - aperture / 2; + i2 = i1 + aperture - 1; + if (i1 < region->r1) + i1 = region->r1; + if (i2 > region->r2) + i2 = region->r2; + pt = (int) ((i2 - i1 + 1) * gtr_in * src_dpi + .5); /* pixel count threshold */ + if (pt < 1) + pt = 1; + /* Sum over row aperture */ + for (sum = 0, ii = i1; ii <= i2; sum += rowcount[ii], ii++) + ; + /* Does row have few enough black pixels to be considered blank? */ + if ((rowthresh[i - region->r1] = 10 * sum / pt) <= 40) { + if (dtrc > 0) { + breakinfo->rhmean_pixels += dtrc; + ntr++; + } + dtrc = 0; + } else + dtrc++; + } + if (dtrc > 0) { + breakinfo->rhmean_pixels += dtrc; + ntr++; + } + if (ntr > 0) + breakinfo->rhmean_pixels /= ntr; + /* + printf("rhmean=%d (ntr=%d)\n",breakinfo->rhmean_pixels,ntr); + { + FILE *f; + static int count=0; + f=fopen("rthresh.ep",count==0?"w":"a"); + count++; + for (i=region->r1;i<=region->r2;i++) + nprintf(f,"%d\n",rowthresh[i-region->r1]); + nprintf(f,"//nc\n"); + fclose(f); + } + */ + /* Minimum text row height required (pixels) */ + rhmin_pix = breakinfo->rhmean_pixels / 3; + if (rhmin_pix < .04 * src_dpi) + rhmin_pix = .04 * src_dpi; + if (rhmin_pix > .13 * src_dpi) + rhmin_pix = .13 * src_dpi; + if (rhmin_pix < 1) + rhmin_pix = 1; + /* + for (rmax=region->r2;rmax>region->r1;rmax--) + if (rowthresh[rmax-region->r1]>10) + break; + */ + /* Look for "row" gaps in the region so that it can be broken into */ + /* multiple "rows". 
*/ + breakinfo->n = 0; + for (labelrow = figrow = -1, dtrc = trc = brc = 0, i = region->r1; + i <= region->r2; i++) { + /* Does row have few enough black pixels to be considered blank? */ + if (rowthresh[i - region->r1] <= 10) { + trc = 0; + brc++; + /* + ** Max allowed white space between rows = max_vertical_gap_inches + */ + if (dtrc == 0) { + if (brc > brcmin) + newregion->r1++; + continue; + } + /* + ** Big enough blank gap, so add one row / line + */ + if (dtrc + brc >= rhmin_pix) { + int i0, iopt; + double region_height_inches; + double gap_inches; + + if (dtrc < src_dpi * 0.02) + dtrc = src_dpi * 0.02; + if (dtrc < 2) + dtrc = 2; + /* Look for more optimum point */ + for (i0 = iopt = i; i <= region->r2 && i - i0 < dtrc; i++) { + if (rowthresh[i - region->r1] + < rowthresh[iopt - region->r1]) { + iopt = i; + if (rowthresh[i - region->r1] == 0) + break; + } + if (rowthresh[i - region->r1] > 100) + break; + } + /* If at end of region and haven't found perfect break, stay at end */ + if (i > region->r2 && rowthresh[iopt - region->r1] > 0) + i = region->r2; + else + i = iopt; + newregion->r2 = i - 1; + region_height_inches = (double) (newregion->r2 - newregion->r1 + + 1) / src_dpi; + + /* Could this region be a figure? */ + if (figrow < 0 && region_height_inches >= min_fig_height) { + /* If so, set figrow and don't process it yet. */ + figrow = newregion->r1; + labelrow = -1; + newregion->r1 = i; + dtrc = trc = 0; + brc = 1; + continue; + } + /* Are we processing a figure? */ + if (figrow >= 0) { + /* Compute most recent gap */ + if (labelrow >= 0) + gap_inches = (double) (labelrow - newregion->r1) + / src_dpi; + else + gap_inches = -1.; + /* If gap and region height are small enough, tack them on to the figure. */ + if (region_height_inches < max_label_height + && gap_inches > 0. && gap_inches < max_fig_gap) + newregion->r1 = figrow; + else { + /* Not small enough--dump the previous figure. 
*/ + newregion->r2 = newregion->r1 - 1; + newregion->r1 = figrow; + newregion->c1 = region->c1; + newregion->c2 = region->c2; + bmpregion_trim_margins(newregion, colcount, rowcount, + 0x1f); + if (newregion->r2 > newregion->r1) + textrow_assign_bmpregion( + &breakinfo->textrow[breakinfo->n++], + newregion); + if (gap_inches > 0. && gap_inches < max_fig_gap) { + /* This new region might be a figure--set it as the new figure */ + /* and don't dump it yet. */ + figrow = newregion->r2 + 1; + labelrow = -1; + newregion->r1 = i; + dtrc = trc = 0; + brc = 1; + continue; + } else { + newregion->r1 = newregion->r2 + 1; + newregion->r2 = i - 1; + } + } + /* Cancel figure processing */ + figrow = -1; + labelrow = -1; + } + /* + if (newregion->r2 >= rmax) + i=newregion->r2=region->r2; + */ + newregion->c1 = region->c1; + newregion->c2 = region->c2; + bmpregion_trim_margins(newregion, colcount, rowcount, 0x1f); + if (newregion->r2 > newregion->r1) + textrow_assign_bmpregion( + &breakinfo->textrow[breakinfo->n++], newregion); + newregion->r1 = i; + dtrc = trc = 0; + brc = 1; + } + } else { + if (figrow >= 0 && labelrow < 0) + labelrow = i; + dtrc++; + trc++; + brc = 0; + } + } + newregion->r2 = region->r2; + if (dtrc > 0 && newregion->r2 - newregion->r1 + 1 > 0) { + /* If we were processing a figure, include it. 
*/ + if (figrow >= 0) + newregion->r1 = figrow; + newregion->c1 = region->c1; + newregion->c2 = region->c2; + bmpregion_trim_margins(newregion, colcount, rowcount, 0x1f); + if (newregion->r2 > newregion->r1) + textrow_assign_bmpregion(&breakinfo->textrow[breakinfo->n++], + newregion); + } + /* Compute gaps between rows and row heights */ + breakinfo_compute_row_gaps(breakinfo, region->r2); + willus_dmem_free(15, (double **) &rowthresh, funcname); +} + +static void textrow_assign_bmpregion(TEXTROW *textrow, BMPREGION *region) + +{ + textrow->r1 = region->r1; + textrow->r2 = region->r2; + textrow->c1 = region->c1; + textrow->c2 = region->c2; + textrow->rowbase = region->rowbase; + textrow->lcheight = region->lcheight; + textrow->capheight = region->capheight; + textrow->h5050 = region->h5050; +} + +static void breakinfo_compute_row_gaps(BREAKINFO *breakinfo, int r2) + +{ + int i, n; + + n = breakinfo->n; + if (n <= 0) + return; + breakinfo->textrow[0].rowheight = breakinfo->textrow[0].r2 + - breakinfo->textrow[0].r1; + for (i = 0; i < n - 1; i++) + breakinfo->textrow[i].gap = breakinfo->textrow[i + 1].r1 + - breakinfo->textrow[i].rowbase - 1; + /* + breakinfo->textrow[i].rowheight = breakinfo->textrow[i+1].r1 - breakinfo->textrow[i].r1; + */ + for (i = 1; i < n; i++) + breakinfo->textrow[i].rowheight = breakinfo->textrow[i].rowbase + - breakinfo->textrow[i - 1].rowbase; + breakinfo->textrow[n - 1].gap = r2 - breakinfo->textrow[n - 1].rowbase; +} + +static void breakinfo_compute_col_gaps(BREAKINFO *breakinfo, int c2) + +{ + int i, n; + + n = breakinfo->n; + if (n <= 0) + return; + for (i = 0; i < n - 1; i++) { + breakinfo->textrow[i].gap = breakinfo->textrow[i + 1].c1 + - breakinfo->textrow[i].c2 - 1; + breakinfo->textrow[i].rowheight = breakinfo->textrow[i + 1].c1 + - breakinfo->textrow[i].c1; + } + breakinfo->textrow[n - 1].gap = c2 - breakinfo->textrow[n - 1].c2; + breakinfo->textrow[n - 1].rowheight = breakinfo->textrow[n - 1].c2 + - breakinfo->textrow[n - 1].c1; 
+} + +static void breakinfo_remove_small_col_gaps(BREAKINFO *breakinfo, int lcheight, + double mingap) + +{ + int i, j; + + if (mingap < word_spacing) + mingap = word_spacing; + for (i = 0; i < breakinfo->n - 1; i++) { + double gap; + + gap = (double) breakinfo->textrow[i].gap / lcheight; + if (gap >= mingap) + continue; + breakinfo->textrow[i].c2 = breakinfo->textrow[i + 1].c2; + breakinfo->textrow[i].gap = breakinfo->textrow[i + 1].gap; + if (breakinfo->textrow[i + 1].r1 < breakinfo->textrow[i].r1) + breakinfo->textrow[i].r1 = breakinfo->textrow[i + 1].r1; + if (breakinfo->textrow[i + 1].r2 > breakinfo->textrow[i].r2) + breakinfo->textrow[i].r2 = breakinfo->textrow[i + 1].r2; + for (j = i + 1; j < breakinfo->n - 1; j++) + breakinfo->textrow[j] = breakinfo->textrow[j + 1]; + breakinfo->n--; + i--; + } +} + +static void breakinfo_remove_small_rows(BREAKINFO *breakinfo, double fracrh, + double fracgap, BMPREGION *region, int *colcount, int *rowcount) + +{ + int i, j, mg, mh, mg0, mg1; + int c1, c2, nc; + int *rh, *gap; + static char *funcname = "breakinfo_remove_small_rows"; + +#if (WILLUSDEBUGX & 2) + printf("@breakinfo_remove_small_rows(fracrh=%g,fracgap=%g)\n",fracrh,fracgap); +#endif + if (breakinfo->n < 2) + return; + c1 = region->c1; + c2 = region->c2; + nc = c2 - c1 + 1; + willus_dmem_alloc_warn(16, (void **) &rh, 2 * sizeof(int) * breakinfo->n, + funcname, 10); + gap = &rh[breakinfo->n]; + for (i = 0; i < breakinfo->n; i++) { + rh[i] = breakinfo->textrow[i].r2 - breakinfo->textrow[i].r1 + 1; + if (i < breakinfo->n - 1) + gap[i] = breakinfo->textrow[i].gap; + } + sorti(rh, breakinfo->n); + sorti(gap, breakinfo->n - 1); + mh = rh[breakinfo->n / 2]; + mh *= fracrh; + if (mh < 1) + mh = 1; + mg0 = gap[(breakinfo->n - 1) / 2]; + mg = mg0 * fracgap; + mg1 = mg0 * 0.7; + if (mg < 1) + mg = 1; +#if (WILLUSDEBUGX & 2) + printf("mh = %d x %g = %d\n",rh[breakinfo->n/2],fracrh,mh); + printf("mg = %d x %g = %d\n",gap[breakinfo->n/2],fracgap,mg); +#endif + for (i = 0; i < 
breakinfo->n; i++) { + TEXTROW *textrow; + int trh, gs1, gs2, g1, g2, gap_is_big, row_too_small; + double m1, m2, row_width_inches; + + textrow = &breakinfo->textrow[i]; + trh = textrow->r2 - textrow->r1 + 1; + if (i == 0) { + g1 = mg0 + 1; + gs1 = mg + 1; + } else { + g1 = textrow->r1 - breakinfo->textrow[i - 1].r2 - 1; + gs1 = breakinfo->textrow[i - 1].gap; + } + if (i == breakinfo->n - 1) { + g2 = mg0 + 1; + gs2 = mg + 1; + } else { + g2 = breakinfo->textrow[i + 1].r1 - textrow->r2 - 1; + gs2 = breakinfo->textrow[i].gap; + } +#if (WILLUSDEBUGX & 2) + printf(" rowheight[%d] = %d, mh=%d, gs1=%d, gs2=%d\n",i,trh,gs1,gs2); +#endif + gap_is_big = (trh >= mh || (gs1 >= mg && gs2 >= mg)); + /* + ** Is the row width small and centered? If so, it should probably + ** be attached to its nearest neighbor--it's usually a fragment of + ** an equation or a table/figure. + */ + row_width_inches = (double) (textrow->c2 - textrow->c1 + 1) / src_dpi; + m1 = fabs(textrow->c1 - c1) / nc; + m2 = fabs(textrow->c2 - c2) / nc; + row_too_small = m1 > 0.1 && m2 > 0.1 + && row_width_inches < little_piece_threshold_inches + && (g1 <= mg1 || g2 <= mg1); +#if (WILLUSDEBUGX & 2) + printf(" m1=%g, m2=%g, rwi=%g, g1=%d, g2=%d, mg0=%d\n",m1,m2,row_width_inches,g1,g2,mg0); +#endif + if (gap_is_big && !row_too_small) + continue; +#if (WILLUSDEBUGX & 2) + printf(" row[%d] to be combined w/next row.\n",i); +#endif + if (row_too_small) { + if (g1 < g2) + i--; + } else { + if (gs1 < gs2) + i--; + } + /* + printf("Removing row. 
nrows=%d, rh=%d, gs1=%d, gs2=%d\n",breakinfo->n,trh,gs1,gs2); + printf(" mh = %d, mg = %d\n",rh[breakinfo->n/2],gap[(breakinfo->n-1)/2]); + */ + breakinfo->textrow[i].r2 = breakinfo->textrow[i + 1].r2; + if (breakinfo->textrow[i + 1].c2 > breakinfo->textrow[i].c2) + breakinfo->textrow[i].c2 = breakinfo->textrow[i + 1].c2; + if (breakinfo->textrow[i + 1].c1 < breakinfo->textrow[i].c1) + breakinfo->textrow[i].c1 = breakinfo->textrow[i + 1].c1; + /* Re-compute rowbase, capheight, lcheight */ + { + BMPREGION newregion; + newregion = (*region); + newregion.c1 = breakinfo->textrow[i].c1; + newregion.c2 = breakinfo->textrow[i].c2; + newregion.r1 = breakinfo->textrow[i].r1; + newregion.r2 = breakinfo->textrow[i].r2; + bmpregion_trim_margins(&newregion, colcount, rowcount, 0x1f); + newregion.c1 = breakinfo->textrow[i].c1; + newregion.c2 = breakinfo->textrow[i].c2; + newregion.r1 = breakinfo->textrow[i].r1; + newregion.r2 = breakinfo->textrow[i].r2; + textrow_assign_bmpregion(&breakinfo->textrow[i], &newregion); + } + for (j = i + 1; j < breakinfo->n - 1; j++) + breakinfo->textrow[j] = breakinfo->textrow[j + 1]; + breakinfo->n--; + i--; + } + willus_dmem_free(16, (double **) &rh, funcname); +} + +static void breakinfo_alloc(int index, BREAKINFO *breakinfo, int nrows) + +{ + static char *funcname = "breakinfo_alloc"; + + willus_dmem_alloc_warn(index, (void **) &breakinfo->textrow, + sizeof(TEXTROW) * (nrows / 2 + 2), funcname, 10); +} + +static void breakinfo_free(int index, BREAKINFO *breakinfo) + +{ + static char *funcname = "breakinfo_free"; + + willus_dmem_free(index, (double **) &breakinfo->textrow, funcname); +} + +static void breakinfo_sort_by_gap(BREAKINFO *breakinfo) + +{ + int n, top, n1; + TEXTROW *x, x0; + + x = breakinfo->textrow; + n = breakinfo->n; + if (n < 2) + return; + top = n / 2; + n1 = n - 1; + while (1) { + if (top > 0) { + top--; + x0 = x[top]; + } else { + x0 = x[n1]; + x[n1] = x[0]; + n1--; + if (!n1) { + x[0] = x0; + return; + } + } + { + int 
parent, child; + + parent = top; + child = top * 2 + 1; + while (child <= n1) { + if (child < n1 && x[child].gap < x[child + 1].gap) + child++; + if (x0.gap < x[child].gap) { + x[parent] = x[child]; + parent = child; + child += (parent + 1); + } else + break; + } + x[parent] = x0; + } + } +} + +static void breakinfo_sort_by_row_position(BREAKINFO *breakinfo) + +{ + int n, top, n1; + TEXTROW *x, x0; + + x = breakinfo->textrow; + n = breakinfo->n; + if (n < 2) + return; + top = n / 2; + n1 = n - 1; + while (1) { + if (top > 0) { + top--; + x0 = x[top]; + } else { + x0 = x[n1]; + x[n1] = x[0]; + n1--; + if (!n1) { + x[0] = x0; + return; + } + } + { + int parent, child; + + parent = top; + child = top * 2 + 1; + while (child <= n1) { + if (child < n1 && x[child].r1 < x[child + 1].r1) + child++; + if (x0.r1 < x[child].r1) { + x[parent] = x[child]; + parent = child; + child += (parent + 1); + } else + break; + } + x[parent] = x0; + } + } +} + +/* + ** Add a vertically-contiguous rectangular region to the destination bitmap. + ** The rectangular region may be broken up horizontally (wrapped). + */ +static void bmpregion_one_row_find_breaks(BMPREGION *region, + BREAKINFO *breakinfo, int *colcount, int *rowcount, int add_to_dbase) + +{ + int nc, i, mingap, col0, dr, thlow, thhigh; + int *bp; + BMPREGION *newregion, _newregion; + static char *funcname = "bmpregion_one_row_find_breaks"; + + if (debug) + printf("@bmpregion_one_row_find_breaks(%d,%d)-(%d,%d)\n", region->c1, + region->r1, region->c2, region->r2); + newregion = &_newregion; + (*newregion) = (*region); + bmpregion_trim_margins(newregion, colcount, rowcount, 0x1f); + region->lcheight = newregion->lcheight; + region->capheight = newregion->capheight; + region->rowbase = newregion->rowbase; + region->h5050 = newregion->h5050; + nc = newregion->c2 - newregion->c1 + 1; + breakinfo->n = 0; + if (nc < 6) + return; + /* + ** Look for "space-sized" gaps, i.e. gaps that would occur between words. 
+ ** Use this as pixel counting aperture. + */ + dr = newregion->lcheight; + mingap = dr * word_spacing * 0.8; + if (mingap < 2) + mingap = 2; + + /* + ** Find places where there are gaps (store in bp array) + ** Could do this more intelligently--maybe calculate a histogram? + */ + willus_dmem_alloc_warn(18, (void **) &bp, sizeof(int) * nc, funcname, 10); + for (i = 0; i < nc; i++) + bp[i] = 0; + if (src_left_to_right) { + for (i = newregion->c1; i <= newregion->c2; i++) { + int i1, i2, pt, sum, ii; + i1 = i - mingap / 2; + i2 = i1 + mingap - 1; + if (i1 < newregion->c1) + i1 = newregion->c1; + if (i2 > newregion->c2) + i2 = newregion->c2; + pt = (int) ((i2 - i1 + 1) * gtw_in * src_dpi + .5); + if (pt < 1) + pt = 1; + for (sum = 0, ii = i1; ii <= i2; ii++, sum += colcount[ii]) + ; + bp[i - newregion->c1] = 10 * sum / pt; + } + } else { + for (i = newregion->c2; i >= newregion->c1; i--) { + int i1, i2, pt, sum, ii; + i1 = i - mingap / 2; + i2 = i1 + mingap - 1; + if (i1 < newregion->c1) + i1 = newregion->c1; + if (i2 > newregion->c2) + i2 = newregion->c2; + pt = (int) ((i2 - i1 + 1) * gtw_in * src_dpi + .5); + if (pt < 1) + pt = 1; + for (sum = 0, ii = i1; ii <= i2; ii++, sum += colcount[ii]) + ; + bp[i - newregion->c1] = 10 * sum / pt; + } + } +#if (WILLUSDEBUGX & 4) + if (region->r1 > 3699 && region->r1<3750) + { + static int a=0; + FILE *f; + f=fopen("outbp.ep",a==0?"w":"a"); + a++; + fprintf(f,"/sa l \"(%d,%d)-(%d,%d) lch=%d\" 2\n",region->c1,region->r1,region->c2,region->r2,region->lcheight); + for (i=0;ic1; col0 <= newregion->c2; col0++) { + int copt, c0; + BMPREGION xregion; + + xregion = (*newregion); + xregion.c1 = col0; + for (; col0 <= newregion->c2; col0++) + if (bp[col0 - newregion->c1] >= thhigh) + break; + if (col0 > newregion->c2) + break; + for (col0++; col0 <= newregion->c2; col0++) + if (bp[col0 - newregion->c1] < thlow) + break; + for (copt = c0 = col0; col0 <= newregion->c2 && col0 - c0 <= dr; + col0++) { + if (bp[col0 - newregion->c1] < bp[copt 
- newregion->c1]) + copt = col0; + if (bp[col0 - newregion->c1] > thhigh) + break; + } + if (copt > newregion->c2) + copt = newregion->c2; + xregion.c2 = copt; + if (xregion.c2 - xregion.c1 < 2) + continue; + bmpregion_trim_margins(&xregion, colcount, rowcount, 0x1f); + textrow_assign_bmpregion(&breakinfo->textrow[breakinfo->n++], &xregion); + col0 = copt; + if (copt == newregion->c2) + break; + } + breakinfo_compute_col_gaps(breakinfo, newregion->c2); + willus_dmem_free(18, (double **) &bp, funcname); + + /* Remove small gaps */ + { + double median_gap; + word_gaps_add(add_to_dbase ? breakinfo : NULL, region->lcheight, + &median_gap); + breakinfo_remove_small_col_gaps(breakinfo, region->lcheight, + median_gap / 1.9); + } +} + +/* + ** pi = preserve indentation + */ +static void bmpregion_one_row_wrap_and_add(BMPREGION *region, + BREAKINFO *rowbreakinfo, int index, int i1, int i2, + MASTERINFO *masterinfo, int justflags, int *colcount, int *rowcount, + PAGEINFO *pageinfo, int line_spacing, int mean_row_gap, int rowbase, + int marking_flags, int pi) + +{ + int nc, nr, i, i0, gappix; + double aspect_ratio, region_height; + BREAKINFO *colbreaks, _colbreaks; + BMPREGION *newregion, _newregion; + +#if (WILLUSDEBUGX & 4) + printf("@bmpregion_one_row_wrap_and_add, index=%d, i1=%d, i2=%d\n",index,i1,i2); +#endif + newregion = &_newregion; + (*newregion) = (*region); + bmpregion_trim_margins(newregion, colcount, rowcount, 0xf); + nc = newregion->c2 - newregion->c1 + 1; + nr = newregion->r2 - newregion->r1 + 1; + if (nc < 6) + return; + aspect_ratio = (double) nr / nc; + region_height = (double) nr / src_dpi; + if (aspect_ratio > no_wrap_ar_limit + && region_height > no_wrap_height_limit_inches) { + newregion->r1 = region->r1; + newregion->r2 = region->r2; +#ifdef WILLUSDEBUG + printf("wrapflush6\n"); +#endif + wrapbmp_flush(masterinfo, 0, pageinfo, 1); + if (index > i1) + dst_add_gap_src_pixels("Tall region", masterinfo, + rowbreakinfo->textrow[index - 1].gap); + 
bmpregion_add(newregion, rowbreakinfo, masterinfo, 0, 0xf, 0, -1.0, 0, + 2, colcount, rowcount, pageinfo, 0xf, + rowbreakinfo->textrow[index].r2 + - rowbreakinfo->textrow[index].rowbase); + if (index < i2) + gap_override_internal = rowbreakinfo->textrow[index].gap; + return; + } + colbreaks = &_colbreaks; + colbreaks->textrow = NULL; + breakinfo_alloc(106, colbreaks, newregion->c2 - newregion->c1 + 1); + bmpregion_one_row_find_breaks(newregion, colbreaks, colcount, rowcount, 1); + if (pi && colbreaks->n > 0) { + if (src_left_to_right) + colbreaks->textrow[0].c1 = region->c1; + else + colbreaks->textrow[colbreaks->n - 1].c2 = region->c2; + } + /* + hs=0.; + for (i=0;in;i++) + hs += (colbreaks->textrow[i].r2-colbreaks->textrow[i].r1); + hs /= colbreaks->n; + */ + /* + ** Find appropriate letter height to use for word spacing + */ + { + double median_gap; + word_gaps_add(NULL, newregion->lcheight, &median_gap); + gappix = (int) (median_gap * newregion->lcheight + .5); + } +#if (WILLUSDEBUGX & 4) + printf("Before small gap removal, column breaks:\n"); + breakinfo_echo(colbreaks); +#endif +#if (WILLUSDEBUGX & 4) + printf("After small gap removal, column breaks:\n"); + breakinfo_echo(colbreaks); +#endif + if (show_marked_source) + for (i = 0; i < colbreaks->n; i++) { + BMPREGION xregion; + xregion = (*newregion); + xregion.c1 = colbreaks->textrow[i].c1; + xregion.c2 = colbreaks->textrow[i].c2; + mark_source_page(&xregion, 2, marking_flags); + } +#if (WILLUSDEBUGX & 4) + for (i=0;in;i++) + printf(" colbreak[%d] = %d - %d\n",i,colbreaks->textrow[i].c1,colbreaks->textrow[i].c2); +#endif + /* Maybe skip gaps < 0.5*median_gap or collect gap/rowheight ratios and skip small gaps */ + /* (Could be thrown off by full-justified articles where some lines have big gaps.) */ + /* Need do call a separate function that removes these gaps. 
*/ + for (i0 = 0; i0 < colbreaks->n;) { + int i1, i2, toolong, rw, remaining_width_pixels; + BMPREGION reg; + + toolong = 0; /* Avoid compiler warning */ + for (i = i0; i < colbreaks->n; i++) { + int wordgap; + + wordgap = wrapbmp_ends_in_hyphen() ? 0 : gappix; + i1 = src_left_to_right ? i0 : colbreaks->n - 1 - i; + i2 = src_left_to_right ? i : colbreaks->n - 1 - i0; + rw = (colbreaks->textrow[i2].c2 - colbreaks->textrow[i1].c1 + 1); + remaining_width_pixels = wrapbmp_remaining(); + toolong = (rw + wordgap > remaining_width_pixels); +#if (WILLUSDEBUGX & 4) + printf(" i1=%d, i2=%d, rw=%d, rw+gap=%d, remainder=%d, toolong=%d\n",i1,i2,rw,rw+wordgap,remaining_width_pixels,toolong); +#endif + /* + ** If we're too long with just one word and there is already + ** stuff on the queue, then flush it and re-evaluate. + */ + if (i == i0 && toolong && wrapbmp_width() > 0) { +#ifdef WILLUSDEBUG + printf("wrapflush8\n"); +#endif + wrapbmp_flush(masterinfo, 1, pageinfo, 0); + i--; + continue; + } + /* + ** If we're not too long and we're not done yet, add another word. + */ + if (i < colbreaks->n - 1 && !toolong) + continue; + /* + ** Add the regions from i0 to i (or i0 to i-1) + */ + break; + } + if (i > i0 && toolong) + i--; + i1 = src_left_to_right ? i0 : colbreaks->n - 1 - i; + i2 = src_left_to_right ? 
i : colbreaks->n - 1 - i0; + reg = (*newregion); + reg.c1 = colbreaks->textrow[i1].c1; + reg.c2 = colbreaks->textrow[i2].c2; +#if (WILLUSDEBUGX & 4) + printf(" Adding i1=%d to i2=%d\n",i1,i2); +#endif + /* Trim the word top/bottom */ + bmpregion_trim_margins(®, colcount, rowcount, 0xc); + reg.c1 = colbreaks->textrow[i1].c1; + reg.c2 = colbreaks->textrow[i2].c2; + reg.lcheight = newregion->lcheight; + reg.capheight = newregion->capheight; + reg.rowbase = newregion->rowbase; + reg.h5050 = newregion->h5050; + if (reg.r1 > reg.rowbase) + reg.r1 = reg.rowbase; + if (reg.r2 < reg.rowbase) + reg.r2 = reg.rowbase; + /* Add it to the existing line queue */ + wrapbmp_add(®, gappix, line_spacing, rowbase, mean_row_gap, + justflags); + if (toolong) { +#ifdef WILLUSDEBUG + printf("wrapflush7\n"); +#endif + wrapbmp_flush(masterinfo, 1, pageinfo, 0); + } + i0 = i + 1; + } + breakinfo_free(106, colbreaks); +} + +static WILLUSBITMAP _wrapbmp, *wrapbmp; +static int wrapbmp_base; +static int wrapbmp_line_spacing; +static int wrapbmp_gap; +static int wrapbmp_bgcolor; +static int wrapbmp_just; +static int wrapbmp_rhmax; +static int wrapbmp_thmax; +static int wrapbmp_maxgap = 2; +static int wrapbmp_height_extended; +static HYPHENINFO wrapbmp_hyphen; + +void wrapbmp_init(void) + +{ + wrapbmp = &_wrapbmp; + bmp_init(wrapbmp); + wrapbmp_set_color(dst_color); + wrapbmp->width = 0; + wrapbmp->height = 0; + wrapbmp_base = 0; + wrapbmp_line_spacing = -1; + wrapbmp_gap = -1; + wrapbmp_bgcolor = -1; + wrapbmp_height_extended = 0; + wrapbmp_just = 0x8f; + wrapbmp_rhmax = -1; + wrapbmp_thmax = -1; + wrapbmp_hyphen.ch = -1; + just_flushed_internal = 0; + beginning_gap_internal = -1; + last_h5050_internal = -1; +} + +static int wrapbmp_ends_in_hyphen(void) + +{ + return (wrapbmp_hyphen.ch >= 0); +} + +static void wrapbmp_set_color(int is_color) + +{ + if (is_color) + wrapbmp->bpp = 24; + else { + int i; + + wrapbmp->bpp = 8; + for (i = 0; i < 256; i++) + wrapbmp->red[i] = wrapbmp->blue[i] = 
wrapbmp->green[i] = i; + } +} + +static void wrapbmp_free(void) + +{ + bmp_free(wrapbmp); +} + +static void wrapbmp_set_maxgap(int value) + +{ + wrapbmp_maxgap = value; +} + +static int wrapbmp_width(void) + +{ + return (wrapbmp->width); +} + +static int wrapbmp_remaining(void) + +{ + int maxpix, w; + maxpix = max_region_width_inches * src_dpi; + /* Don't include hyphen if wrapbmp ends in a hyphen */ + if (wrapbmp_hyphen.ch < 0) + w = wrapbmp->width; + else if (src_left_to_right) + w = wrapbmp_hyphen.c2 + 1; + else + w = wrapbmp->width - wrapbmp_hyphen.c2; + return (maxpix - w); +} + +/* + ** region = bitmap region to add to line + ** gap = horizontal pixel gap between existing region and region being added + ** line_spacing = desired spacing between lines of text (pixels) + ** rbase = position of baseline in region + ** gio = gap if over--gap above top of text if it goes over line_spacing. + */ +// static int bcount=0; +static void wrapbmp_add(BMPREGION *region, int gap, int line_spacing, int rbase, + int gio, int just_flags) + +{ + WILLUSBITMAP *tmp, _tmp; + int i, rh, th, bw, new_base, h2, bpp, width0; +// static char filename[256]; + +#ifdef WILLUSDEBUG + printf("@wrapbmp_add %d x %d (w=%d).\n",region->c2-region->c1+1,region->r2-region->r1+1,wrapbmp->width); +#endif + bmpregion_hyphen_detect(region); /* Figure out if what we're adding ends in a hyphen */ + if (wrapbmp_ends_in_hyphen()) + gap = 0; + wrapbmp_hyphen_erase(); + just_flushed_internal = 0; // Reset "just flushed" flag + beginning_gap_internal = -1; // Reset top-of-page or top-of-column gap + last_h5050_internal = -1; // Reset last row font size + if (line_spacing > wrapbmp_line_spacing) + wrapbmp_line_spacing = line_spacing; + if (gio > wrapbmp_gap) + wrapbmp_gap = gio; + wrapbmp_bgcolor = region->bgcolor; + wrapbmp_just = just_flags; + /* + printf(" c1=%d, c2=%d, r1=%d, r2=%d\n",region->c1,region->c2,region->r1,region->r2); + printf(" gap=%d, line_spacing=%d, rbase=%d, 
gio=%d\n",gap,line_spacing,rbase,gio); + */ + bpp = dst_color ? 3 : 1; + rh = rbase - region->r1 + 1; + if (rh > wrapbmp_rhmax) + wrapbmp_rhmax = rh; + th = rh + (region->r2 - rbase); + if (th > wrapbmp_thmax) + wrapbmp_thmax = th; + /* + { + WILLUSBITMAP *bmp,_bmp; + + bmp=&_bmp; + bmp_init(bmp); + bmp->height=region->r2-region->r1+1; + bmp->width=region->c2-region->c1+1; + bmp->bpp=bpp*8; + if (bpp==1) + for (i=0;i<256;i++) + bmp->red[i]=bmp->blue[i]=bmp->green[i]=i; + bmp_alloc(bmp); + bw=bmp_bytewidth(bmp); + memset(bmp_rowptr_from_top(bmp,0),255,bw*bmp->height); + for (i=region->r1;i<=region->r2;i++) + { + unsigned char *d,*s; + d=bmp_rowptr_from_top(bmp,i-region->r1); + s=bmp_rowptr_from_top(dst_color?region->bmp:region->bmp8,i)+bpp*region->c1; + if (i==rbase) + memset(d,0,bw); + else + memcpy(d,s,bw); + } + sprintf(filename,"out%05d.png",bcount++); + bmp_write(bmp,filename,stdout,100); + bmp_free(bmp); + } + */ + if (wrapbmp->width == 0) { + /* Put appropriate gap in */ + if (last_rowbase_internal >= 0 + && rh < wrapbmp_line_spacing - last_rowbase_internal) { + rh = wrapbmp_line_spacing - last_rowbase_internal; + if (rh < 2) + rh = 2; + th = rh + (region->r2 - rbase); + wrapbmp_height_extended = 0; + } else + wrapbmp_height_extended = (last_rowbase_internal >= 0); + wrapbmp_base = rh - 1; + wrapbmp->height = th; +#ifdef WILLUSDEBUG + printf("@wrapbmp_add: bmpheight set to %d (wls=%d, lrbi=%d)\n",wrapbmp->height,wrapbmp_line_spacing,last_rowbase_internal); +#endif + wrapbmp->width = region->c2 - region->c1 + 1; + bmp_alloc(wrapbmp); + bw = bmp_bytewidth(wrapbmp); + memset(bmp_rowptr_from_top(wrapbmp, 0), 255, bw * wrapbmp->height); + for (i = region->r1; i <= region->r2; i++) { + unsigned char *d, *s; + d = bmp_rowptr_from_top(wrapbmp, wrapbmp_base + (i - rbase)); + s = bmp_rowptr_from_top(dst_color ? region->bmp : region->bmp8, i) + + bpp * region->c1; + memcpy(d, s, bw); + } +#ifdef WILLUSDEBUG + if (wrapbmp->height<=wrapbmp_base) + { + printf("1. 
SCREEECH!\n"); + printf("wrapbmp = %d x %d, base=%d\n",wrapbmp->width,wrapbmp->height,wrapbmp_base); + exit(10); + } +#endif + /* Copy hyphen info from added region */ + wrapbmp_hyphen = region->hyphen; + if (wrapbmp_ends_in_hyphen()) { + wrapbmp_hyphen.r1 += (wrapbmp_base - rbase); + wrapbmp_hyphen.r2 += (wrapbmp_base - rbase); + wrapbmp_hyphen.ch -= region->c1; + wrapbmp_hyphen.c2 -= region->c1; + } + return; + } + width0 = wrapbmp->width; /* Starting wrapbmp width */ + tmp = &_tmp; + bmp_init(tmp); + bmp_copy(tmp, wrapbmp); + tmp->width += gap + region->c2 - region->c1 + 1; + if (rh > wrapbmp_base) { + wrapbmp_height_extended = 1; + new_base = rh - 1; + } else + new_base = wrapbmp_base; + if (region->r2 - rbase > wrapbmp->height - 1 - wrapbmp_base) + h2 = region->r2 - rbase; + else + h2 = wrapbmp->height - 1 - wrapbmp_base; + tmp->height = new_base + h2 + 1; + bmp_alloc(tmp); + bw = bmp_bytewidth(tmp); + memset(bmp_rowptr_from_top(tmp, 0), 255, bw * tmp->height); + bw = bmp_bytewidth(wrapbmp); + /* + printf("3. wbh=%d x %d, tmp=%d x %d x %d, new_base=%d, wbbase=%d\n",wrapbmp->width,wrapbmp->height,tmp->width,tmp->height,tmp->bpp,new_base,wrapbmp_base); + */ + for (i = 0; i < wrapbmp->height; i++) { + unsigned char *d, *s; + d = bmp_rowptr_from_top(tmp, i + new_base - wrapbmp_base) + + (src_left_to_right ? 0 : tmp->width - 1 - wrapbmp->width) + * bpp; + s = bmp_rowptr_from_top(wrapbmp, i); + memcpy(d, s, bw); + } + bw = bpp * (region->c2 - region->c1 + 1); + if (region->r1 + new_base - rbase < 0 + || region->r2 + new_base - rbase > tmp->height - 1) { + aprintf(ANSI_YELLOW "INTERNAL ERROR--TMP NOT DIMENSIONED PROPERLY.\n"); + aprintf("(%d-%d), tmp->height=%d\n" ANSI_NORMAL, + region->r1 + new_base - rbase, region->r2 + new_base - rbase, + tmp->height); + exit(10); + } + for (i = region->r1; i <= region->r2; i++) { + unsigned char *d, *s; + + d = bmp_rowptr_from_top(tmp, i + new_base - rbase) + + (src_left_to_right ? 
wrapbmp->width + gap : 0) * bpp; + s = bmp_rowptr_from_top(dst_color ? region->bmp : region->bmp8, i) + + bpp * region->c1; + memcpy(d, s, bw); + } + bmp_copy(wrapbmp, tmp); + bmp_free(tmp); + /* Copy region's hyphen info */ + wrapbmp_hyphen = region->hyphen; + if (wrapbmp_ends_in_hyphen()) { + wrapbmp_hyphen.r1 += (new_base - rbase); + wrapbmp_hyphen.r2 += (new_base - rbase); + if (src_left_to_right) { + wrapbmp_hyphen.ch += width0 + gap - region->c1; + wrapbmp_hyphen.c2 += width0 + gap - region->c1; + } else { + wrapbmp_hyphen.ch -= region->c1; + wrapbmp_hyphen.c2 -= region->c1; + } + } + wrapbmp_base = new_base; +#ifdef WILLUSDEBUG + if (wrapbmp->height<=wrapbmp_base) + { + printf("2. SCREEECH!\n"); + printf("wrapbmp = %d x %d, base=%d\n",wrapbmp->width,wrapbmp->height,wrapbmp_base); + exit(10); + } +#endif +} + +static void wrapbmp_flush(MASTERINFO *masterinfo, int allow_full_justification, + PAGEINFO *pageinfo, int use_bgi) + +{ + BMPREGION region; + WILLUSBITMAP *bmp8, _bmp8; + int gap, just, nomss, dh; + int *colcount, *rowcount; + static char *funcname = "wrapbmp_flush"; +// char filename[256]; + + if (wrapbmp->width <= 0) { + if (use_bgi == 1 && beginning_gap_internal > 0) + dst_add_gap_src_pixels("wrapbmp_bgi0", masterinfo, + beginning_gap_internal); + beginning_gap_internal = -1; + last_h5050_internal = -1; + if (use_bgi) + just_flushed_internal = 1; + return; + } +#ifdef WILLUSDEBUG + printf("@wrapbmp_flush()\n"); +#endif + /* + { + char filename[256]; + int i; + static int bcount=0; + for (i=0;iheight;i++) + { + unsigned char *p; + int j; + p=bmp_rowptr_from_top(wrapbmp,i); + for (j=0;jwidth;j++) + if (p[j]>240) + p[j]=192; + } + sprintf(filename,"out%05d.png",bcount++); + bmp_write(wrapbmp,filename,stdout,100); + } + */ + colcount = rowcount = NULL; + willus_dmem_alloc_warn(19, (void **) &colcount, + (wrapbmp->width + 16) * sizeof(int), funcname, 10); + willus_dmem_alloc_warn(20, (void **) &rowcount, + (wrapbmp->height + 16) * sizeof(int), funcname, 
10); + region.c1 = 0; + region.c2 = wrapbmp->width - 1; + region.r1 = 0; + region.r2 = wrapbmp->height - 1; + region.rowbase = wrapbmp_base; + region.bmp = wrapbmp; + region.bgcolor = wrapbmp_bgcolor; +#ifdef WILLUSDEBUG + printf("Bitmap is %d x %d (baseline=%d)\n",wrapbmp->width,wrapbmp->height,wrapbmp_base); +#endif + + /* Sanity check on row spacing -- don't let it be too large. */ + nomss = wrapbmp_rhmax * 1.7; /* Nominal single-spaced height for this row */ + if (last_rowbase_internal < 0) + dh = 0; + else { + dh = (int) (wrapbmp_line_spacing - last_rowbase_internal + - 1.2 * fabs(vertical_line_spacing) * nomss + .5); + if (vertical_line_spacing < 0.) { + int dh1; + if (wrapbmp_maxgap > 0) + dh1 = region.rowbase + 1 - wrapbmp_rhmax - wrapbmp_maxgap; + else + dh1 = (int) (wrapbmp_line_spacing - last_rowbase_internal + - 1.2 * nomss + .5); + if (dh1 > dh) + dh = dh1; + } + } + if (dh > 0) { +#ifdef WILLUSDEBUG + aprintf(ANSI_YELLOW "dh > 0 = %d" ANSI_NORMAL "\n",dh); + printf(" wrapbmp_line_spacing=%d\n",wrapbmp_line_spacing); + printf(" nomss = %d\n",nomss); + printf(" vls = %g\n",vertical_line_spacing); + printf(" lrbi=%d\n",last_rowbase_internal); + printf(" wrapbmp_maxgap=%d\n",wrapbmp_maxgap); + printf(" wrapbmp_rhmax=%d\n",wrapbmp_rhmax); +#endif + region.r1 = dh; + /* + if (dh>200) + { + bmp_write(wrapbmp,"out.png",stdout,100); + exit(10); + } + */ + } + if (wrapbmp->bpp == 24) { + bmp8 = &_bmp8; + bmp_init(bmp8); + bmp_convert_to_greyscale_ex(bmp8, wrapbmp); + region.bmp8 = bmp8; + } else + region.bmp8 = wrapbmp; + if (gap_override_internal > 0) { + region.r1 = wrapbmp_base - wrapbmp_rhmax + 1; + if (region.r1 < 0) + region.r1 = 0; + if (region.r1 > wrapbmp_base) + region.r1 = wrapbmp_base; + gap = gap_override_internal; + gap_override_internal = -1; + } else { + if (wrapbmp_height_extended) + gap = wrapbmp_gap; + else + gap = 0; + } +#ifdef WILLUSDEBUG + printf("wf: gap=%d\n",gap); +#endif + if (gap > 0) + dst_add_gap_src_pixels("wrapbmp", masterinfo, 
gap); + if (!allow_full_justification) + just = (wrapbmp_just & 0xcf) | 0x20; + else + just = wrapbmp_just; + bmpregion_add(®ion, NULL, masterinfo, 0, 0, 0, -1.0, just, 2, colcount, + rowcount, pageinfo, 0xf, wrapbmp->height - 1 - wrapbmp_base); + if (wrapbmp->bpp == 24) + bmp_free(bmp8); + willus_dmem_free(20, (double **) &rowcount, funcname); + willus_dmem_free(19, (double **) &colcount, funcname); + wrapbmp->width = 0; + wrapbmp->height = 0; + wrapbmp_line_spacing = -1; + wrapbmp_gap = -1; + wrapbmp_rhmax = -1; + wrapbmp_thmax = -1; + wrapbmp_hyphen.ch = -1; + if (use_bgi == 1 && beginning_gap_internal > 0) + dst_add_gap_src_pixels("wrapbmp_bgi1", masterinfo, + beginning_gap_internal); + beginning_gap_internal = -1; + last_h5050_internal = -1; + if (use_bgi) + just_flushed_internal = 1; +} + +static void wrapbmp_hyphen_erase(void) + +{ + WILLUSBITMAP *bmp, _bmp; + int bw, bpp, c0, c1, c2, i; + + if (wrapbmp_hyphen.ch < 0) + return; +#if (WILLUSDEBUGX & 16) + printf("@hyphen_erase, bmp=%d x %d x %d\n",wrapbmp->width,wrapbmp->height,wrapbmp->bpp); + printf(" ch=%d, c2=%d, r1=%d, r2=%d\n",wrapbmp_hyphen.ch,wrapbmp_hyphen.c2,wrapbmp_hyphen.r1,wrapbmp_hyphen.r2); +#endif + bmp = &_bmp; + bmp_init(bmp); + bmp->bpp = wrapbmp->bpp; + if (bmp->bpp == 8) + for (i = 0; i < 256; i++) + bmp->red[i] = bmp->blue[i] = bmp->green[i] = i; + bmp->height = wrapbmp->height; + if (src_left_to_right) { + bmp->width = wrapbmp_hyphen.c2 + 1; + c0 = 0; + c1 = wrapbmp_hyphen.ch; + c2 = bmp->width - 1; + } else { + bmp->width = wrapbmp->width - wrapbmp_hyphen.c2; + c0 = wrapbmp_hyphen.c2; + c1 = 0; + c2 = wrapbmp_hyphen.ch - wrapbmp_hyphen.c2; + } + bmp_alloc(bmp); + bpp = bmp->bpp == 24 ? 
3 : 1; + bw = bpp * bmp->width; + for (i = 0; i < bmp->height; i++) + memcpy(bmp_rowptr_from_top(bmp, i), + bmp_rowptr_from_top(wrapbmp, i) + bpp * c0, bw); + bw = (c2 - c1 + 1) * bpp; + if (bw > 0) + for (i = wrapbmp_hyphen.r1; i <= wrapbmp_hyphen.r2; i++) + memset(bmp_rowptr_from_top(bmp, i) + bpp * c1, 255, bw); +#if (WILLUSDEBUGX & 16) + { + static int count=1; + char filename[256]; + sprintf(filename,"be%04d.png",count); + bmp_write(wrapbmp,filename,stdout,100); + sprintf(filename,"ae%04d.png",count); + bmp_write(bmp,filename,stdout,100); + count++; + } +#endif + bmp_copy(wrapbmp, bmp); + bmp_free(bmp); +} + +/* + ** src is only allocated if dst_color != 0 + */ +static void white_margins(WILLUSBITMAP *src, WILLUSBITMAP *srcgrey) + +{ + int i, n; + BMPREGION *region, _region; + + region = &_region; + region->bmp = srcgrey; + get_white_margins(region); + n = region->c1; + for (i = 0; i < srcgrey->height; i++) { + unsigned char *p; + if (dst_color) { + p = bmp_rowptr_from_top(src, i); + memset(p, 255, n * 3); + } + p = bmp_rowptr_from_top(srcgrey, i); + memset(p, 255, n); + } + n = srcgrey->width - 1 - region->c2; + for (i = 0; i < srcgrey->height; i++) { + unsigned char *p; + if (dst_color) { + p = bmp_rowptr_from_top(src, i) + 3 * (src->width - n); + memset(p, 255, n * 3); + } + p = bmp_rowptr_from_top(srcgrey, i) + srcgrey->width - n; + memset(p, 255, n); + } + n = region->r1; + for (i = 0; i < n; i++) { + unsigned char *p; + if (dst_color) { + p = bmp_rowptr_from_top(src, i); + memset(p, 255, src->width * 3); + } + p = bmp_rowptr_from_top(srcgrey, i); + memset(p, 255, srcgrey->width); + } + n = srcgrey->height - 1 - region->r2; + for (i = srcgrey->height - n; i < srcgrey->height; i++) { + unsigned char *p; + if (dst_color) { + p = bmp_rowptr_from_top(src, i); + memset(p, 255, src->width * 3); + } + p = bmp_rowptr_from_top(srcgrey, i); + memset(p, 255, srcgrey->width); + } +} + +static void get_white_margins(BMPREGION *region) + +{ + int n; + double defval; + 
+ defval = 0.25; + if (mar_left < 0.) + mar_left = defval; + n = (int) (0.5 + mar_left * src_dpi); + if (n > region->bmp->width) + n = region->bmp->width; + region->c1 = n; + if (mar_right < 0.) + mar_right = defval; + n = (int) (0.5 + mar_right * src_dpi); + if (n > region->bmp->width) + n = region->bmp->width; + region->c2 = region->bmp->width - 1 - n; + if (mar_top < 0.) + mar_top = defval; + n = (int) (0.5 + mar_top * src_dpi); + if (n > region->bmp->height) + n = region->bmp->height; + region->r1 = n; + if (mar_bot < 0.) + mar_bot = defval; + n = (int) (0.5 + mar_bot * src_dpi); + if (n > region->bmp->height) + n = region->bmp->height; + region->r2 = region->bmp->height - 1 - n; +} + +/* + ** bitmap_orientation() + ** + ** 1.0 means neutral + ** + ** >> 1.0 means document is likely portrait (no rotation necessary) + ** (max is 100.) + ** + ** << 1.0 means document is likely landscape (need to rotate it) + ** (min is 0.01) + ** + */ +static double bitmap_orientation(WILLUSBITMAP *bmp) + +{ + int i, ic, wtcalc; + double hsum, vsum, rat; + + wtcalc = -1; + for (vsum = 0., hsum = 0., ic = 0, i = 20; i <= 85; i += 5, ic++) { + double nv, nh; + int wth, wtv; + +#ifdef DEBUG + printf("h %d:\n",i); +#endif + if (ic == 0) + wth = -1; + else + wth = wtcalc; + wth = -1; + nh = bmp_inflections_horizontal(bmp, 8, i, &wth); +#ifdef DEBUG + { + FILE *f; + f=fopen("inf.ep","a"); + fprintf(f,"/ag\n"); + fclose(f); + } + printf("v %d:\n",i); +#endif + if (ic == 0) + wtv = -1; + else + wtv = wtcalc; + wtv = -1; + nv = bmp_inflections_vertical(bmp, 8, i, &wtv); + if (ic == 0) { + if (wtv > wth) + wtcalc = wtv; + else + wtcalc = wth; + continue; + } +// exit(10); + hsum += nh * i * i * i; + vsum += nv * i * i * i; + } + if (vsum == 0. && hsum == 0.) 
+ rat = 1.0; + else if (hsum < vsum && hsum / vsum < .01) + rat = 100.; + else + rat = vsum / hsum; + if (rat < .01) + rat = .01; + // printf(" page %2d: %8.4f\n",pagenum,rat); + // fprintf(out,"\t%8.4f",vsum/hsum); + // fprintf(out,"\n"); + return (rat); +} + +static double bmp_inflections_vertical(WILLUSBITMAP *srcgrey, int ndivisions, + int delta, int *wthresh) + +{ + int y0, y1, ny, i, nw, nisum, ni, wt, wtmax; + double *g; + char *funcname = "bmp_inflections_vertical"; + + nw = srcgrey->width / ndivisions; + y0 = srcgrey->height / 6; + y1 = srcgrey->height - y0; + ny = y1 - y0; + willus_dmem_alloc_warn(21, (void **) &g, ny * sizeof(double), funcname, 10); + wtmax = -1; + for (nisum = 0, i = 0; i < 10; i++) { + int x0, x1, nx, j; + + x0 = (srcgrey->width - nw) * (i + 2) / 13; + x1 = x0 + nw; + if (x1 > srcgrey->width) + x1 = srcgrey->width; + nx = x1 - x0; + for (j = y0; j < y1; j++) { + int k, rsum; + unsigned char *p; + + p = bmp_rowptr_from_top(srcgrey, j) + x0; + for (rsum = k = 0; k < nx; k++, p++) + rsum += p[0]; + g[j - y0] = (double) rsum / nx; + } + wt = (*wthresh); + ni = inflection_count(g, ny, delta, &wt); + if ((*wthresh) < 0 && ni >= 3 && wt > wtmax) + wtmax = wt; + if (ni > nisum) + nisum = ni; + } + willus_dmem_free(21, &g, funcname); + if ((*wthresh) < 0) + (*wthresh) = wtmax; + return (nisum); +} + +static double bmp_inflections_horizontal(WILLUSBITMAP *srcgrey, int ndivisions, + int delta, int *wthresh) + +{ + int x0, x1, nx, bw, i, nh, nisum, ni, wt, wtmax; + double *g; + char *funcname = "bmp_inflections_vertical"; + + nh = srcgrey->height / ndivisions; + x0 = srcgrey->width / 6; + x1 = srcgrey->width - x0; + nx = x1 - x0; + bw = bmp_bytewidth(srcgrey); + willus_dmem_alloc_warn(22, (void **) &g, nx * sizeof(double), funcname, 10); + wtmax = -1; + for (nisum = 0, i = 0; i < 10; i++) { + int y0, y1, ny, j; + + y0 = (srcgrey->height - nh) * (i + 2) / 13; + y1 = y0 + nh; + if (y1 > srcgrey->height) + y1 = srcgrey->height; + ny = y1 - y0; + for 
(j = x0; j < x1; j++) { + int k, rsum; + unsigned char *p; + + p = bmp_rowptr_from_top(srcgrey, y0) + j; + for (rsum = k = 0; k < ny; k++, p += bw) + rsum += p[0]; + g[j - x0] = (double) rsum / ny; + } + wt = (*wthresh); + ni = inflection_count(g, nx, delta, &wt); + if ((*wthresh) < 0 && ni >= 3 && wt > wtmax) + wtmax = wt; + if (ni > nisum) + nisum = ni; + } + willus_dmem_free(22, &g, funcname); + if ((*wthresh) < 0) + (*wthresh) = wtmax; + return (nisum); +} + +static int inflection_count(double *x, int n, int delta, int *wthresh) + +{ + int i, i0, ni, ww, c, ct, wt, mode; + double meandi, meandisq, f1, f2, stdev; + double *xs; + static int hist[256]; + static char *funcname = "inflection_count"; + + /* Find threshold white value that peaks must exceed */ + if ((*wthresh) < 0) { + for (i = 0; i < 256; i++) + hist[i] = 0; + for (i = 0; i < n; i++) { + i0 = floor(x[i]); + if (i0 > 255) + i0 = 255; + hist[i0]++; + } + ct = n * .15; + for (c = 0, i = 255; i >= 0; i--) { + c += hist[i]; + if (c > ct) + break; + } + wt = i - 10; + if (wt < 192) + wt = 192; +#ifdef DEBUG + printf("wt=%d\n",wt); +#endif + (*wthresh) = wt; + } else + wt = (*wthresh); + ww = n / 150; + if (ww < 1) + ww = 1; + willus_dmem_alloc_warn(23, (void **) &xs, sizeof(double) * n, funcname, 10); + for (i = 0; i < n - ww; i++) { + int j; + for (xs[i] = 0., j = 0; j < ww; j++, xs[i] += x[i + j]) + ; + xs[i] /= ww; + } + meandi = meandisq = 0.; + if (xs[0] <= wt - delta) + mode = 1; + else if (xs[0] >= wt) + mode = -1; + else + mode = 0; + for (i0 = 0, ni = 0, i = 1; i < n - ww; i++) { + if (mode == 1 && xs[i] >= wt) { + if (i0 > 0) { + meandi += i - i0; + meandisq += (i - i0) * (i - i0); + ni++; + } + i0 = i; + mode = -1; + continue; + } + if (xs[i] <= wt - delta) + mode = 1; + } + stdev = 1.0; /* Avoid compiler warning */ + if (ni > 0) { + meandi /= ni; + meandisq /= ni; + stdev = sqrt(fabs(meandi * meandi - meandisq)); + } + f1 = meandi / n; + if (f1 > .15) + f1 = .15; + if (ni > 2) { + if (stdev / 
meandi < .05) + f2 = 20.; + else + f2 = meandi / stdev; + } else + f2 = 1.; +#ifdef DEBUG + printf(" ni=%3d, f1=%8.4f, f2=%8.4f, f1*f2*ni=%8.4f\n",ni,f1,f2,f1*f2*ni); + { + static int count=0; + FILE *f; + int i; + f=fopen("inf.ep",count==0?"w":"a"); + count++; + fprintf(f,"/sa l \"%d\" 1\n",ni); + for (i=0;in = boxes->na = 0; + boxes->box = NULL; +} + +static void pdfboxes_free(PDFBOXES *boxes) + +{ + static char *funcname = "pdfboxes_free"; + willus_dmem_free(24, (double **) &boxes->box, funcname); +} + +#ifdef COMMENT +static void pdfboxes_add_box(PDFBOXES *boxes,PDFBOX *box) + +{ + static char *funcname="pdfboxes_add_box"; + + if (boxes->n>=boxes->na) + { + int newsize; + + newsize = boxes->na < 1024 ? 2048 : boxes->na*2; + /* Just calls willus_mem_alloc if oldsize==0 */ + willus_mem_realloc_robust_warn((void **)&boxes->box,newsize*sizeof(PDFBOX), + boxes->na*sizeof(PDFBOX),funcname,10); + boxes->na=newsize; + } + boxes->box[boxes->n++]=(*box); +} + +static void pdfboxes_delete(PDFBOXES *boxes,int n) + +{ + if (n>0 && nn) + { + int i; + for (i=0;in-n;i++) + boxes->box[i]=boxes->box[i+n]; + } + boxes->n -= n; + if (boxes->n < 0) + boxes->n = 0; +} +#endif + +/* + ** Track gaps between words so that we can tell when one is out of family. + ** lcheight = height of a lowercase letter. + */ +static void word_gaps_add(BREAKINFO *breakinfo, int lcheight, + double *median_gap) + +{ + static int nn = 0; + static double gap[1024]; + static char *funcname = "word_gaps_add"; + + if (breakinfo != NULL && breakinfo->n > 1) { + int i; + + for (i = 0; i < breakinfo->n - 1; i++) { + double g; + g = (double) breakinfo->textrow[i].gap / lcheight; + if (g >= word_spacing) { + gap[nn & 0x3ff] = g; + nn++; + } + } + } + if (median_gap != NULL) { + if (nn > 0) { + int n; + static double *gap_sorted; + + n = (nn > 1024) ? 
1024 : nn; + willus_dmem_alloc_warn(28, (void **) &gap_sorted, + sizeof(double) * n, funcname, 10); + memcpy(gap_sorted, gap, n * sizeof(double)); + sortd(gap_sorted, n); + (*median_gap) = gap_sorted[n / 2]; + willus_dmem_free(28, &gap_sorted, funcname); + } else + (*median_gap) = 0.7; + } +} + +/* + ** bmp must be grayscale! (cbmp = color, can be null) + */ +static void bmp_detect_vertical_lines(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp, + double dpi, double minwidth_in, double maxwidth_in, double minheight_in, + double anglemax_deg, int white_thresh) + +{ + int tc, iangle, irow, icol; + int rowstep, na, angle_sign, ccthresh; + int pixmin, halfwidth, bytewidth; + int bs1, nrsteps, dp; + double anglestep; + WILLUSBITMAP *tmp, _tmp; + unsigned char *p0; + + if (debug) + printf("At bmp_detect_vertical_lines...\n"); + if (!bmp_is_grayscale(bmp)) { + printf( + "Internal error. bmp_detect_vertical_lines passed a non-grayscale bitmap.\n"); + exit(10); + } + tmp = &_tmp; + bmp_init(tmp); + bmp_copy(tmp, bmp); + dp = bmp_rowptr_from_top(tmp, 0) - bmp_rowptr_from_top(bmp, 0); + bytewidth = bmp_bytewidth(bmp); + pixmin = (int) (minwidth_in * dpi + .5); + if (pixmin < 1) + pixmin = 1; + halfwidth = pixmin / 4; + if (halfwidth < 1) + halfwidth = 1; + anglestep = atan2((double) halfwidth / dpi, minheight_in); + na = (int) ((anglemax_deg * PI / 180.) / anglestep + .5); + if (na < 1) + na = 1; + rowstep = (int) (dpi / 40. 
+ .5); + if (rowstep < 2) + rowstep = 2; + nrsteps = bmp->height / rowstep; + bs1 = bytewidth * rowstep; + ccthresh = (int) (minheight_in * dpi / rowstep + .5); + if (ccthresh < 2) + ccthresh = 2; + if (debug && verbose) + printf( + " na = %d, rowstep = %d, ccthresh = %d, white_thresh = %d, nrsteps=%d\n", + na, rowstep, ccthresh, white_thresh, nrsteps); + /* + bmp_write(bmp,"out.png",stdout,97); + wfile_written_info("out.png",stdout); + */ + p0 = bmp_rowptr_from_top(bmp, 0); + for (tc = 0; tc < 100; tc++) { + int ccmax, ic0max, ir0max; + double tanthmax; + + ccmax = -1; + ic0max = ir0max = 0; + tanthmax = 0.; + for (iangle = 0; iangle <= na; iangle++) { + for (angle_sign = 1; angle_sign >= -1; angle_sign -= 2) { + double th, tanth, tanthx; + int ic1, ic2; + + if (iangle == 0 && angle_sign == -1) + continue; + th = (PI / 180.) * iangle * angle_sign * fabs(anglemax_deg) + / na; + tanth = tan(th); + tanthx = tanth * rowstep; + if (angle_sign == 1) { + ic1 = -(int) (bmp->height * tanth + 1.); + ic2 = bmp->width - 1; + } else { + ic1 = (int) (-bmp->height * tanth + 1.); + ic2 = bmp->width - 1 + (int) (-bmp->height * tanth + 1.); + } +// printf("iangle=%2d, angle_sign=%2d, ic1=%4d, ic2=%4d\n",iangle,angle_sign,ic1,ic2); + for (icol = ic1; icol <= ic2; icol++) { + unsigned char *p; + int cc, ic0, ir0; + p = p0; + if (icol < 0 || icol > bmp->width - 1) + for (irow = 0; irow < nrsteps; irow++, p += bs1) { + int ic; + ic = icol + irow * tanthx; + if (ic >= 0 && ic < bmp->width) + break; + } + else + irow = 0; + for (ir0 = ic0 = cc = 0; irow < nrsteps; irow++, p += bs1) { + int ic; + ic = icol + irow * tanthx; + if (ic < 0 || ic >= bmp->width) + break; + if ((p[ic] < white_thresh + || p[ic + bytewidth] < white_thresh) + && (p[ic + dp] < white_thresh + || p[ic + bytewidth + dp] < white_thresh)) { + if (cc == 0) { + ic0 = ic; + ir0 = irow * rowstep; + } + cc++; + if (cc > ccmax) { + ccmax = cc; + tanthmax = tanth; + ic0max = ic0; + ir0max = ir0; + } + } else + cc = 0; + } + } + 
} + } + if (ccmax < ccthresh) + break; + if (debug) + printf( + " Vert line detected: ccmax=%d (pix=%d), tanthmax=%g, ic0max=%d, ir0max=%d\n", + ccmax, ccmax * rowstep, tanthmax, ic0max, ir0max); + if (!vert_line_erase(bmp, cbmp, tmp, ir0max, ic0max, tanthmax, + minheight_in, minwidth_in, maxwidth_in, white_thresh)) + break; + } + /* + bmp_write(tmp,"outt.png",stdout,95); + wfile_written_info("outt.png",stdout); + bmp_write(bmp,"out2.png",stdout,95); + wfile_written_info("out2.png",stdout); + exit(10); + */ +} + +/* + ** Calculate max vert line length. Line is terminated by nw consecutive white pixels + ** on either side. + */ +static int vert_line_erase(WILLUSBITMAP *bmp, WILLUSBITMAP *cbmp, + WILLUSBITMAP *tmp, int row0, int col0, double tanth, + double minheight_in, double minwidth_in, double maxwidth_in, + int white_thresh) + +{ + int lw, cc, maxdev, nw, dir, i, n; + int *c1, *c2, *w; + static char *funcname = "vert_line_erase"; + + willus_dmem_alloc_warn(26, (void **) &c1, sizeof(int) * 3 * bmp->height, + funcname, 10); + c2 = &c1[bmp->height]; + w = &c2[bmp->height]; + /* + maxdev = (int)((double)bmp->height / minheight_in +.5); + if (maxdev < 3) + maxdev=3; + */ + nw = (int) ((double) src_dpi / 100. + .5); + if (nw < 2) + nw = 2; + maxdev = nw; + for (i = 0; i < bmp->height; i++) + c1[i] = c2[i] = -1; + n = 0; + for (dir = -1; dir <= 1; dir += 2) { + int del, brc; + + brc = 0; + for (del = (dir == -1) ? 
0 : 1; 1; del++) { + int r, c; + unsigned char *p; + + r = row0 + dir * del; + if (r < 0 || r > bmp->height - 1) + break; + c = col0 + (r - row0) * tanth; + if (c < 0 || c > bmp->width - 1) + break; + p = bmp_rowptr_from_top(bmp, r); + for (i = c; i <= c + maxdev && i < bmp->width; i++) + if (p[i] < white_thresh) + break; + if (i > c + maxdev || i >= bmp->width) { + for (i = c - 1; i >= c - maxdev && i >= 0; i--) + if (p[i] < white_thresh) + break; + if (i < c - maxdev || i < 0) { + brc++; + if (brc >= nw) + break; + continue; + } + } + brc = 0; + for (c = i, cc = 0; i < bmp->width; i++) + if (p[i] < white_thresh) + cc = 0; + else { + cc++; + if (cc >= nw) + break; + } + c2[r] = i - cc; + if (c2[r] > bmp->width - 1) + c2[r] = bmp->width - 1; + for (cc = 0, i = c; i >= 0; i--) + if (p[i] < white_thresh) + cc = 0; + else { + cc++; + if (cc >= nw) + break; + } + c1[r] = i + cc; + if (c1[r] < 0) + c1[r] = 0; + w[n++] = c2[r] - c1[r] + 1; + c1[r] -= cc; + if (c1[r] < 0) + c1[r] = 0; + c2[r] += cc; + if (c2[r] > bmp->width - 1) + c2[r] = bmp->width - 1; + } + } + if (n > 1) + sorti(w, n); + if (n < 10 || n < minheight_in * src_dpi || w[n / 4] < minwidth_in * src_dpi + || w[3 * n / 4] > maxwidth_in * src_dpi + || (erase_vertical_lines == 1 && w[n - 1] > maxwidth_in * src_dpi)) { + /* Erase area in temp bitmap */ + for (i = 0; i < bmp->height; i++) { + unsigned char *p; + int cmax; + + if (c1[i] < 0 || c2[i] < 0) + continue; + cmax = (c2[i] - c1[i]) + 1; + p = bmp_rowptr_from_top(tmp, i) + c1[i]; + for (; cmax > 0; cmax--, p++) + (*p) = 255; + } + } else { + /* Erase line width in source bitmap */ + lw = w[3 * n / 4] + nw * 2; + if (lw > maxwidth_in * src_dpi / 2) + lw = maxwidth_in * src_dpi / 2; + for (i = 0; i < bmp->height; i++) { + unsigned char *p; + int c0, cmin, cmax, count, white; + + if (c1[i] < 0 || c2[i] < 0) + continue; + c0 = col0 + (i - row0) * tanth; + cmin = c0 - lw - 1; + if (cmin < c1[i]) + cmin = c1[i]; + cmax = c0 + lw + 1; + if (cmax > c2[i]) + cmax = 
c2[i]; + p = bmp_rowptr_from_top(bmp, i); + c0 = (p[cmin] > p[cmax]) ? cmin : cmax; + white = p[c0]; + if (white <= white_thresh) + white = white_thresh + 1; + if (white > 255) + white = 255; + count = (cmax - cmin) + 1; + p = &p[cmin]; + for (; count > 0; count--, p++) + (*p) = white; + if (cbmp != NULL) { + unsigned char *p0; + p = bmp_rowptr_from_top(cbmp, i); + p0 = p + c0 * 3; + p = p + cmin * 3; + count = (cmax - cmin) + 1; + for (; count > 0; count--, p += 3) { + p[0] = p0[0]; + p[1] = p0[1]; + p[2] = p0[2]; + } + } + } + } + willus_dmem_free(26, (double **) &c1, funcname); + return (1); +} + +/* + ** mem_index... controls which memory allocactions get a protective margin + ** around them. + */ +static int mem_index_min = 999; +static int mem_index_max = 999; +static void willus_dmem_alloc_warn(int index, void **ptr, int size, + char *funcname, int exitcode) + +{ + if (index >= mem_index_min && index <= mem_index_max) { + char *ptr1; + void *x; + willus_mem_alloc_warn((void **) &ptr1, size + 2048, funcname, exitcode); + ptr1 += 1024; + x = (void *) ptr1; + (*ptr) = x; + } else + willus_mem_alloc_warn(ptr, size, funcname, exitcode); +} + +static void willus_dmem_free(int index, double **ptr, char *funcname) + +{ + if ((*ptr) == NULL) + return; + if (index >= mem_index_min && index <= mem_index_max) { + double *x; + char *ptr1; + x = (*ptr); + ptr1 = (char *) x; + ptr1 -= 1024; + x = (double *) ptr1; + willus_mem_free(&x, funcname); + (*ptr) = NULL; + } else + willus_mem_free(ptr, funcname); +} + +/* mem.c */ +/* +** The reason I don't simply use malloc is because I want to allocate +** memory using type long instead of type size_t. On some compilers, +** like gcc, these are the same, so it doesn't matter. On other +** compilers, like Turbo C, these are different. 
+** +*/ +static int willus_mem_alloc(double **ptr,long size,char *name) + + { +#if (defined(WIN32) && !defined(__DMC__)) + unsigned long memsize; + memsize = (unsigned long)size; +#ifdef USEGLOBAL + (*ptr) = (memsize==size) ? (double *)GlobalAlloc(GPTR,memsize) : NULL; +#else + (*ptr) = (memsize==size) ? (double *)CoTaskMemAlloc(memsize) : NULL; +#endif +#else + size_t memsize; + memsize=(size_t)size; + (*ptr) = (memsize==size) ? (double *)malloc(memsize) : NULL; +#endif +/* +{ +f=fopen("mem.dat","a"); +fprintf(f,"willus_mem_alloc(%d,%s)\n",size,name); +fclose(f); +} +*/ + return((*ptr)!=NULL); + } + +/* +** Prints an integer to 's' with commas separating every three digits. +** E.g. 45,399,350 +** Correctly handles negative values. +*/ +static void comma_print(char *s,long size) + + { + int i,m,neg; + char tbuf[80]; + + if (!size) + { + s[0]='0'; + s[1]='\0'; + return; + } + s[0]='\0'; + neg=0; + if (size<0) + { + size=-size; + neg=1; + } + for (i=0,m=size%1000;size;i++,size=(size-m)/1000,m=size%1000) + { + sprintf(tbuf,m==size ? "%d%s":"%03d%s",m,i>0 ? "," : ""); + strcat(tbuf,s); + strcpy(s,tbuf); + } + if (neg) + { + strcpy(tbuf,"-"); + strcat(tbuf,s); + strcpy(s,tbuf); + } + } + + +static void mem_warn(char *name,int size,int exitcode) + + { + static char buf[128]; + + aprintf("\n" ANSI_RED "\aCannot allocate enough memory for " + "function %s." ANSI_NORMAL "\n",name); + comma_print(buf,size); + aprintf(" " ANSI_RED "(Needed %s bytes.)" ANSI_NORMAL "\n\n",buf); + if (exitcode!=0) + { + aprintf(" " ANSI_RED "Program terminated." 
ANSI_NORMAL "\n\n"); + exit(exitcode); + } + } + +static int willus_mem_alloc_warn(void **ptr, int size, char *name, int exitcode) + +{ + int status; + + status = willus_mem_alloc((double **) ptr, (long) size, name); + if (!status) + mem_warn(name, size, exitcode); + return (status); +} + +static void willus_mem_free(double **ptr, char *name) + +{ + if ((*ptr) != NULL) { +#if (defined(WIN32) && !defined(__DMC__)) +#ifdef USEGLOBAL + GlobalFree((void *)(*ptr)); +#else + CoTaskMemFree((void *)(*ptr)); +#endif +#else + free((void *) (*ptr)); +#endif + (*ptr) = NULL; + } +} + +static int willus_mem_realloc_robust(double **ptr,long newsize,long oldsize,char *name) + + { +#if (defined(WIN32) && !defined(__DMC__)) + unsigned long memsize; + void *newptr; +#else + size_t memsize; + void *newptr; +#endif + +#if (defined(WIN32) && !defined(__DMC__)) + memsize=(unsigned long)newsize; +#else + memsize=(size_t)newsize; +#endif + if (memsize!=newsize) + return(0); + if ((*ptr)==NULL || oldsize<=0) + return(willus_mem_alloc(ptr,newsize,name)); +#if (defined(WIN32) && !defined(__DMC__)) +#ifdef USEGLOBAL + newptr = (void *)GlobalReAlloc((void *)(*ptr),memsize,GMEM_MOVEABLE); +#else + newptr = (void *)CoTaskMemRealloc((void *)(*ptr),memsize); +#endif +#else + newptr = realloc((void *)(*ptr),memsize); +#endif + if (newptr==NULL && willus_mem_alloc((double **)&newptr,newsize,name)) + { + memcpy(newptr,(*ptr),oldsize); + willus_mem_free(ptr,name); + } + if (newptr==NULL) + return(0); + + (*ptr) = newptr; + return(1); + } + + +static int willus_mem_realloc_robust_warn(void **ptr,int newsize,int oldsize,char *name, + int exitcode) + + { + int status; + + status = willus_mem_realloc_robust((double **)ptr,newsize,oldsize,name); + if (!status) + mem_warn(name,newsize,exitcode); + return(status); + } + +/* math.c */ +static void sortd(double *x, int n) + +{ + int top, n1; + double x0; + + if (n < 2) + return; + top = n / 2; + n1 = n - 1; + while (1) { + if (top > 0) { + top--; + x0 = 
x[top]; + } else { + x0 = x[n1]; + x[n1] = x[0]; + n1--; + if (!n1) { + x[0] = x0; + return; + } + } + { + int parent, child; + + parent = top; + child = top * 2 + 1; + while (child <= n1) { + if (child < n1 && x[child] < x[child + 1]) + child++; + if (x0 < x[child]) { + x[parent] = x[child]; + parent = child; + child += (parent + 1); + } else + break; + } + x[parent] = x0; + } + } +} + +static void sorti(int *x, int n) + +{ + int top, n1; + int x0; + + if (n < 2) + return; + top = n / 2; + n1 = n - 1; + while (1) { + if (top > 0) { + top--; + x0 = x[top]; + } else { + x0 = x[n1]; + x[n1] = x[0]; + n1--; + if (!n1) { + x[0] = x0; + return; + } + } + { + int parent, child; + + parent = top; + child = top * 2 + 1; + while (child <= n1) { + if (child < n1 && x[child] < x[child + 1]) + child++; + if (x0 < x[child]) { + x[parent] = x[child]; + parent = child; + child += (parent + 1); + } else + break; + } + x[parent] = x0; + } + } +} + +/* bmp.c */ +/* + ** Should call bmp_set_type() right after this to set the bitmap type. 
+ */ + +#define RGBSET24(bmp,ptr,r,g,b) \ + if (bmp->type==WILLUSBITMAP_TYPE_NATIVE) \ + { \ + ptr[0]=r; \ + ptr[1]=g; \ + ptr[2]=b; \ + } \ + else \ + { \ + ptr[2]=r; \ + ptr[1]=g; \ + ptr[0]=b; \ + } + +#define RGBGET(bmp,ptr,r,g,b) \ + if (bmp->bpp==8) \ + { \ + r=bmp->red[ptr[0]]; \ + g=bmp->green[ptr[0]]; \ + b=bmp->blue[ptr[0]]; \ + } \ + else if (bmp->type==WILLUSBITMAP_TYPE_NATIVE) \ + { \ + r=ptr[0]; \ + g=ptr[1]; \ + b=ptr[2]; \ + } \ + else \ + { \ + r=ptr[2]; \ + g=ptr[1]; \ + b=ptr[0]; \ + } + +#define RGBGETINCPTR(bmp,ptr,r,g,b) \ + if (bmp->bpp==8) \ + { \ + r=bmp->red[ptr[0]]; \ + g=bmp->green[ptr[0]]; \ + b=bmp->blue[ptr[0]]; \ + ptr++; \ + } \ + else if (bmp->type==WILLUSBITMAP_TYPE_NATIVE) \ + { \ + r=ptr[0]; \ + g=ptr[1]; \ + b=ptr[2]; \ + ptr+=3; \ + } \ + else \ + { \ + r=ptr[2]; \ + g=ptr[1]; \ + b=ptr[0]; \ + ptr+=3; \ + } + +static void bmp_init(WILLUSBITMAP *bmap) + +{ + bmap->data = NULL; + bmap->size_allocated = 0; + bmap->type = WILLUSBITMAP_TYPE_NATIVE; +} + +static int bmp_bytewidth_win32(WILLUSBITMAP *bmp) + + { + return(((bmp->bpp==24 ? bmp->width*3 : bmp->width)+3)&(~0x3)); + } + +/* + ** The width, height, and bpp parameters of the WILLUSBITMAP structure + ** should be set before calling this function. + */ +static int bmp_alloc(WILLUSBITMAP *bmap) + +{ + int size; + static char *funcname = "bmp_alloc"; + + if (bmap->bpp != 8 && bmap->bpp != 24) { + printf("Internal error: call to bmp_alloc has bpp!=8 and bpp!=24!\n"); + exit(10); + } + /* Choose the max size even if not WIN32 to avoid memory faults */ + /* and to allow the possibility of changing the "type" of the */ + /* bitmap without reallocating memory. 
*/ + size = bmp_bytewidth_win32(bmap) * bmap->height; + if (bmap->data != NULL && bmap->size_allocated >= size) + return (1); + if (bmap->data != NULL) + willus_mem_realloc_robust_warn((void **) &bmap->data, size, + bmap->size_allocated, funcname, 10); + else + willus_mem_alloc_warn((void **) &bmap->data, size, funcname, 10); + bmap->size_allocated = size; + return (1); +} + +static void bmp_free(WILLUSBITMAP *bmap) + + { + if (bmap->data!=NULL) + { + willus_mem_free((double **)&bmap->data,"bmp_free"); + bmap->data=NULL; + bmap->size_allocated=0; + } + } + +/* +** If 8-bit, the bitmap is filled with . +** If 24-bit, it gets , , values. +*/ +static void bmp_fill(WILLUSBITMAP *bmp,int r,int g,int b) + + { + int y,n; + + if (bmp->bpp==8 || (r==g && r==b)) + { + memset(bmp->data,r,bmp->size_allocated); + return; + } + if (bmp->type==WILLUSBITMAP_TYPE_WIN32 && bmp->bpp==24) + { + y=r; + r=b; + b=y; + } + for (y=bmp->height-1;y>=0;y--) + { + unsigned char *p; + + p=bmp_rowptr_from_top(bmp,y); + for (n=bmp->width-1;n>=0;n--) + { + (*p)=r; + p++; + (*p)=g; + p++; + (*p)=b; + p++; + } + } + } + + +static int bmp_copy(WILLUSBITMAP *dest, WILLUSBITMAP *src) + +{ + dest->width = src->width; + dest->height = src->height; + dest->bpp = src->bpp; + dest->type = src->type; + if (!bmp_alloc(dest)) + return (0); + memcpy(dest->data, src->data, src->height * bmp_bytewidth(src)); + memcpy(dest->red, src->red, sizeof(int) * 256); + memcpy(dest->green, src->green, sizeof(int) * 256); + memcpy(dest->blue, src->blue, sizeof(int) * 256); + return (1); +} + +static int bmp_bytewidth(WILLUSBITMAP *bmp) { + return (bmp->bpp == 24 ? 
bmp->width * 3 : bmp->width); +} + +/* + ** row==0 ==> top row of bitmap + ** row==bmp->height-1 ==> bottom row of bitmap + ** (regardless of bitmap type) + */ +static unsigned char *bmp_rowptr_from_top(WILLUSBITMAP *bmp, int row) + +{ + if (bmp->type == WILLUSBITMAP_TYPE_WIN32) + return (&bmp->data[bmp_bytewidth(bmp) * (bmp->height - 1 - row)]); + else + return (&bmp->data[bmp_bytewidth(bmp) * row]); +} + +/* + ** Allocate more bitmap rows. + ** ratio typically something like 1.5 or 2.0 + */ +static void bmp_more_rows(WILLUSBITMAP *bmp, double ratio, int pixval) + +{ + int new_height, new_bytes, bw; + static char *funcname = "bmp_more_rows"; + + new_height = (int) (bmp->height * ratio + .5); + if (new_height <= bmp->height) + return; + bw = bmp_bytewidth(bmp); + new_bytes = bw * new_height; + if (new_bytes > bmp->size_allocated) { + willus_mem_realloc_robust_warn((void **) &bmp->data, new_bytes, + bmp->size_allocated, funcname, 10); + bmp->size_allocated = new_bytes; + } + /* Fill in */ + memset(bmp_rowptr_from_top(bmp, bmp->height), pixval, + (new_height - bmp->height) * bw); + bmp->height = new_height; +} + +static double resample_single(double *y,double x1,double x2) + + { + int i,i1,i2; + double dx,dx1,dx2,sum; + + i1=floor(x1); + i2=floor(x2); + if (i1==i2) + return(y[i1]); + dx=x2-x1; + if (dx>1.) + dx=1.; + dx1= 1.-(x1-i1); + dx2= x2-i2; + sum=0.; + if (dx1 > 1e-8*dx) + sum += dx1*y[i1]; + if (dx2 > 1e-8*dx) + sum += dx2*y[i2]; + for (i=i1+1;i<=i2-1;sum+=y[i],i++); + return(sum/(x2-x1)); + } + +/* +** Resample src[] into dst[]. +** Examples: resample_1d(dst,src,0.,5.,5) would simply copy the +** first five elements of src[] to dst[]. +** +** resample_1d(dst,src,0.,5.,10) would work as follows: +** dst[0] and dst[1] would get src[0]. +** dst[2] and dst[3] would get src[1]. +** and so on. 
+**
+*/
+static void resample_1d(double *dst,double *src,double x1,double x2,
+                        int n)
+ /* NOTE(review): the text after "for (i=0;i" below, up to the middle of bmp_resample_1(), was lost in extraction. */
+    {
+    int i;
+    double new,last;
+
+    last=x1;
+    for (i=0;itype==WILLUSBITMAP_TYPE_WIN32 && color>=0)
+        color=2-color;
+    for (row=0;rowbpp==8) /* NOTE(review): damaged line -- the loop header and the bpp test are fused together. */
+        {
+        switch (color)
+            {
+            case -1:
+                for (col=0,p+=x0;colred[p[0]]; /* NOTE(review): damaged -- presumably the "case -1" body and a "case 0" label are missing. */
+                break;
+            case 1:
+                for (col=0,p+=x0;colgreen[p[0]]; /* NOTE(review): damaged loop header. */
+                break;
+            case 2:
+                for (col=0,p+=x0;colblue[p[0]]; /* NOTE(review): damaged loop header. */
+                break;
+            }
+        }
+    else
+        {
+        p+=color; /* NOTE(review): the next line is damaged and runs straight into the header comment of bmp_resample(). */
+        for (col=0,p+=3*x0;colwidth (x-coord), and top to bottom go from
+ ** 0.0 to src->height (y-coord).
+ ** The cropped rectangle (x1,y1) to (x2,y2) is placed into
+ ** the destination bitmap, which need not be allocated yet.
+ **
+ ** The destination bitmap will be 8-bit grayscale if the source bitmap
+ ** passes the bmp_is_grayscale() function. Otherwise it will be 24-bit.
+ **
+ ** Returns 0 for okay.
+ ** -1 for not enough memory.
+ ** -2 for bad cropping area or destination bitmap size
+ */
+static int bmp_resample(WILLUSBITMAP *dest, WILLUSBITMAP *src, double x1,
+        double y1, double x2, double y2, int newwidth, int newheight)
+ /* Resamples the (x1,y1)-(x2,y2) region of src into dest at newwidth x newheight pixels. */
+{
+    int gray, maxlen, colorplanes;
+    double t;
+    double *tempbmp;
+    double *temprow;
+    int color, hmax, row, col, dy;
+    static char *funcname = "bmp_resample";
+
+    /* Clip and sort x1,y1 and x2,y2 */
+    if (x1 > src->width)
+        x1 = src->width;
+    else if (x1 < 0.)
+        x1 = 0.;
+    if (x2 > src->width)
+        x2 = src->width;
+    else if (x2 < 0.)
+        x2 = 0.;
+    if (y1 > src->height)
+        y1 = src->height;
+    else if (y1 < 0.)
+        y1 = 0.;
+    if (y2 > src->height)
+        y2 = src->height;
+    else if (y2 < 0.)
+        y2 = 0.;
+    if (x2 < x1) {
+        t = x2;
+        x2 = x1;
+        x1 = t;
+    }
+    if (y2 < y1) {
+        t = y2;
+        y2 = y1;
+        y1 = t;
+    }
+    dy = y2 - y1; /* source height of the crop, truncated to int */
+    dy += 2;
+    if (x2 - x1 == 0. || y2 - y1 == 0.) /* only an empty crop yields -2; newwidth/newheight are not checked here */
+        return (-2);
+
+    /* Allocate temp storage */
+    maxlen = x2 - x1 > dy + newheight ? (int) (x2 - x1) : dy + newheight;
+    maxlen += 16;
+    hmax = newheight > dy ? newheight : dy;
+    if (!willus_mem_alloc(&temprow, maxlen * sizeof(double), funcname)) /* temprow: maxlen doubles */
+        return (-1);
+    if (!willus_mem_alloc(&tempbmp, hmax * newwidth * sizeof(double), /* tempbmp: hmax * newwidth doubles */
+            funcname)) {
+        willus_mem_free(&temprow, funcname);
+        return (-1);
+    }
+    if ((gray = bmp_is_grayscale(src)) != 0) { /* grayscale source: 8-bit dest with an identity palette */
+        int i;
+        dest->bpp = 8;
+        for (i = 0; i < 256; i++)
+            dest->red[i] = dest->blue[i] = dest->green[i] = i;
+    } else
+        dest->bpp = 24;
+    dest->width = newwidth;
+    dest->height = newheight;
+    dest->type = WILLUSBITMAP_TYPE_NATIVE;
+    if (!bmp_alloc(dest)) {
+        willus_mem_free(&tempbmp, funcname);
+        willus_mem_free(&temprow, funcname);
+        return (-1);
+    }
+    colorplanes = gray ? 1 : 3;
+    for (color = 0; color < colorplanes; color++) { /* one pass per plane; bmp_resample_1() gets -1 for grayscale */
+        bmp_resample_1(tempbmp, src, x1, y1, x2, y2, newwidth, newheight,
+                temprow, gray ? -1 : color);
+        for (row = 0; row < newheight; row++) { /* round each value in tempbmp into this plane's bytes of dest */
+            unsigned char *p;
+            double *s;
+            p = bmp_rowptr_from_top(dest, row) + color;
+            s = &tempbmp[row * newwidth];
+            if (colorplanes == 1)
+                for (col = 0; col < newwidth;
+                        p[0] = (int) (s[0] + .5), col++, s++, p++)
+                    ;
+            else
+                for (col = 0; col < newwidth;
+                        p[0] = (int) (s[0] + .5), col++, s++, p += colorplanes)
+                    ;
+        }
+    }
+    willus_mem_free(&tempbmp, funcname);
+    willus_mem_free(&temprow, funcname);
+    return (0);
+}
+
+static int bmp8_greylevel_convert(int r,int g,int b)
+ /* Weighted gray level of an RGB triple: (0.3 r + 0.59 g + 0.11 b) * 1.002, truncated to int. */
+    {
+    return((int)((r*0.3+g*0.59+b*0.11)*1.002));
+    }
+
+/*
+** One of dest or src can be NULL, which is the
+** same as setting them equal to each other, but
+** in this case, the bitmap must be 24-bit!
+*/ +static int bmp_is_grayscale(WILLUSBITMAP *bmp) + + { + int i; + if (bmp->bpp!=8) + return(0); + for (i=0;i<256;i++) + if (bmp->red[i]!=i || bmp->green[i]!=i || bmp->blue[i]!=i) + return(0); + return(1); + } + +static void bmp_color_xform8(WILLUSBITMAP *dest,WILLUSBITMAP *src,unsigned char *newval) + + { + int i,ir; + + if (src==NULL) + src=dest; + if (dest==NULL) + dest=src; + if (dest!=src) + { + dest->width = src->width; + dest->height = src->height; + dest->bpp = 8; + for (i=0;i<256;i++) + dest->red[i]=dest->green[i]=dest->blue[i]=i; + bmp_alloc(dest); + } + for (ir=0;irheight;ir++) + { + unsigned char *sp,*dp; + sp=bmp_rowptr_from_top(src,ir); + dp=bmp_rowptr_from_top(dest,ir); + for (i=0;iwidth;i++) + dp[i]=newval[sp[i]]; + } + } + +/* +** One of dest or src can be NULL, which is the +** same as setting them equal to each other, but +** in this case, the bitmap must be 24-bit! +*/ +static void bmp_color_xform(WILLUSBITMAP *dest,WILLUSBITMAP *src,unsigned char *newval) + + { + int ir,ic; + + if (src==NULL) + src=dest; + if (dest==NULL) + dest=src; + if (bmp_is_grayscale(src)) + { + bmp_color_xform8(dest,src,newval); + return; + } + if (dest!=src) + { + dest->width = src->width; + dest->height = src->height; + dest->bpp = 24; + bmp_alloc(dest); + } + for (ir=0;irheight;ir++) + { + unsigned char *sp,*dp; + sp=bmp_rowptr_from_top(src,ir); + dp=bmp_rowptr_from_top(dest,ir); + for (ic=0;icwidth;ic++,dp+=3) + { + int r,g,b; + + RGBGETINCPTR(src,sp,r,g,b); + r=newval[r]; + g=newval[g]; + b=newval[b]; + RGBSET24(dest,dp,r,g,b); + } + } + } + +/* +** One of dest or src can be NULL, which is the +** same as setting them equal to each other, but +** in this case, the bitmap must be 24-bit! +** Note: contrast > 1 will increase the contrast. +** contrast < 1 will decrease the contrast. +** contrast of 0 will make all pixels the same value. +** contrast of 1 will not change the image. 
+*/ +static void bmp_contrast_adjust(WILLUSBITMAP *dest,WILLUSBITMAP *src,double contrast) + + { + int i; + static unsigned char newval[256]; + + for (i=0;i<256;i++) + { + double x,y; + int sgn,v; + x=(i-127.5)/127.5; + sgn = x<0 ? -1 : 1; + if (contrast<0) + sgn = -sgn; + x=fabs(x); + if (fabs(contrast)>1.5) + y=x<.99999 ? 1-exp(fabs(contrast)*x/(x-1)) : 1.; + else + { + y=fabs(contrast)*x; + if (y>1.) + y=1.; + } + y = 127.5+y*sgn*127.5; + v = (int)(y+.5); + if (v<0) + v=0; + if (v>255) + v=255; + newval[i] = v; + } + bmp_color_xform(dest,src,newval); + } + +/* + ** Convert bitmap to grey-scale in-situ + */ +static void bmp_convert_to_greyscale_ex(WILLUSBITMAP *dst, WILLUSBITMAP *src) + +{ + int oldbpr, newbpr, bpp, dp, rownum, colnum, i; + + oldbpr = bmp_bytewidth(src); + dp = src->bpp == 8 ? 1 : 3; + bpp = src->bpp; + dst->bpp = 8; + for (i = 0; i < 256; i++) + dst->red[i] = dst->green[i] = dst->blue[i] = i; + if (dst != src) { + dst->width = src->width; + dst->height = src->height; + bmp_alloc(dst); + } + newbpr = bmp_bytewidth(dst); + /* Possibly restore src->bpp to 24 so RGBGET works right (src & dst may be the same) */ + src->bpp = bpp; + for (rownum = 0; rownum < src->height; rownum++) { + unsigned char *oldp, *newp; + oldp = &src->data[oldbpr * rownum]; + newp = &dst->data[newbpr * rownum]; + for (colnum = 0; colnum < src->width; colnum++, oldp += dp, newp++) { + int r, g, b; + RGBGET(src, oldp, r, g, b); + (*newp) = bmp8_greylevel_convert(r, g, b); + } + } + dst->bpp = 8; /* Possibly restore dst->bpp to 8 since src & dst may be the same. */ +} + +/* bmpmupdf.c */ +static int bmpmupdf_pixmap_to_bmp(WILLUSBITMAP *bmp, fz_context *ctx, + fz_pixmap *pixmap) + +{ + unsigned char *p; + int ncomp, i, row, col; + + bmp->width = fz_pixmap_width(ctx, pixmap); + bmp->height = fz_pixmap_height(ctx, pixmap); + ncomp = fz_pixmap_components(ctx, pixmap); + /* Has to be 8-bit or RGB */ + if (ncomp != 2 && ncomp != 4) + return (-1); + bmp->bpp = (ncomp == 2) ? 
8 : 24; + bmp_alloc(bmp); + if (ncomp == 2) + for (i = 0; i < 256; i++) + bmp->red[i] = bmp->green[i] = bmp->blue[i] = i; + p = fz_pixmap_samples(ctx, pixmap); + if (ncomp == 1) + for (row = 0; row < bmp->height; row++) { + unsigned char *dest; + dest = bmp_rowptr_from_top(bmp, row); + memcpy(dest, p, bmp->width); + p += bmp->width; + } + else if (ncomp == 2) + for (row = 0; row < bmp->height; row++) { + unsigned char *dest; + dest = bmp_rowptr_from_top(bmp, row); + for (col = 0; col < bmp->width; col++, dest++, p += 2) + dest[0] = p[0]; + } + else + for (row = 0; row < bmp->height; row++) { + unsigned char *dest; + dest = bmp_rowptr_from_top(bmp, row); + for (col = 0; col < bmp->width; + col++, dest += ncomp - 1, p += ncomp) + memcpy(dest, p, ncomp - 1); + } + return (0); +} diff --git a/k2pdfopt.h b/k2pdfopt.h new file mode 100644 index 000000000..6067ff165 --- /dev/null +++ b/k2pdfopt.h @@ -0,0 +1,33 @@ +/* + ** k2pdfopt.h K2pdfopt optimizes PDF/DJVU files for mobile e-readers + ** (e.g. the Kindle) and smartphones. It works well on + ** multi-column PDF/DJVU files. K2pdfopt is freeware. + ** + ** Copyright (C) 2012 http://willus.com + ** + ** This program is free software: you can redistribute it and/or modify + ** it under the terms of the GNU Affero General Public License as + ** published by the Free Software Foundation, either version 3 of the + ** License, or (at your option) any later version. + ** + ** This program is distributed in the hope that it will be useful, + ** but WITHOUT ANY WARRANTY; without even the implied warranty of + ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + ** GNU Affero General Public License for more details. + ** + ** You should have received a copy of the GNU Affero General Public License + ** along with this program. If not, see . 
+ ** + */ + +#ifndef _K2PDFOPT_H +#define _K2PDFOPT_H + +#include + +void k2pdfopt_mupdf_reflow_bmp(fz_context *ctx, fz_pixmap *pix, double rot_deg); +void k2pdfopt_mupdf_rfbmp_size(int *width, int *height); +void k2pdfopt_mupdf_rfbmp_ptr(unsigned char** bmp_ptr_ptr); + +#endif + diff --git a/pdf.c b/pdf.c index fdb64b674..97ddd7c4e 100644 --- a/pdf.c +++ b/pdf.c @@ -20,6 +20,7 @@ #include "blitbuffer.h" #include "drawcontext.h" #include "pdf.h" +#include "k2pdfopt.h" #include #include #include @@ -511,6 +512,96 @@ static int closePage(lua_State *L) { return 0; } +static int reflowPage(lua_State *L) { + fz_context *ctx; + fz_device *dev; + fz_pixmap *pix; + fz_rect bounds,bounds2; + fz_matrix ctm; + fz_bbox bbox; + + PdfPage *page = (PdfPage*) luaL_checkudata(L, 1, "pdfpage"); + DrawContext *dc = (DrawContext*) luaL_checkudata(L, 2, "drawcontext"); + + double dpi = 200; + double dpp; + dpp = dpi / 72.; + pix = NULL; + fz_var(pix); + bounds = fz_bound_page(page->doc->xref, page->page); + ctm = fz_scale(dpp, dpp); + // ctm=fz_concat(ctm,fz_rotate(rotation)); + bounds2 = fz_transform_rect(ctm, bounds); + bbox = fz_round_rect(bounds2); + // ctm=fz_translate(0,-page->mediabox.y1); + // ctm=fz_concat(ctm,fz_scale(dpp,-dpp)); + // ctm=fz_concat(ctm,fz_rotate(page->rotate)); + // ctm=fz_concat(ctm,fz_rotate(0)); + // bbox=fz_round_rect(fz_transform_rect(ctm,page->mediabox)); + // pix=fz_new_pixmap_with_rect(colorspace,bbox); + pix = fz_new_pixmap_with_bbox(page->doc->context, fz_device_gray, bbox); + printf("bbox:%d,%d,%d,%d\n",bbox.x0,bbox.y0,bbox.x1,bbox.y1); + fz_clear_pixmap_with_value(page->doc->context, pix, 0xff); + dev = fz_new_draw_device(page->doc->context, pix); +#ifdef MUPDF_TRACE + fz_device *tdev; + fz_try(page->doc->context) { + tdev = fz_new_trace_device(page->doc->context); + fz_run_page(page->doc->xref, page->page, tdev, ctm, NULL); + } + fz_always(page->doc->context) { + fz_free_device(tdev); + } +#endif + fz_run_page(page->doc->xref, page->page, dev, 
ctm, NULL); + fz_free_device(dev); + + if(dc->gamma >= 0.0) { + fz_gamma_pixmap(page->doc->context, pix, dc->gamma); + } + int width, height; + k2pdfopt_mupdf_reflow_bmp(page->doc->context, pix, 0); + k2pdfopt_mupdf_rfbmp_size(&width, &height); + + lua_pushnumber(L, (double)width); + lua_pushnumber(L, (double)height); + + fz_drop_pixmap(page->doc->context, pix); + + return 2; +} + +static int drawReflowedPage(lua_State *L) { + static unsigned char *bmptr = NULL; + + PdfPage *page = (PdfPage*) luaL_checkudata(L, 1, "pdfpage"); + DrawContext *dc = (DrawContext*) luaL_checkudata(L, 2, "drawcontext"); + BlitBuffer *bb = (BlitBuffer*) luaL_checkudata(L, 3, "blitbuffer"); + k2pdfopt_mupdf_rfbmp_ptr(&bmptr); + + uint8_t *bbptr = (uint8_t*)bb->data; + uint8_t *pmptr = (uint8_t*)bmptr; + + int x_offset = 0; + int y_offset = 0; + + bbptr += bb->pitch * y_offset; + int x, y; + for(y = y_offset; y < bb->h; y++) { + for(x = x_offset/2; x < (bb->w/2); x++) { + int p = x*2 - x_offset; + bbptr[x] = (((pmptr[p + 1] & 0xF0) >> 4) | (pmptr[p] & 0xF0)) ^ 0xFF; + } + bbptr += bb->pitch; + pmptr += bb->w; + if (bb->w & 1) { + bbptr[x] = 255 - (pmptr[x*2] & 0xF0); + } + } + + return 0; +} + static int drawPage(lua_State *L) { fz_pixmap *pix; fz_device *dev; @@ -657,6 +748,8 @@ static const struct luaL_Reg pdfpage_meth[] = { {"getPageLinks", getPageLinks}, {"close", closePage}, {"__gc", closePage}, + {"reflow", reflowPage}, + {"rfdraw", drawReflowedPage}, {"draw", drawPage}, {NULL, NULL} };