fix: #7 Support ImageMagick on Windows

main
namuan 1 year ago
parent c5d5a634a2
commit 2f0bdc84eb

@ -3,6 +3,7 @@ from __future__ import annotations
import logging import logging
import os import os
import pickle import pickle
import platform
import shutil import shutil
import warnings import warnings
from pathlib import Path from pathlib import Path
@ -99,11 +100,30 @@ class VerifyInputFile(WorkflowBase):
} }
class ImageMagickCommand(WorkflowBase):
    """
    Work out how ImageMagick's ``convert`` should be invoked on this OS.

    On Windows the executable location is read from the ``IMCONV``
    environment variable and placed in front of ``convert``; on every other
    platform the bare ``convert`` command is used.
    """

    def execute(self) -> dict:
        """
        Build the command prefix used to call ImageMagick.

        Returns:
            A dict with the single key ``convert_command`` holding the
            command string.

        Raises:
            AssertionError: on Windows when ``IMCONV`` is unset or blank.
        """
        command = "convert"
        if platform.system() != "Windows":
            return {"convert_command": command}

        image_magick_path = (os.getenv("IMCONV") or "").strip()
        if not image_magick_path:
            # Raised explicitly instead of via an `assert` statement so the
            # check is not stripped under `python -O`; the exception type and
            # message stay the same as before.
            raise AssertionError(
                "IMCONV environment variable not set. It should point to location of ImageMagick's magick.exe"
            )

        # A path containing spaces would otherwise be split into several
        # arguments once the command string is tokenised. Leave values that
        # the user has already quoted untouched.
        if " " in image_magick_path and not image_magick_path.startswith('"'):
            image_magick_path = f'"{image_magick_path}"'

        return {"convert_command": f"{image_magick_path} {command}"}
class ConvertPDFToImages(WorkflowBase): class ConvertPDFToImages(WorkflowBase):
""" """
Convert PDF to images using ImageMagick Convert PDF to images using ImageMagick
""" """
convert_command: str
input_pdf_path: Path input_pdf_path: Path
app_dir: Path app_dir: Path
start_page: int start_page: int
@ -118,7 +138,7 @@ class ConvertPDFToImages(WorkflowBase):
image_path = output_dir / f"output-{i}.png" image_path = output_dir / f"output-{i}.png"
if image_path.exists(): if image_path.exists():
continue continue
convert_command = f"""convert -density 150 -trim -background white -alpha remove -quality 100 -sharpen 0x1.0 {input_file_page} -quality 100 {image_path}""" convert_command = f"""{self.convert_command} -density 150 -trim -background white -alpha remove -quality 100 -sharpen 0x1.0 {input_file_page} -quality 100 {image_path}"""
run_command(convert_command) run_command(convert_command)
return {"pdf_images_path": output_dir} return {"pdf_images_path": output_dir}
@ -294,6 +314,7 @@ ${question}
def training_workflow_steps() -> list: def training_workflow_steps() -> list:
return [ return [
VerifyInputFile, VerifyInputFile,
ImageMagickCommand,
ConvertPDFToImages, ConvertPDFToImages,
ConvertImagesToText, ConvertImagesToText,
CombineAllText, CombineAllText,
@ -304,6 +325,7 @@ def training_workflow_steps() -> list:
def pre_process_workflow_steps() -> list:
    """Return the ordered list of steps that make up the pre-process workflow."""
    steps = [
        VerifyInputFile,
        ImageMagickCommand,
        ConvertPDFToImages,
        ConvertImagesToText,
    ]
    return steps

@ -4,6 +4,7 @@ from doc_search.workflow import (
ConvertImagesToText, ConvertImagesToText,
ConvertPDFToImages, ConvertPDFToImages,
CreateIndex, CreateIndex,
ImageMagickCommand,
LoadIndex, LoadIndex,
VerifyInputFile, VerifyInputFile,
workflow_steps, workflow_steps,
@ -15,6 +16,7 @@ def test_return_expected_workflow() -> None:
assert expected_workflow_steps == [ assert expected_workflow_steps == [
VerifyInputFile, VerifyInputFile,
ImageMagickCommand,
ConvertPDFToImages, ConvertPDFToImages,
ConvertImagesToText, ConvertImagesToText,
CombineAllText, CombineAllText,

Loading…
Cancel
Save