fix: #7 Support ImageMagick on Windows

main
namuan 1 year ago
parent c5d5a634a2
commit 2f0bdc84eb

@ -3,6 +3,7 @@ from __future__ import annotations
import logging
import os
import pickle
import platform
import shutil
import warnings
from pathlib import Path
@ -99,11 +100,30 @@ class VerifyInputFile(WorkflowBase):
}
class ImageMagickCommand(WorkflowBase):
    """
    Pick the ImageMagick ``convert`` invocation appropriate for the host OS.

    On Windows the command is prefixed with the value of the ``IMCONV``
    environment variable, which should point to ImageMagick's ``magick.exe``.
    On every other platform the bare ``convert`` command is used.
    """

    def execute(self) -> dict:
        """
        Build the command prefix used to call ImageMagick.

        Returns:
            A dict with a single key, ``convert_command``, holding the
            command string.

        Raises:
            AssertionError: On Windows, when ``IMCONV`` is unset or empty.
        """
        command = "convert"
        if platform.system() == "Windows":
            image_magick_path = os.getenv("IMCONV")
            # Raise explicitly instead of using an `assert` statement: asserts
            # are removed under `python -O`, which would let a missing variable
            # slip through and yield the bogus command "None convert". The
            # exception type and message are kept unchanged. An empty value is
            # rejected too, since it would produce " convert".
            if not image_magick_path:
                raise AssertionError(
                    "IMCONV environment variable not set. It should point to location of ImageMagick's magick.exe"
                )
            # NOTE(review): the path is interpolated unquoted; a location that
            # contains spaces may need quoting depending on how the command
            # string is later executed - confirm against the command runner.
            command = f"{image_magick_path} {command}"
        return {"convert_command": command}
class ConvertPDFToImages(WorkflowBase):
"""
Convert PDF to images using ImageMagick
"""
convert_command: str
input_pdf_path: Path
app_dir: Path
start_page: int
@ -118,7 +138,7 @@ class ConvertPDFToImages(WorkflowBase):
image_path = output_dir / f"output-{i}.png"
if image_path.exists():
continue
convert_command = f"""convert -density 150 -trim -background white -alpha remove -quality 100 -sharpen 0x1.0 {input_file_page} -quality 100 {image_path}"""
convert_command = f"""{self.convert_command} -density 150 -trim -background white -alpha remove -quality 100 -sharpen 0x1.0 {input_file_page} -quality 100 {image_path}"""
run_command(convert_command)
return {"pdf_images_path": output_dir}
@ -294,6 +314,7 @@ ${question}
def training_workflow_steps() -> list:
return [
VerifyInputFile,
ImageMagickCommand,
ConvertPDFToImages,
ConvertImagesToText,
CombineAllText,
@ -304,6 +325,7 @@ def training_workflow_steps() -> list:
def pre_process_workflow_steps() -> list:
    """Return the ordered workflow step classes for the pre-processing run."""
    steps = [VerifyInputFile, ImageMagickCommand]
    # The conversion steps follow once the ImageMagick command has been chosen.
    steps += [ConvertPDFToImages, ConvertImagesToText]
    return steps

@ -4,6 +4,7 @@ from doc_search.workflow import (
ConvertImagesToText,
ConvertPDFToImages,
CreateIndex,
ImageMagickCommand,
LoadIndex,
VerifyInputFile,
workflow_steps,
@ -15,6 +16,7 @@ def test_return_expected_workflow() -> None:
assert expected_workflow_steps == [
VerifyInputFile,
ImageMagickCommand,
ConvertPDFToImages,
ConvertImagesToText,
CombineAllText,

Loading…
Cancel
Save