fix: #7 Support ImageMagick on Windows

1 year ago · 2f0bdc84eb
parent c5d5a634a2
commit 2f0bdc84eb
2 changed files with 25 additions and 1 deletions
--- a/src/doc_search/workflow/init.py
+++ b/src/doc_search/workflow/init.py
@ -3,6 +3,7 @@ from __future__ import annotations
 import logging
 import os
 import pickle
 import platform
 import shutil
 import warnings
 from pathlib import Path
@ -99,11 +100,30 @@ class VerifyInputFile(WorkflowBase):
        }
 class ImageMagickCommand(WorkflowBase):
    """
    Pick the ImageMagick command prefix for the current operating system.

    On Windows the prefix is ``"<IMCONV> convert"``, where ``IMCONV`` is an
    environment variable that should point to ImageMagick's ``magick.exe``.
    On every other platform the prefix is plain ``"convert"``.
    """
    def execute(self) -> dict:
        """
        Build the command prefix.

        Returns:
            A dict with the single key ``"convert_command"`` holding the
            command prefix string.

        Raises:
            AssertionError: on Windows when ``IMCONV`` is unset or empty.
        """
        command: str = "convert"
        if platform.system() != "Windows":
            return {"convert_command": command}
        image_magick_path = os.getenv("IMCONV")
        # Raised explicitly instead of via `assert`: assert statements are
        # removed under `python -O`, which would let "None convert" through.
        # The exception type is kept so existing handlers still match.
        # An empty value is rejected too, since it would yield " convert".
        if not image_magick_path:
            raise AssertionError(
                "IMCONV environment variable not set. It should point to location of ImageMagick's magick.exe"
            )
        # NOTE(review): the path is interpolated unquoted; if the command is
        # later split on whitespace or run through a shell, a path containing
        # spaces may need quoting -- confirm against run_command.
        return {"convert_command": f"{image_magick_path} {command}"}
 class ConvertPDFToImages(WorkflowBase):
    """
    Convert PDF to images using ImageMagick
    """
    convert_command: str
    input_pdf_path: Path
    app_dir: Path
    start_page: int
@ -118,7 +138,7 @@ class ConvertPDFToImages(WorkflowBase):
            image_path = output_dir / f"output-{i}.png"
            if image_path.exists():
                continue
-            convert_command = f"""convert -density 150 -trim -background white -alpha remove -quality 100 -sharpen 0x1.0 {input_file_page} -quality 100 {image_path}"""
+            convert_command = f"""{self.convert_command} -density 150 -trim -background white -alpha remove -quality 100 -sharpen 0x1.0 {input_file_page} -quality 100 {image_path}"""
            run_command(convert_command)
        return {"pdf_images_path": output_dir}
@ -294,6 +314,7 @@ ${question}
 def training_workflow_steps() -> list:
    return [
        VerifyInputFile,
        ImageMagickCommand,
        ConvertPDFToImages,
        ConvertImagesToText,
        CombineAllText,
@ -304,6 +325,7 @@ def training_workflow_steps() -> list:
 def pre_process_workflow_steps() -> list:
    """
    Return a new list of the pre-processing workflow steps, in the order
    they are listed here.
    """
    steps = [
        VerifyInputFile,
        ImageMagickCommand,
        ConvertPDFToImages,
        ConvertImagesToText,
    ]
    return steps
--- a/tests/workflow_steps_test.py
+++ b/tests/workflow_steps_test.py
@ -4,6 +4,7 @@ from doc_search.workflow import (
    ConvertImagesToText,
    ConvertPDFToImages,
    CreateIndex,
    ImageMagickCommand,
    LoadIndex,
    VerifyInputFile,
    workflow_steps,
@ -15,6 +16,7 @@ def test_return_expected_workflow() -> None:
    assert expected_workflow_steps == [
        VerifyInputFile,
        ImageMagickCommand,
        ConvertPDFToImages,
        ConvertImagesToText,
        CombineAllText,