mirror of https://github.com/bhaskatripathi/pdfGPT
Merge d21ce0e91c
into 649afc6a24
commit
2df4f4e7a7
@ -0,0 +1,22 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/python
|
||||
{
|
||||
"name": "Python 3",
|
||||
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
||||
"image": "mcr.microsoft.com/devcontainers/python:0-3.11"
|
||||
|
||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||
// "features": {},
|
||||
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
|
||||
// Use 'postCreateCommand' to run commands after the container is created.
|
||||
// "postCreateCommand": "pip3 install --user -r requirements.txt",
|
||||
|
||||
// Configure tool-specific properties.
|
||||
// "customizations": {},
|
||||
|
||||
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||
// "remoteUser": "root"
|
||||
}
|
@ -0,0 +1,2 @@
|
||||
.vscode
|
||||
.devcontainer
|
@ -0,0 +1,21 @@
|
||||
## MIT License
|
||||
|
||||
**Copyright (c) [2023] [multiple]**
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
**THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.**
|
@ -1,94 +1,132 @@
|
||||
import json
|
||||
from tempfile import _TemporaryFileWrapper
|
||||
import json # importing the JSON module for encoding and decoding data
|
||||
import requests # importing the Requests library for making HTTP requests
|
||||
import gradio as gr # importing the Gradio library for building web interfaces
|
||||
|
||||
import gradio as gr
|
||||
import requests
|
||||
# Define a function named ask_api that accepts 5 parameters -
|
||||
# lcserve_host, url, file, question, openAI_key - and returns a string
|
||||
|
||||
|
||||
def ask_api(
|
||||
lcserve_host: str,
|
||||
url: str,
|
||||
file: _TemporaryFileWrapper,
|
||||
file,
|
||||
question: str,
|
||||
openAI_key: str,
|
||||
) -> str:
|
||||
if not lcserve_host.startswith('http'):
|
||||
return '[ERROR]: Invalid API Host'
|
||||
# Check if lcserve_host starts with "http"
|
||||
if not lcserve_host.startswith("http"):
|
||||
# Throw an exception if lcserve_host is invalid
|
||||
raise ValueError("Invalid API Host")
|
||||
|
||||
if url.strip() == '' and file == None:
|
||||
return '[ERROR]: Both URL and PDF is empty. Provide atleast one.'
|
||||
# If neither url nor file is provided, throw an exception
|
||||
if not any([url.strip(), file]):
|
||||
raise ValueError("Either URL or PDF should be provided.")
|
||||
|
||||
if url.strip() != '' and file != None:
|
||||
return '[ERROR]: Both URL and PDF is provided. Please provide only one (eiter URL or PDF).'
|
||||
# If both url and file are provided, throw an exception
|
||||
if all([url.strip(), file]):
|
||||
raise ValueError("Both URL and PDF are provided. Please provide only one.")
|
||||
|
||||
if question.strip() == '':
|
||||
return '[ERROR]: Question field is empty'
|
||||
# If question field is empty, throw an exception
|
||||
if not question.strip():
|
||||
raise ValueError("Question field is empty.")
|
||||
|
||||
# Create a dictionary _data with two keys "question" and "envs"
|
||||
_data = {
|
||||
'question': question,
|
||||
'envs': {
|
||||
'OPENAI_API_KEY': openAI_key,
|
||||
},
|
||||
"question": question,
|
||||
"envs": {"OPENAI_API_KEY": openAI_key},
|
||||
}
|
||||
|
||||
if url.strip() != '':
|
||||
r = requests.post(
|
||||
f'{lcserve_host}/ask_url',
|
||||
json={'url': url, **_data},
|
||||
)
|
||||
# If url is provided, make a POST request to "lcserve_host"/ask_url route with data _data
|
||||
if url.strip():
|
||||
r = requests.post(f"{lcserve_host}/ask_url", json={"url": url, **_data})
|
||||
|
||||
# Otherwise open the file in binary mode and make a POST request to "lcserve_host"/ask_file route with data _data and the file
|
||||
else:
|
||||
with open(file.name, 'rb') as f:
|
||||
with open(file.name, "rb") as f:
|
||||
r = requests.post(
|
||||
f'{lcserve_host}/ask_file',
|
||||
params={'input_data': json.dumps(_data)},
|
||||
files={'file': f},
|
||||
f"{lcserve_host}/ask_file",
|
||||
params={"input_data": json.dumps(_data)},
|
||||
files={"file": f},
|
||||
)
|
||||
|
||||
if r.status_code != 200:
|
||||
raise ValueError(f'[ERROR]: {r.text}')
|
||||
try:
|
||||
# Raise an HTTPError if the server responded with a 4xx/5xx status code
|
||||
r.raise_for_status()
|
||||
except requests.exceptions.HTTPError as e:
|
||||
raise ValueError( # Throw a ValueError if the request fails
|
||||
f"Request failed with status code {r.status_code}: {e}"
|
||||
) from e
|
||||
|
||||
return r.json()['result']
|
||||
# Return the value of the "result" key in the JSON response
|
||||
return r.json()["result"]
|
||||
|
||||
|
||||
title = 'PDF GPT'
|
||||
# Define variables title and description which describe our Gradio interface
|
||||
title = "PDF GPT"
|
||||
description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Open AI. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""
|
||||
|
||||
# Define a Gradio Blocks object named demo
|
||||
with gr.Blocks() as demo:
|
||||
gr.Markdown(f'<center><h1>{title}</h1></center>')
|
||||
# Add a Markdown heading and description to the Gradio interface
|
||||
gr.Markdown(f"<center><h1>{title}</h1></center>")
|
||||
gr.Markdown(description)
|
||||
|
||||
# Create two side-by-side Groups for input fields and outputs
|
||||
with gr.Row():
|
||||
with gr.Group():
|
||||
# Add a Textbox widget to accept the API host URL from the user
|
||||
lcserve_host = gr.Textbox(
|
||||
label='Enter your API Host here',
|
||||
value='http://localhost:8080',
|
||||
placeholder='http://localhost:8080',
|
||||
label="Enter your API Host here",
|
||||
value="http://localhost:8080",
|
||||
placeholder="http://localhost:8080",
|
||||
)
|
||||
|
||||
# Add a link to the OpenAI API key webpage and a Password textbox to get the user's API Key
|
||||
gr.Markdown(
|
||||
f'<p style="text-align:center">Get your Open AI API key <a href="https://platform.openai.com/account/api-keys">here</a></p>'
|
||||
'<p style="text-align:center">Get your Open AI API key <a href="https://platform.openai.com/account/api-keys">here</a></p>'
|
||||
)
|
||||
openAI_key = gr.Textbox(
|
||||
label='Enter your OpenAI API key here', type='password'
|
||||
label="Enter your OpenAI API key here", type="password"
|
||||
)
|
||||
pdf_url = gr.Textbox(label='Enter PDF URL here')
|
||||
|
||||
# Add a Text box that allows users to enter URL of the PDF file they want to chat with
|
||||
pdf_url = gr.Textbox(label="Enter PDF URL here")
|
||||
|
||||
# Add a File Upload widget so that users can upload their PDF/Research Paper/Book
|
||||
gr.Markdown("<center><h4>OR<h4></center>")
|
||||
file = gr.File(
|
||||
label='Upload your PDF/ Research Paper / Book here', file_types=['.pdf']
|
||||
label="Upload your PDF/ Research Paper / Book here", file_types=[".pdf"]
|
||||
)
|
||||
question = gr.Textbox(label='Enter your question here')
|
||||
btn = gr.Button(value='Submit')
|
||||
btn.style(full_width=True)
|
||||
|
||||
with gr.Group():
|
||||
answer = gr.Textbox(label='The answer to your question is :')
|
||||
# Add a field for the user to enter their question
|
||||
question = gr.Textbox(label="Enter your question here")
|
||||
|
||||
btn.click(
|
||||
ask_api,
|
||||
inputs=[lcserve_host, pdf_url, file, question, openAI_key],
|
||||
outputs=[answer],
|
||||
)
|
||||
# Add a submit button for user to trigger their API request
|
||||
btn = gr.Button(value="Submit")
|
||||
btn.style(full_width=True)
|
||||
|
||||
demo.app.server.timeout = 60000 # Set the maximum return time for the results of accessing the upstream server
|
||||
|
||||
demo.launch(server_port=7860, enable_queue=True) # `enable_queue=True` to ensure the validity of multi-user requests
|
||||
# Add another group for the output area where the answer will be shown
|
||||
with gr.Group():
|
||||
answer = gr.Textbox(label="The answer to your question is :")
|
||||
|
||||
# Define on_click(), the handler that runs when the user clicks the "Submit" button
|
||||
def on_click(host, url, pdf, query, key):
    """Handle a click on the Submit button.

    Gradio passes the *current* values of the components listed in
    ``inputs`` as positional arguments and displays whatever this function
    returns in the components listed in ``outputs``. Reading
    ``component.value`` would only give the initial value set at
    construction time, and calling ``component.update(...)`` without
    returning the result has no visible effect.

    Args:
        host: Base URL of the API host.
        url: URL of the PDF to query; may be empty when a file is uploaded.
        pdf: Uploaded file object, or None when a URL is used instead.
        query: The question to ask about the PDF.
        key: OpenAI API key forwarded to the backend.

    Returns:
        The answer text, or an "[ERROR]: ..." message when ask_api raises
        ValueError (invalid input or a failed HTTP request).
    """
    try:
        return str(ask_api(host, url, pdf, query, key))
    except ValueError as e:
        # Surface validation / request failures in the answer box instead
        # of letting the exception propagate to the Gradio runtime.
        return f"[ERROR]: {e}"


# Wire the button to the handler: inputs supply the handler's arguments in
# order, and the returned string is written into the answer textbox.
btn.click(
    on_click,
    inputs=[lcserve_host, pdf_url, file, question, openAI_key],
    outputs=[answer],
)
|
||||
|
||||
# Launch the Gradio interface on port number 7860
|
||||
demo.launch(server_port=7860)
|
@ -1,8 +1,15 @@
|
||||
PyMuPDF
|
||||
numpy
|
||||
scikit-learn
|
||||
tensorflow>=2.0.0
|
||||
tensorflow-hub
|
||||
openai==0.10.2
|
||||
gradio
|
||||
# Machine learning libraries
|
||||
numpy>=1.17.0
|
||||
scikit-learn>=0.22.0
|
||||
tensorflow-macos>=2.0.0,<3.0.0
|
||||
tensorflow-hub>=0.9.0,<1.0.0
|
||||
|
||||
# PDF text extraction and OpenAI API client
|
||||
PyMuPDF>=1.18.13
|
||||
openai>=0.10.2,<0.11
|
||||
|
||||
# User interface library
|
||||
gradio>=1.4.0
|
||||
|
||||
# Serving layer that exposes the app's API endpoints (provides the lcserve host)
|
||||
langchain-serve>=0.0.19
|
||||
|
Loading…
Reference in New Issue