Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from huggingface_hub import InferenceClient | |
# --- Shared module-level resources ------------------------------------------

# Sentence-embedding model used when building the FAISS index; pinned to CPU.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs={"device": "cpu"},
)

# Client for the hosted chat model. The access token is read from the
# HF_TOKEN environment variable and is None when that variable is unset.
CHAT_MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
client = InferenceClient(model=CHAT_MODEL_ID, token=os.getenv("HF_TOKEN"))

# FAISS index of the most recently processed PDF; stays None until
# process_pdf() has built one successfully.
vectorstore = None
def process_pdf(pdf_file):
    """Index an uploaded PDF so questions can be answered against it.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping text
    chunks (1000 characters, 200 overlap), embedded with the module-level
    ``embedding_model`` and stored in a FAISS index that replaces the
    module-level ``vectorstore``. The previous index is left untouched when
    processing fails.

    Args:
        pdf_file: The value delivered by the Gradio ``File`` component.
            Either an object exposing the on-disk path as ``.name`` or a
            plain path string is accepted. ``None`` means nothing was
            uploaded.

    Returns:
        A human-readable status string: a prompt to upload a file, a success
        summary with page and chunk counts, or an error description. This
        function never raises; every failure is reported through the string.
    """
    global vectorstore

    if pdf_file is None:
        return "Please upload a PDF file."

    try:
        # The upload may arrive as a file-like wrapper (path in ``.name``)
        # or directly as a path string; handle both.
        pdf_path = getattr(pdf_file, "name", pdf_file)

        documents = PyPDFLoader(pdf_path).load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        chunks = text_splitter.split_documents(documents)

        # A PDF without extractable text (e.g. scanned images) yields no
        # chunks; report that clearly instead of letting index construction
        # fail with an unhelpful low-level error.
        if not chunks:
            return "❌ Error: No extractable text was found in this PDF."

        vectorstore = FAISS.from_documents(documents=chunks, embedding=embedding_model)
        return f"✅ Processed {len(documents)} pages into {len(chunks)} chunks."
    except Exception as e:
        # UI boundary: surface the failure in the status box rather than
        # crashing the event handler.
        return f"❌ Error: {e}"
def _page_label(metadata):
    """Return a 1-based page label for a chunk's metadata, or "N/A" if absent."""
    page = metadata.get("page", "N/A")
    # PyPDFLoader stores a zero-based page index; shift it so the first page
    # is shown as "Page 1". Non-integer values are passed through unchanged.
    if isinstance(page, int) and not isinstance(page, bool):
        return page + 1
    return page


def answer_question(question):
    """Answer a question using the currently indexed PDF as context.

    The three chunks most similar to the question are retrieved from the
    module-level ``vectorstore``, concatenated into a context block and sent
    to the chat model through the module-level ``client`` together with a
    system prompt restricting the answer to that context.

    Args:
        question: The user's question. ``None``, an empty string or a
            whitespace-only string is treated as "no question".

    Returns:
        A ``(answer, sources)`` tuple of strings. ``sources`` lists one line
        per retrieved chunk with its 1-based page number. When no PDF has
        been processed, the question is empty, or any step fails, ``answer``
        carries the explanatory message and ``sources`` is ``""``. This
        function never raises.
    """
    if vectorstore is None:
        return "Upload a PDF first.", ""
    if not question or not question.strip():
        return "Enter a question.", ""

    try:
        docs = vectorstore.similarity_search(question, k=3)
        context = "\n\n".join(doc.page_content for doc in docs)
        messages = [
            {
                "role": "system",
                "content": "Answer based on the provided context only. If the answer is not in the context, say so.",
            },
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {question}",
            },
        ]
        completion = client.chat_completion(messages, max_tokens=512, temperature=0.7)
        answer_text = completion.choices[0].message.content
        source_lines = [
            f"{rank}. Page {_page_label(doc.metadata)}"
            for rank, doc in enumerate(docs, 1)
        ]
        return answer_text, "\n".join(source_lines)
    except Exception as e:
        # UI boundary: show the failure in the answer box instead of raising.
        return f"Error: {e}", ""
# Gradio front end: a two-column page with PDF upload/indexing on one side and
# question answering on the other.
# NOTE(review): the "π" in the title looks like a mis-decoded emoji; confirm
# the intended character against the original file.
with gr.Blocks() as demo:
    gr.Markdown("# π RAG Document Q&A")

    with gr.Row():
        # Upload and indexing controls.
        with gr.Column():
            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
            process_button = gr.Button("Process PDF")
            status_box = gr.Textbox(label="Status")

        # Question, answer and source-page display.
        with gr.Column():
            question_box = gr.Textbox(label="Question")
            ask_button = gr.Button("Ask")
            answer_box = gr.Textbox(label="Answer", lines=5)
            sources_box = gr.Textbox(label="Sources")

    # Event wiring: each button passes its input component's value to the
    # handler and writes the handler's return value(s) to the outputs.
    process_button.click(fn=process_pdf, inputs=pdf_upload, outputs=status_box)
    ask_button.click(
        fn=answer_question,
        inputs=question_box,
        outputs=[answer_box, sources_box],
    )

# Serve on all network interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)