Spaces:

Parthiban97
/

Chat_With_IPYNB_Files

Sleeping

App Files Files Community

Parthiban97 commited on Jul 8, 2024

Commit

c111a70

verified ·

1 Parent(s): 737ea28

Upload 2 files

Browse files

Files changed (2) hide show

app.py +155 -0
requirements.txt +9 -0

app.py ADDED Viewed

	@@ -0,0 +1,155 @@

+import streamlit as st
+import os
+import tempfile
+import time
+import nbformat
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_core.prompts import ChatPromptTemplate
+from langchain.chains import create_retrieval_chain
+from langchain_community.vectorstores import FAISS
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+from dotenv import load_dotenv
+from langchain_core.documents import Document
+load_dotenv()
+st.set_page_config(page_title="Chat with Notebooks", page_icon=":books:")
+st.title("Chat Gemini Document Q&A with Jupyter Notebooks")
+# Custom prompt template
+custom_context_input = """
+<context>
+{context}
+</context>
+Questions:{input}
+"""
+# Default prompt template
+default_prompt_template = """
+Answer the questions based on the provided context only.
+Please provide the most accurate response based on the question
+<context>
+{context}
+</context>
+Questions:{input}
+"""
+def load_notebook(file_path):
+    with open(file_path, 'r', encoding='utf-8') as f:
+        notebook = nbformat.read(f, as_version=4)
+    return notebook
+def extract_text_from_notebook(notebook):
+    text = []
+    for cell in notebook.cells:
+        if cell.cell_type == 'markdown':
+            text.append(cell.source)
+        elif cell.cell_type == 'code':
+            text.append(cell.source)
+            if 'outputs' in cell:
+                for output in cell.outputs:
+                    if output.output_type == 'stream':
+                        text.append(output.text)
+                    elif output.output_type == 'execute_result' and 'data' in output:
+                        text.append(output.data.get('text/plain', ''))
+    return "\n".join(text)
+def vector_embedding(ipynb_files):
+    if "vectors" not in st.session_state:
+        st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+    documents = []
+    for ipynb_file in ipynb_files:
+        # Save the uploaded file to a temporary location
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".ipynb") as tmp_file:
+            tmp_file.write(ipynb_file.getvalue())
+            tmp_file_path = tmp_file.name
+        # Load the .ipynb file from the temporary file path
+        notebook = load_notebook(tmp_file_path)
+        text = extract_text_from_notebook(notebook)
+        # Create a Document object instead of using plain text
+        documents.append(Document(page_content=text))
+        # Remove the temporary file
+        os.remove(tmp_file_path)
+    # Ensure documents are properly segmented or chunked
+    st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+    try:
+        segmented_documents = st.session_state.text_splitter.split_documents(documents)
+        st.session_state.final_documents = segmented_documents
+        if st.session_state.final_documents:
+            # Embedding using FAISS
+            st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)
+            st.success("Document embedding is completed!")
+        else:
+            st.warning("No documents found to embed.")
+    except Exception as e:
+        st.error(f"Error splitting or embedding documents: {str(e)}")
+        st.session_state.final_documents = []  # Handle empty documents or retry
+# Define model options for Gemini
+model_options = [
+  "models/gemini-1.0-pro",
+    "models/gemini-1.0-pro-001",
+    "models/gemini-1.0-pro-latest",
+    "models/gemini-1.0-pro-vision-latest",
+    "models/gemini-1.5-flash-latest",
+    "models/gemini-1.5-pro-latest",
+    "models/gemini-pro",
+    "models/gemini-pro-vision"
+]
+# Sidebar elements
+with st.sidebar:
+    st.header("Configuration")
+    st.markdown("Enter your API key below:")
+    google_api_key = st.text_input("Enter your Google API Key", type="password", help="Get your API key from [Google AI Studio](https://aistudio.google.com/app/apikey)")
+    selected_model = st.selectbox("Select Gemini Model", model_options)
+    os.environ["GOOGLE_API_KEY"] = str(google_api_key)
+    st.markdown("Upload your .ipynb files:")
+    uploaded_files = st.file_uploader("Choose .ipynb files", accept_multiple_files=True, type="ipynb")
+    # Custom prompt text areas
+    st.markdown("Enter a custom prompt template (optional):")
+    custom_prompt_template = st.text_area("Custom Prompt Template", placeholder="Enter your custom prompt here...")
+    if st.button("Start Document Embedding"):
+        if uploaded_files:
+            vector_embedding(uploaded_files)
+            st.success("Vector Store DB is Ready")
+        else:
+            st.warning("Please upload at least one .ipynb file.")
+# Main section for question input and results
+prompt1 = st.text_area("Enter Your Question From Documents")
+if prompt1 and "vectors" in st.session_state:
+    if custom_prompt_template:
+        custom_prompt = custom_prompt_template + custom_context_input
+        prompt = ChatPromptTemplate.from_template(custom_prompt)
+    else:
+        prompt = ChatPromptTemplate.from_template(default_prompt_template)
+    llm = ChatGoogleGenerativeAI(model=selected_model, temperature=0.3)
+    document_chain = create_stuff_documents_chain(llm, prompt)
+    retriever = st.session_state.vectors.as_retriever()
+    retrieval_chain = create_retrieval_chain(retriever, document_chain)
+    start = time.process_time()
+    response = retrieval_chain.invoke({'input': prompt1})
+    st.write("Response time:", time.process_time() - start)
+    st.write(response['answer'])
+    # With a Streamlit expander
+    with st.expander("Document Similarity Search"):
+        # Find the relevant chunks
+        for i, doc in enumerate(response["context"]):
+            st.write(doc.page_content)
+            st.write("--------------------------------")

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+streamlit
+langchain
+openai
+nbformat
+faiss-cpu
+langchain-google-genai
+langchain-groq
+langchain_community
+python-dotenv