Spaces:

Ab-Romia
/

Context-Aware-AI

Sleeping

App Files Community

Ab-Romia committed on Aug 1, 2025

Commit

2d010fd

verified ·

1 Parent(s): 4f57074

Update app/services.py

Browse files

Files changed (1) hide show

app/services.py +122 -29

app/services.py CHANGED Viewed

@@ -5,6 +5,7 @@ import textwrap
 import time
 import rag_setup
 from schemas import ChatRequest, DocumentRequest, TaskRequest
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s [%(levelname)s] %(message)s',
@@ -20,14 +21,24 @@ CACHE_EXPIRATION_SECONDS = 600  # 10 minutes
 def index_document(request_data: DocumentRequest) -> int:
-    logger.info("Starting document indexing process.")
     try:
         # Step 1: Clear any existing documents properly
         existing_ids = rag_setup.collection.get()["ids"]
         if existing_ids:
             rag_setup.collection.delete(ids=existing_ids)
-        logger.info("Cleared existing documents from vector collection.")
         # Step 2: Chunk document
         text_chunks = textwrap.wrap(
@@ -38,23 +49,37 @@ def index_document(request_data: DocumentRequest) -> int:
         )
         if not text_chunks:
-            logger.warning("No text chunks were generated.")
             return 0
         # Step 3: Add chunks to ChromaDB
         chunk_ids = [f"doc_chunk_{i}_{int(time.time())}" for i in range(len(text_chunks))]
-        logger.info(f"Attempting to add {len(chunk_ids)} chunks to ChromaDB...")
         rag_setup.collection.add(documents=text_chunks, ids=chunk_ids)
         return len(text_chunks)
     except Exception as e:
-        logger.error(f"Error during indexing: {str(e)}", exc_info=True)
         raise
 def clear_index():
     """Clears all documents from the vector database."""
     rag_setup.collection.delete(where={})
-    logger.info("Successfully cleared the vector index.")
 async def get_rag_response(request_data: ChatRequest) -> str:
@@ -62,31 +87,52 @@ async def get_rag_response(request_data: ChatRequest) -> str:
     Performs the RAG pipeline: checks cache, retrieves context, generates a response.
     """
     start_total = time.time()
-    logger.info(f"Processing query: '{request_data.prompt}'")
     try:
         # Step 1: Check cache for a recent, identical query
         cached_response = _get_cached_response(request_data.prompt)
         if cached_response:
-            logger.info("Cache hit! Returning cached response.")
             return f"{cached_response}\n\n(This response was retrieved from cache)"
-        logger.info("Cache miss. Proceeding with RAG pipeline.")
         # Step 2: Check if the vector database has any content
-        if rag_setup.collection.count() == 0:
-            logger.warning("Vector DB is empty. Cannot answer query.")
             return "The knowledge base is empty. Please provide some context in the left panel and click 'Index Context' before asking questions."
         # Step 3: Retrieve relevant chunks from ChromaDB
-        logger.info("Retrieving relevant chunks from vector DB...")
         retrieved_chunks = await _retrieve_chunks_async(request_data.prompt)
         if not retrieved_chunks or not retrieved_chunks.get('documents') or not retrieved_chunks['documents'][0]:
-            logger.warning("No relevant chunks found in the vector DB for this query.")
             return "I could not find any relevant information in the provided context to answer your question."
-        context_for_prompt = "\n\n---\n\n".join(retrieved_chunks['documents'][0])
         # Step 4: Construct the final prompt for the LLM
         full_prompt = (
@@ -98,23 +144,36 @@ async def get_rag_response(request_data: ChatRequest) -> str:
             "--- CONTEXT END ---\n\n"
             f'User\'s Question: "{request_data.prompt}"'
         )
         # Step 5: Generate the response using the LLM
-        logger.info("Generating response from OpenRouter...")
         response_text = await _generate_response_async(full_prompt)
         # Step 6: Cache the newly generated response
         _cache_response(request_data.prompt, response_text)
         total_time = time.time() - start_total
-        logger.info(f"Total processing time: {total_time:.2f}s")
         return response_text
     except asyncio.TimeoutError:
-        logger.error("Request timed out during retrieval or generation.")
         return "The request timed out. Please try again or simplify your question."
     except Exception as e:
-        logger.error(f"An unexpected error occurred: {e}", exc_info=True)
         return f"An unexpected error occurred: {e}"
@@ -123,12 +182,23 @@ async def execute_task(request_data: TaskRequest) -> str:
     Executes a specific task on the given context.
     """
     start_total = time.time()
-    logger.info(f"Executing task '{request_data.task_type}' with prompt: '{request_data.prompt}'")
     try:
         # For tasks, we use the full context, not just retrieved chunks
         context = request_data.context
         if not context:
             return "Context is empty. Please provide some text in the 'Knowledge Base' to perform a task."
         # Construct the prompt based on the task type
@@ -139,57 +209,80 @@ async def execute_task(request_data: TaskRequest) -> str:
         elif request_data.task_type == "creative":
             full_prompt = f"Use the following text as inspiration to write a creative piece (e.g., a poem, a short story, a metaphor). The user's prompt can guide the style or topic.\n\n--- INSPIRATION ---\n{context}\n\n--- PROMPT ---\n{request_data.prompt or 'Write a short poem'}"
         else:
             return "Invalid task type specified."
         # Generate the response
-        logger.info("Generating task-based response from OpenRouter...")
         response_text = await _generate_response_async(full_prompt)
         total_time = time.time() - start_total
-        logger.info(f"Task execution time: {total_time:.2f}s")
         return response_text
     except asyncio.TimeoutError:
-        logger.error("Request timed out during task execution.")
         return "The request timed out. Please try again."
     except Exception as e:
-        logger.error(f"An unexpected error occurred during task execution: {e}", exc_info=True)
         return f"An unexpected error occurred: {e}"
 # --- ASYNC WRAPPERS & CACHE HELPERS ---
 async def _retrieve_chunks_async(prompt: str):
     """Asynchronously queries the ChromaDB collection."""
     loop = asyncio.get_event_loop()
-    return await loop.run_in_executor(
         None,
         functools.partial(rag_setup.collection.query, query_texts=[prompt], n_results=3)
     )
 async def _generate_response_async(full_prompt: str):
     """Asynchronously calls the LLM to generate content."""
     loop = asyncio.get_event_loop()
-    return await loop.run_in_executor(
         None,
         rag_setup.generation_model.generate_content,
         full_prompt
     )
 def _get_cached_response(key: str):
     """Checks the cache for a valid (non-expired) entry."""
     if key in _response_cache:
         timestamp, response = _response_cache[key]
         if time.time() - timestamp < CACHE_EXPIRATION_SECONDS:
             return response
         else:
             # Expired, remove from cache
             del _response_cache[key]
     return None
 def _cache_response(key: str, response: str):
     """Adds a response to the cache with the current timestamp."""
-    _response_cache[key] = (time.time(), response)

 import time
 import rag_setup
 from schemas import ChatRequest, DocumentRequest, TaskRequest
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s [%(levelname)s] %(message)s',
 def index_document(request_data: DocumentRequest) -> int:
+    logger.info("=" * 80)
+    logger.info("📚 STARTING DOCUMENT INDEXING PROCESS")
+    logger.info("=" * 80)
+    # Log the incoming context
+    context_preview = request_data.context[:200] + "..." if len(request_data.context) > 200 else request_data.context
+    logger.info(f"📝 CONTEXT TO INDEX (length: {len(request_data.context)} chars):")
+    logger.info(f"   Preview: {context_preview}")
+    logger.info("-" * 60)
     try:
         # Step 1: Clear any existing documents properly
         existing_ids = rag_setup.collection.get()["ids"]
         if existing_ids:
             rag_setup.collection.delete(ids=existing_ids)
+            logger.info(f"🗑️  Cleared {len(existing_ids)} existing documents from vector collection.")
+        else:
+            logger.info("📂 No existing documents to clear.")
         # Step 2: Chunk document
         text_chunks = textwrap.wrap(
         )
         if not text_chunks:
+            logger.warning("⚠️  No text chunks were generated.")
             return 0
+        logger.info(f"✂️  Document split into {len(text_chunks)} chunks")
+        # Log each chunk for debugging
+        for i, chunk in enumerate(text_chunks):
+            chunk_preview = chunk[:100] + "..." if len(chunk) > 100 else chunk
+            logger.info(f"   Chunk {i+1}: {chunk_preview} (length: {len(chunk)} chars)")
         # Step 3: Add chunks to ChromaDB
         chunk_ids = [f"doc_chunk_{i}_{int(time.time())}" for i in range(len(text_chunks))]
+        logger.info(f"💾 Adding {len(chunk_ids)} chunks to ChromaDB...")
         rag_setup.collection.add(documents=text_chunks, ids=chunk_ids)
+        logger.info("✅ DOCUMENT INDEXING COMPLETED SUCCESSFULLY")
+        logger.info(f"📊 Total chunks indexed: {len(text_chunks)}")
+        logger.info("=" * 80)
         return len(text_chunks)
     except Exception as e:
+        logger.error(f"❌ Error during indexing: {str(e)}", exc_info=True)
         raise
 def clear_index():
     """Clears all documents from the vector database."""
+    logger.info("🗑️  Clearing vector index...")
     rag_setup.collection.delete(where={})
+    logger.info("✅ Successfully cleared the vector index.")
 async def get_rag_response(request_data: ChatRequest) -> str:
     Performs the RAG pipeline: checks cache, retrieves context, generates a response.
     """
     start_total = time.time()
+    logger.info("=" * 80)
+    logger.info("🤖 STARTING RAG PIPELINE")
+    logger.info("=" * 80)
+    logger.info(f"❓ USER PROMPT: '{request_data.prompt}'")
+    logger.info(f"📏 Prompt length: {len(request_data.prompt)} characters")
+    logger.info("-" * 60)
     try:
         # Step 1: Check cache for a recent, identical query
         cached_response = _get_cached_response(request_data.prompt)
         if cached_response:
+            logger.info("💾 CACHE HIT! Returning cached response.")
+            logger.info(f"📤 CACHED RESPONSE: {cached_response[:200]}...")
             return f"{cached_response}\n\n(This response was retrieved from cache)"
+        logger.info("🔍 Cache miss. Proceeding with RAG pipeline.")
         # Step 2: Check if the vector database has any content
+        doc_count = rag_setup.collection.count()
+        logger.info(f"📚 Vector DB contains {doc_count} documents")
+        if doc_count == 0:
+            logger.warning("⚠️  Vector DB is empty. Cannot answer query.")
             return "The knowledge base is empty. Please provide some context in the left panel and click 'Index Context' before asking questions."
         # Step 3: Retrieve relevant chunks from ChromaDB
+        logger.info("🔎 Retrieving relevant chunks from vector DB...")
         retrieved_chunks = await _retrieve_chunks_async(request_data.prompt)
         if not retrieved_chunks or not retrieved_chunks.get('documents') or not retrieved_chunks['documents'][0]:
+            logger.warning("❌ No relevant chunks found in the vector DB for this query.")
             return "I could not find any relevant information in the provided context to answer your question."
+        # Log retrieved chunks
+        chunks = retrieved_chunks['documents'][0]
+        logger.info(f"📋 Retrieved {len(chunks)} relevant chunks:")
+        for i, chunk in enumerate(chunks):
+            chunk_preview = chunk[:150] + "..." if len(chunk) > 150 else chunk
+            logger.info(f"   Chunk {i+1}: {chunk_preview}")
+        context_for_prompt = "\n\n---\n\n".join(chunks)
+        logger.info(f"📝 CONTEXT FOR LLM (total length: {len(context_for_prompt)} chars):")
+        context_preview = context_for_prompt[:300] + "..." if len(context_for_prompt) > 300 else context_for_prompt
+        logger.info(f"   Context preview: {context_preview}")
+        logger.info("-" * 60)
         # Step 4: Construct the final prompt for the LLM
         full_prompt = (
             "--- CONTEXT END ---\n\n"
             f'User\'s Question: "{request_data.prompt}"'
         )
+        logger.info(f"🔧 FULL PROMPT TO LLM (length: {len(full_prompt)} chars):")
+        prompt_preview = full_prompt[:400] + "..." if len(full_prompt) > 400 else full_prompt
+        logger.info(f"   Prompt preview: {prompt_preview}")
+        logger.info("-" * 60)
         # Step 5: Generate the response using the LLM
+        logger.info("🧠 Generating response from OpenRouter...")
         response_text = await _generate_response_async(full_prompt)
+        logger.info(f"📤 LLM RESPONSE (length: {len(response_text)} chars):")
+        response_preview = response_text[:300] + "..." if len(response_text) > 300 else response_text
+        logger.info(f"   Response preview: {response_preview}")
         # Step 6: Cache the newly generated response
         _cache_response(request_data.prompt, response_text)
+        logger.info("💾 Response cached for future use")
         total_time = time.time() - start_total
+        logger.info(f"⏱️  Total processing time: {total_time:.2f}s")
+        logger.info("✅ RAG PIPELINE COMPLETED SUCCESSFULLY")
+        logger.info("=" * 80)
         return response_text
     except asyncio.TimeoutError:
+        logger.error("⏱️  Request timed out during retrieval or generation.")
         return "The request timed out. Please try again or simplify your question."
     except Exception as e:
+        logger.error(f"❌ An unexpected error occurred: {e}", exc_info=True)
         return f"An unexpected error occurred: {e}"
     Executes a specific task on the given context.
     """
     start_total = time.time()
+    logger.info("=" * 80)
+    logger.info("🎯 STARTING TASK EXECUTION")
+    logger.info("=" * 80)
+    logger.info(f"📋 TASK TYPE: {request_data.task_type}")
+    logger.info(f"❓ TASK PROMPT: '{request_data.prompt}'")
+    logger.info(f"📏 Context length: {len(request_data.context)} characters")
+    context_preview = request_data.context[:200] + "..." if len(request_data.context) > 200 else request_data.context
+    logger.info(f"📝 CONTEXT PREVIEW: {context_preview}")
+    logger.info("-" * 60)
     try:
         # For tasks, we use the full context, not just retrieved chunks
         context = request_data.context
         if not context:
+            logger.warning("⚠️  Context is empty for task execution")
             return "Context is empty. Please provide some text in the 'Knowledge Base' to perform a task."
         # Construct the prompt based on the task type
         elif request_data.task_type == "creative":
             full_prompt = f"Use the following text as inspiration to write a creative piece (e.g., a poem, a short story, a metaphor). The user's prompt can guide the style or topic.\n\n--- INSPIRATION ---\n{context}\n\n--- PROMPT ---\n{request_data.prompt or 'Write a short poem'}"
         else:
+            logger.error(f"❌ Invalid task type: {request_data.task_type}")
             return "Invalid task type specified."
+        logger.info(f"🔧 FULL TASK PROMPT (length: {len(full_prompt)} chars):")
+        prompt_preview = full_prompt[:400] + "..." if len(full_prompt) > 400 else full_prompt
+        logger.info(f"   Prompt preview: {prompt_preview}")
+        logger.info("-" * 60)
         # Generate the response
+        logger.info("🧠 Generating task-based response from OpenRouter...")
         response_text = await _generate_response_async(full_prompt)
+        logger.info(f"📤 TASK RESPONSE (length: {len(response_text)} chars):")
+        response_preview = response_text[:300] + "..." if len(response_text) > 300 else response_text
+        logger.info(f"   Response preview: {response_preview}")
         total_time = time.time() - start_total
+        logger.info(f"⏱️  Task execution time: {total_time:.2f}s")
+        logger.info("✅ TASK EXECUTION COMPLETED SUCCESSFULLY")
+        logger.info("=" * 80)
         return response_text
     except asyncio.TimeoutError:
+        logger.error("⏱️  Request timed out during task execution.")
         return "The request timed out. Please try again."
     except Exception as e:
+        logger.error(f"❌ An unexpected error occurred during task execution: {e}", exc_info=True)
         return f"An unexpected error occurred: {e}"
 # --- ASYNC WRAPPERS & CACHE HELPERS ---
 async def _retrieve_chunks_async(prompt: str):
     """Asynchronously queries the ChromaDB collection."""
+    logger.info(f"🔍 Querying ChromaDB for prompt: '{prompt}'")
     loop = asyncio.get_event_loop()
+    result = await loop.run_in_executor(
         None,
         functools.partial(rag_setup.collection.query, query_texts=[prompt], n_results=3)
     )
+    logger.info(f"📊 ChromaDB query returned {len(result.get('documents', [[]])[0])} chunks")
+    return result
 async def _generate_response_async(full_prompt: str):
     """Asynchronously calls the LLM to generate content."""
+    logger.info("🤖 Calling LLM for content generation...")
+    logger.info(f"📏 Prompt length sent to LLM: {len(full_prompt)} characters")
     loop = asyncio.get_event_loop()
+    response = await loop.run_in_executor(
         None,
         rag_setup.generation_model.generate_content,
         full_prompt
     )
+    logger.info(f"✅ LLM response received (length: {len(response)} chars)")
+    return response
 def _get_cached_response(key: str):
     """Checks the cache for a valid (non-expired) entry."""
     if key in _response_cache:
         timestamp, response = _response_cache[key]
         if time.time() - timestamp < CACHE_EXPIRATION_SECONDS:
+            logger.info(f"💾 Cache hit for key: '{key[:50]}...'")
             return response
         else:
             # Expired, remove from cache
             del _response_cache[key]
+            logger.info(f"🗑️  Expired cache entry removed for key: '{key[:50]}...'")
     return None
 def _cache_response(key: str, response: str):
     """Adds a response to the cache with the current timestamp."""
+    _response_cache[key] = (time.time(), response)
+    logger.info(f"💾 Response cached for key: '{key[:50]}...' (response length: {len(response)} chars)")