Spaces:

Ab-Romia
/

Context-Aware-AI

Running

Ab-Romia committed on Aug 3, 2025

Commit

a1a7fd1

verified ·

1 Parent(s): 4ddb855

Update app/services.py

Files changed (1) hide show

app/services.py CHANGED Viewed

@@ -254,7 +254,10 @@ async def get_rag_response(request_data: ChatRequest, api_key: Optional[str] = N
         # Step 3: Retrieve relevant chunks from ChromaDB
         logger.info("🔎 Retrieving relevant chunks from vector DB...")
-        retrieved_chunks = await _retrieve_chunks_async(request_data.prompt, n_results=2)
         if not retrieved_chunks or not retrieved_chunks.get('documents') or not retrieved_chunks['documents'][0]:
             logger.warning("❌ No relevant chunks found in the vector DB for this query.")
@@ -267,15 +270,15 @@ async def get_rag_response(request_data: ChatRequest, api_key: Optional[str] = N
         context_for_prompt = "\n\n---\n\n".join(chunks)
         # Limit context length to prevent timeouts
-        max_context_length = 8000
         if len(context_for_prompt) > max_context_length:
             logger.warning(f"⚠️  Context too long, truncating to {max_context_length}")
-            context_for_prompt = context_for_prompt[:max_context_length] + "\n\n[... content truncated for performance ...]"
         # Step 4: Construct prompt for the LLM
         full_prompt = (
             "Based on the context below, answer the user's question concisely and accurately. "
-            "If the answer isn't in the context, say so clearly.\n\n"
             "Context:\n"
             f"{context_for_prompt}\n\n"
             f"Question: {request_data.prompt}\n\n"

         # Step 3: Retrieve relevant chunks from ChromaDB
         logger.info("🔎 Retrieving relevant chunks from vector DB...")
+        retrieved_chunks = await _retrieve_chunks_async(
+            request_data.prompt,
+            n_results=settings.MAX_CHUNKS_RETRIEVE
+        )
         if not retrieved_chunks or not retrieved_chunks.get('documents') or not retrieved_chunks['documents'][0]:
             logger.warning("❌ No relevant chunks found in the vector DB for this query.")
         context_for_prompt = "\n\n---\n\n".join(chunks)
         # Limit context length to prevent timeouts
+        max_context_length = settings.MAX_CONTEXT_LENGTH_CHAT
         if len(context_for_prompt) > max_context_length:
             logger.warning(f"⚠️  Context too long, truncating to {max_context_length}")
+            context_for_prompt = context_for_prompt[:max_context_length] + "\n\n[... content truncated ...]"
         # Step 4: Construct prompt for the LLM
         full_prompt = (
             "Based on the context below, answer the user's question concisely and accurately. "
+            "If the answer isn't in the context, say so clearly and answer the question yourself based on your knowledge.\n\n"
             "Context:\n"
             f"{context_for_prompt}\n\n"
             f"Question: {request_data.prompt}\n\n"