Spaces:
Running
Running
Update app/services.py
Browse files
- app/services.py +7 -4
app/services.py
CHANGED
|
@@ -254,7 +254,10 @@ async def get_rag_response(request_data: ChatRequest, api_key: Optional[str] = N
|
|
| 254 |
|
| 255 |
# Step 3: Retrieve relevant chunks from ChromaDB
|
| 256 |
logger.info("🔎 Retrieving relevant chunks from vector DB...")
|
| 257 |
-
retrieved_chunks = await _retrieve_chunks_async(
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
if not retrieved_chunks or not retrieved_chunks.get('documents') or not retrieved_chunks['documents'][0]:
|
| 260 |
logger.warning("❌ No relevant chunks found in the vector DB for this query.")
|
|
@@ -267,15 +270,15 @@ async def get_rag_response(request_data: ChatRequest, api_key: Optional[str] = N
|
|
| 267 |
context_for_prompt = "\n\n---\n\n".join(chunks)
|
| 268 |
|
| 269 |
# Limit context length to prevent timeouts
|
| 270 |
-
max_context_length =
|
| 271 |
if len(context_for_prompt) > max_context_length:
|
| 272 |
logger.warning(f"⚠️ Context too long, truncating to {max_context_length}")
|
| 273 |
-
context_for_prompt = context_for_prompt[:max_context_length] + "\n\n[... content truncated
|
| 274 |
|
| 275 |
# Step 4: Construct prompt for the LLM
|
| 276 |
full_prompt = (
|
| 277 |
"Based on the context below, answer the user's question concisely and accurately. "
|
| 278 |
-
"If the answer isn't in the context, say so clearly.\n\n"
|
| 279 |
"Context:\n"
|
| 280 |
f"{context_for_prompt}\n\n"
|
| 281 |
f"Question: {request_data.prompt}\n\n"
|
|
|
|
| 254 |
|
| 255 |
# Step 3: Retrieve relevant chunks from ChromaDB
|
| 256 |
logger.info("🔎 Retrieving relevant chunks from vector DB...")
|
| 257 |
+
retrieved_chunks = await _retrieve_chunks_async(
|
| 258 |
+
request_data.prompt,
|
| 259 |
+
n_results=settings.MAX_CHUNKS_RETRIEVE
|
| 260 |
+
)
|
| 261 |
|
| 262 |
if not retrieved_chunks or not retrieved_chunks.get('documents') or not retrieved_chunks['documents'][0]:
|
| 263 |
logger.warning("❌ No relevant chunks found in the vector DB for this query.")
|
|
|
|
| 270 |
context_for_prompt = "\n\n---\n\n".join(chunks)
|
| 271 |
|
| 272 |
# Limit context length to prevent timeouts
|
| 273 |
+
max_context_length = settings.MAX_CONTEXT_LENGTH_CHAT
|
| 274 |
if len(context_for_prompt) > max_context_length:
|
| 275 |
logger.warning(f"⚠️ Context too long, truncating to {max_context_length}")
|
| 276 |
+
context_for_prompt = context_for_prompt[:max_context_length] + "\n\n[... content truncated ...]"
|
| 277 |
|
| 278 |
# Step 4: Construct prompt for the LLM
|
| 279 |
full_prompt = (
|
| 280 |
"Based on the context below, answer the user's question concisely and accurately. "
|
| 281 |
+
"If the answer isn't in the context, say so clearly and answer the question yourself based on your knowledge.\n\n"
|
| 282 |
"Context:\n"
|
| 283 |
f"{context_for_prompt}\n\n"
|
| 284 |
f"Question: {request_data.prompt}\n\n"
|