Files changed (17) hide show
  1. .dockerignore +0 -63
  2. .gitattributes +35 -0
  3. .gitignore +0 -4
  4. Dockerfile +1 -6
  5. LICENSE +0 -21
  6. README.md +52 -307
  7. README_HUGGINGFACE.md +0 -106
  8. app/__init__.py +0 -0
  9. app/config.py +31 -71
  10. app/main.py +21 -31
  11. app/rag_setup.py +79 -296
  12. app/schemas.py +2 -18
  13. app/services.py +210 -415
  14. main.py +5 -5
  15. requirements.txt +33 -22
  16. static/app.js +36 -209
  17. templates/index.html +30 -78
.dockerignore DELETED
@@ -1,63 +0,0 @@
1
- # Python
2
- __pycache__/
3
- *.py[cod]
4
- *$py.class
5
- *.so
6
- .Python
7
- *.egg-info/
8
- dist/
9
- build/
10
- *.egg
11
-
12
- # Virtual environments
13
- venv/
14
- env/
15
- ENV/
16
- .venv/
17
-
18
- # IDEs
19
- .idea/
20
- .vscode/
21
- *.swp
22
- *.swo
23
- *~
24
-
25
- # Git
26
- .git/
27
- .gitignore
28
- .gitattributes
29
-
30
- # OS
31
- .DS_Store
32
- Thumbs.db
33
-
34
- # Logs
35
- *.log
36
-
37
- # Temporary files
38
- *.tmp
39
- *.bak
40
- *.swp
41
- .cache/
42
-
43
- # ChromaDB local storage
44
- chroma_db/
45
- *.sqlite3
46
-
47
- # Documentation (if not needed in container)
48
- *.md
49
- !README.md
50
-
51
- # Tests (if any)
52
- tests/
53
- test_*
54
- *_test.py
55
-
56
- # CI/CD
57
- .github/
58
- .gitlab-ci.yml
59
-
60
- # Docker
61
- Dockerfile
62
- .dockerignore
63
- docker-compose.yml
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore DELETED
@@ -1,4 +0,0 @@
1
- .env
2
- __pycache__/
3
- *.pyc
4
- .venv/
 
 
 
 
 
Dockerfile CHANGED
@@ -1,15 +1,10 @@
1
- FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
  # Install system dependencies
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
8
- curl \
9
- git \
10
- wget \
11
- gcc \
12
- g++ \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
  # Upgrade pip and install wheel
 
1
+ FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
5
  # Install system dependencies
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
 
 
 
 
 
8
  && rm -rf /var/lib/apt/lists/*
9
 
10
  # Upgrade pip and install wheel
LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 Abdelrahman Abouroumia (Ab-Romia)
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,332 +1,77 @@
1
  ---
2
- title: ContextIQ - Intelligent RAG Assistant
3
- emoji: 🧠
4
- colorFrom: blue
5
- colorTo: purple
6
  sdk: docker
7
- app_port: 7860
8
  pinned: false
9
  license: mit
10
  ---
11
 
12
- # 🧠 ContextIQ - Intelligent Context-Aware AI Assistant
13
 
14
- <div align="center">
15
 
16
- [![Python](https://img.shields.io/badge/Python-3.8+-blue.svg)](https://www.python.org/downloads/)
17
- [![FastAPI](https://img.shields.io/badge/FastAPI-0.116+-green.svg)](https://fastapi.tiangolo.com/)
18
- [![License](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
19
- [![Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97-Hugging%20Face-orange)](https://huggingface.co/spaces/Ab-Romia/Context-Aware-AI)
20
 
21
- **A sophisticated RAG (Retrieval-Augmented Generation) application powered by multiple AI providers**
 
 
 
22
 
23
- [Live Demo](https://huggingface.co/spaces/Ab-Romia/Context-Aware-AI) · [Report Bug](https://github.com/Ab-Romia/ContextIQ-RAG/issues) · [Request Feature](https://github.com/Ab-Romia/ContextIQ-RAG/issues)
 
 
 
24
 
25
- </div>
 
 
 
26
 
27
- ---
28
-
29
- ## 🌟 What is ContextIQ?
30
-
31
- ContextIQ is an advanced **Retrieval-Augmented Generation (RAG)** application that transforms how you interact with your documents. Upload any document, ask questions, get summaries, or generate insights - all powered by state-of-the-art AI models from **OpenAI** and **OpenRouter**.
32
-
33
- ### ✨ Key Highlights
34
-
35
- - 🎯 **Dual AI Provider Support**: Choose between OpenAI (GPT-4o, GPT-4, GPT-3.5) or OpenRouter (200+ models including DeepSeek R1 FREE, Claude, Gemini, and more)
36
- - 📚 **11+ File Formats Supported**: PDF, DOCX, PPTX, XLSX, CSV, TXT, MD, HTML, JSON, XML, RTF
37
- - 🚀 **Lightning-Fast RAG Pipeline**: Custom TF-IDF embeddings + ChromaDB vector search
38
- - 💎 **Beautiful Modern UI**: Dark-themed, responsive interface with Tailwind CSS
39
- - 🔒 **Privacy-First**: API keys stored locally in your browser, never on our servers
40
- - ⚡ **Smart Caching**: 10-minute response cache for faster interactions
41
- - 🎨 **Multiple Task Types**: Q&A, Summarization, Action Plans, Creative Writing
42
-
43
- ---
44
-
45
- ## 🏗️ Architecture
46
-
47
- ```
48
- ┌──────────────────────────────────────────────────────────────┐
49
- │ Frontend (HTML/JS/Tailwind) │
50
- │ • Provider Selection (OpenAI/OpenRouter) │
51
- │ • File Upload & Text Input │
52
- │ • Real-time Chat Interface │
53
- │ • API Key Management │
54
- └────────────────────┬─────────────────────────────────────────┘
55
- │ REST API
56
- ┌────────────────────▼─────────────────────────────────────────┐
57
- │ FastAPI Backend │
58
- │ • Request Validation (Pydantic) │
59
- │ • Multi-Provider LLM Support │
60
- │ • File Processing Pipeline │
61
- │ • Response Caching │
62
- └────────────────────┬─────────────────────────────────────────┘
63
-
64
- ┌───────────┴───────────┐
65
- │ │
66
- ┌────────▼────────┐ ┌─────────▼──────────┐
67
- │ ChromaDB │ │ LLM Providers │
68
- │ Vector Database │ │ • OpenAI API │
69
- │ (TF-IDF) │ │ • OpenRouter API │
70
- └─────────────────┘ └────────────────────┘
71
- ```
72
-
73
- ---
74
-
75
- ## 🚀 Quick Start
76
-
77
- ### Prerequisites
78
-
79
- - **Python 3.8+**
80
- - **API Key** from either:
81
- - [OpenAI](https://platform.openai.com/api-keys) - For GPT models
82
- - [OpenRouter](https://openrouter.ai/) - For 200+ models (FREE tier available)
83
-
84
- ### Installation
85
-
86
- 1. **Clone the repository**
87
- ```bash
88
- git clone https://github.com/Ab-Romia/ContextIQ-RAG.git
89
- cd ContextIQ-RAG
90
- ```
91
-
92
- 2. **Install dependencies**
93
- ```bash
94
- pip install -r requirements.txt
95
- ```
96
-
97
- 3. **Run the application**
98
- ```bash
99
- python main.py
100
- ```
101
-
102
- Or use uvicorn directly:
103
- ```bash
104
- uvicorn main:app --host 0.0.0.0 --port 7860
105
- ```
106
-
107
- 4. **Access the web interface**
108
- Open your browser and navigate to:
109
- ```
110
- http://localhost:7860
111
- ```
112
-
113
- 5. **Configure your AI provider**
114
- - Choose between **OpenAI** or **OpenRouter** in the UI
115
- - Enter your API key
116
- - Test and save the key locally
117
-
118
- ---
119
-
120
- ## 📖 How to Use
121
-
122
- ### 1. Choose Your AI Provider
123
-
124
- - **OpenAI**: Access to GPT-4o, GPT-4o-mini, GPT-4, GPT-3.5-turbo
125
- - **OpenRouter**: 200+ models including DeepSeek R1 (FREE), Claude, GPT-4, Gemini, Llama 3, and more
126
- - **Default model**: DeepSeek R1 (completely free to use)
127
-
128
- ### 2. Upload Your Documents
129
-
130
- ContextIQ supports a wide range of file formats:
131
-
132
- | Category | Formats |
133
- |----------|---------|
134
- | **Text** | .txt, .md, .rtf |
135
- | **Documents** | .pdf, .docx |
136
- | **Presentations** | .pptx |
137
- | **Data** | .xlsx, .csv, .json, .xml |
138
- | **Web** | .html, .htm |
139
-
140
- ### 3. Index Your Content
141
-
142
- Click "Index Context" to process and store your documents in the vector database. The system will:
143
- - Extract text from your documents
144
- - Split into manageable chunks (600 characters)
145
- - Generate TF-IDF embeddings
146
- - Store in ChromaDB for fast retrieval
147
-
148
- ### 4. Interact with Your AI Assistant
149
-
150
- Choose from multiple task types:
151
-
152
- - **Question & Answer**: Get precise answers from your documents
153
- - **Summarize**: Generate concise summaries
154
- - **Generate Action Plan**: Create actionable plans from your content
155
- - **Creative Writing**: Transform your ideas into creative content
156
-
157
- ---
158
-
159
- ## 🎯 Features in Detail
160
-
161
- ### 📁 Advanced File Processing
162
-
163
- Our robust file processing pipeline handles:
164
-
165
- - **PDF**: Multi-page extraction with PyMuPDF
166
- - **Word Documents**: Paragraphs and tables extraction
167
- - **PowerPoint**: Slide-by-slide text extraction
168
- - **Excel/CSV**: Structured data processing with Pandas
169
- - **HTML**: Clean text extraction with BeautifulSoup
170
- - **JSON/XML**: Intelligent parsing and formatting
171
-
172
- ### 🧠 Intelligent RAG Pipeline
173
-
174
- 1. **Custom TF-IDF Embeddings**
175
- - 384-dimensional vectors
176
- - N-gram support (1-2)
177
- - English stop words filtering
178
- - Fallback hashing mechanism
179
 
180
- 2. **ChromaDB Vector Database**
181
- - In-memory storage for speed
182
- - Similarity-based retrieval
183
- - Configurable chunk retrieval (default: 3)
184
 
185
- 3. **Smart Context Assembly**
186
- - Retrieves relevant chunks
187
- - Constructs optimized prompts
188
- - Respects token limits per task type
189
 
190
- ### 🔧 Configurable Settings
191
 
192
- | Setting | Default | Description |
193
- |---------|---------|-------------|
194
- | MAX_TOKENS_CHAT | 4000 | Q&A response tokens |
195
- | MAX_TOKENS_SUMMARIZE | 3000 | Summary tokens |
196
- | MAX_TOKENS_PLAN | 5000 | Action plan tokens |
197
- | MAX_TOKENS_CREATIVE | 6000 | Creative writing tokens |
198
- | MAX_CHUNKS_RETRIEVE | 3 | Vector search results |
199
- | CACHE_EXPIRATION | 600s | Response cache duration |
200
 
201
- ---
202
-
203
- ## 🛠️ Technology Stack
204
-
205
- ### Backend
206
- - **FastAPI** - Modern, fast web framework
207
- - **ChromaDB** - Vector database for embeddings
208
- - **Scikit-learn** - TF-IDF vectorization
209
- - **Pydantic** - Data validation
210
- - **OpenAI SDK** - GPT models integration
211
- - **Requests** - HTTP client for OpenRouter
212
-
213
- ### Frontend
214
- - **Tailwind CSS** - Utility-first CSS framework
215
- - **Marked.js** - Markdown rendering
216
- - **Vanilla JavaScript** - No framework bloat
217
- - **LocalStorage** - Client-side API key storage
218
-
219
- ### File Processing
220
- - **PyMuPDF (fitz)** - PDF processing
221
- - **python-docx** - Word documents
222
- - **python-pptx** - PowerPoint files
223
- - **Pandas** - Excel/CSV handling
224
- - **BeautifulSoup** - HTML parsing
225
- - **striprtf** - RTF file support
226
-
227
- ---
228
-
229
- ## 📊 API Endpoints
230
-
231
- | Endpoint | Method | Description |
232
- |----------|--------|-------------|
233
- | `/` | GET | Serve main interface |
234
- | `/health` | GET | Health check |
235
- | `/api/v1/test-api-key` | POST | Validate API key |
236
- | `/api/v1/index` | POST | Index text context |
237
- | `/api/v1/index-file` | POST | Upload & index file |
238
- | `/api/v1/generate` | POST | Generate AI response |
239
- | `/api/v1/task` | POST | Execute specialized task |
240
- | `/api/v1/clear_index` | POST | Clear vector database |
241
-
242
- ---
243
-
244
- ## 🔒 Privacy & Security
245
-
246
- - ✅ API keys stored **only** in browser LocalStorage
247
- - ✅ No server-side API key storage
248
- - ✅ All requests use user-provided keys
249
- - ✅ HTTPS recommended for production
250
- - ✅ No telemetry or tracking
251
- - ✅ Open source - audit the code yourself
252
-
253
- ---
254
-
255
- ## 🚢 Deployment
256
-
257
- ### Docker
258
-
259
- ```bash
260
- docker build -t contextiq .
261
- docker run -p 7860:7860 contextiq
262
- ```
263
-
264
- ### Hugging Face Spaces
265
-
266
- This project is optimized for Hugging Face Spaces deployment. Simply:
267
-
268
- 1. Create a new Space
269
- 2. Upload the repository files
270
- 3. Set Space SDK to "Docker"
271
- 4. Deploy!
272
 
273
- [View Live Demo](https://huggingface.co/spaces/Ab-Romia/Context-Aware-AI)
 
 
 
274
 
275
- ---
276
-
277
- ## 🎨 UI Features
278
-
279
- - 🌙 **Dark Theme**: Easy on the eyes
280
- - 📱 **Fully Responsive**: Works on mobile, tablet, and desktop
281
- - 🎭 **Glass-morphism Effects**: Modern, elegant design
282
- - ⚡ **Real-time Updates**: Live status indicators
283
- - 📊 **Character/Word Counters**: Track your content
284
- - 🔄 **Collapsible Sections**: Clean, organized interface
285
- - 💬 **Markdown Support**: Rich text formatting in responses
286
 
287
- ---
288
-
289
- ## 🤝 Contributing
 
290
 
291
- Contributions are welcome! Please feel free to submit a Pull Request.
292
 
293
- 1. Fork the repository
294
- 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
295
- 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
296
- 4. Push to the branch (`git push origin feature/AmazingFeature`)
297
- 5. Open a Pull Request
298
 
299
  ---
300
 
301
- ## 📝 License
302
-
303
- This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
304
-
305
- ---
306
-
307
- ## 🙏 Acknowledgments
308
-
309
- - **OpenRouter** for providing access to 200+ AI models
310
- - **OpenAI** for GPT models
311
- - **ChromaDB** for the vector database
312
- - **FastAPI** for the amazing web framework
313
- - **Tailwind CSS** for the beautiful UI
314
-
315
- ---
316
-
317
- ## 📬 Contact
318
-
319
- **Ab-Romia** - Abdelrahman Abouroumia
320
-
321
- - GitHub: [@Ab-Romia](https://github.com/Ab-Romia)
322
- - Hugging Face: [Ab-Romia](https://huggingface.co/Ab-Romia)
323
-
324
- ---
325
-
326
- <div align="center">
327
-
328
- **⭐ Star this repo if you find it helpful! ⭐**
329
-
330
- Made with ❤️ by Ab-Romia
331
-
332
- </div>
 
1
  ---
2
+ title: Context Aware AI
3
+ emoji: 🌍
4
+ colorFrom: green
5
+ colorTo: red
6
  sdk: docker
 
7
  pinned: false
8
  license: mit
9
  ---
10
 
11
+ # ContextIQ - Smart Document Assistant 🧠
12
 
13
+ A RAG-powered AI assistant that answers questions based on your documents. Upload any text, ask questions, and get intelligent responses.
14
 
15
+ ## 🚀 How to Use on Hugging Face
 
 
 
16
 
17
+ ### 1. **Get Your API Key (Free)**
18
+ - Go to [OpenRouter.ai](https://openrouter.ai) and sign up
19
+ - Copy your API key (starts with `sk-or-`)
20
+ - No credit card required for basic usage
21
 
22
+ ### 2. **Configure the App**
23
+ - Enter your API key in the configuration section
24
+ - Click "Test Key" to validate
25
+ - Click "Save" to remember it for future sessions
26
 
27
+ ### 3. **Add Your Documents**
28
+ - Paste your text, documents, or notes in the "Knowledge Base" panel
29
+ - Click "Index Context" to process the content
30
+ - Wait for the green success message
31
 
32
+ ### 4. **Start Asking Questions**
33
+ Choose your task:
34
+ - **Question & Answer**: Ask specific questions about your content
35
+ - **Summarize**: Get a concise summary of your documents
36
+ - **Plan**: Generate action plans based on your content
37
+ - **Creative**: Write stories or content inspired by your documents
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
+ ## What You Can Do
 
 
 
40
 
41
+ - **Analyze Documents**: Research papers, meeting notes, reports
42
+ - **Study Materials**: Summarize textbooks, generate study questions
43
+ - **Business Intelligence**: Analyze customer feedback, market research
44
+ - **Content Creation**: Generate blog posts and creative writing from your source material
45
 
46
+ ## 🔧 Features
47
 
48
+ - **Smart Context Search**: Finds relevant information from your documents
49
+ - **Multiple AI Tasks**: Q&A, summarization, planning, creative writing
50
+ - **Mobile Friendly**: Works perfectly on phones and tablets
51
+ - **Secure**: Your API key stays in your browser, never shared
52
+ - **Fast**: Cached responses for repeated questions
 
 
 
53
 
54
+ ## 💡 Example Usage
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
+ 1. **Upload a research paper** → Ask "What are the main findings?"
57
+ 2. **Paste meeting notes** → Generate "Create an action plan"
58
+ 3. **Add product specs** → Write "Create marketing copy"
59
+ 4. **Upload course material** → Ask "Explain the key concepts"
60
 
61
+ ## 🛠️ Technical Details
 
 
 
 
 
 
 
 
 
 
62
 
63
+ - **Backend**: FastAPI + ChromaDB vector database
64
+ - **AI Model**: DeepSeek R1 via OpenRouter
65
+ - **Embeddings**: Custom TF-IDF for document similarity
66
+ - **Frontend**: Vanilla JavaScript with Tailwind CSS
67
 
68
+ ## 🔒 Privacy
69
 
70
+ - Your documents are processed in memory only
71
+ - API keys stored locally in your browser
72
+ - No data is saved on our servers
73
+ - All communication is encrypted
 
74
 
75
  ---
76
 
77
+ **Built by [Abdelrahman Abouroumia](https://github.com/Ab-Romia)** | **Try it now on [Hugging Face](https://huggingface.co/spaces/Ab-Romia/Context-Aware-AI) or [GitHub](https://github.com/Ab-Romia/ContextIQ-RAG)**
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README_HUGGINGFACE.md DELETED
@@ -1,106 +0,0 @@
1
- ---
2
- title: ContextIQ - Context-Aware AI Assistant
3
- emoji: 🧠
4
- colorFrom: purple
5
- colorTo: blue
6
- sdk: docker
7
- pinned: true
8
- license: mit
9
- app_port: 7860
10
- ---
11
-
12
- # 🧠 ContextIQ - Intelligent Context-Aware AI Assistant
13
-
14
- Welcome to **ContextIQ**, a sophisticated RAG (Retrieval-Augmented Generation) application that transforms how you interact with your documents!
15
-
16
- ## 🌟 What Can You Do?
17
-
18
- - 📚 **Upload Documents**: Support for 11+ file formats (PDF, DOCX, PPTX, XLSX, CSV, TXT, MD, HTML, JSON, XML, RTF)
19
- - 🤖 **Ask Questions**: Get intelligent answers based on your uploaded documents
20
- - 📝 **Summarize**: Generate concise summaries of your content
21
- - 📋 **Action Plans**: Create actionable plans from your documents
22
- - ✍️ **Creative Writing**: Transform your ideas into creative content
23
-
24
- ## 🎯 Dual AI Provider Support
25
-
26
- Choose your preferred AI provider:
27
-
28
- ### OpenRouter (FREE DeepSeek Model!)
29
- - 200+ models including DeepSeek R1 (FREE), Claude, GPT-4, Gemini, Llama 3
30
- - **Default**: DeepSeek R1 - completely free to use
31
- - Get your key: [openrouter.ai](https://openrouter.ai/)
32
-
33
- ### OpenAI
34
- - GPT-4o, GPT-4o-mini, GPT-4, GPT-3.5-turbo
35
- - Production-ready models
36
- - Get your key: [platform.openai.com/api-keys](https://platform.openai.com/api-keys)
37
-
38
- ## 🚀 How to Use
39
-
40
- 1. **Choose Your AI Provider**
41
- - Select OpenRouter (free) or OpenAI in the interface
42
-
43
- 2. **Enter Your API Key**
44
- - Your key is stored locally in your browser only
45
- - Never sent to our servers
46
-
47
- 3. **Upload Your Documents**
48
- - Drag & drop or browse for files
49
- - Or paste text directly
50
-
51
- 4. **Index Your Content**
52
- - Click "Index Context" to process your documents
53
-
54
- 5. **Start Asking Questions!**
55
- - Choose a task type (Q&A, Summarize, Plan, Creative)
56
- - Type your question or prompt
57
- - Get AI-powered responses based on your documents
58
-
59
- ## 🔒 Privacy & Security
60
-
61
- - ✅ Your API keys are stored **only** in your browser
62
- - ✅ No server-side storage of API keys
63
- - ✅ All requests use your own API key
64
- - ✅ Open source - audit the code yourself
65
-
66
- ## 🛠️ Technology Stack
67
-
68
- - **Backend**: FastAPI + Python
69
- - **Vector Database**: ChromaDB with custom TF-IDF embeddings
70
- - **Frontend**: Tailwind CSS + Vanilla JavaScript
71
- - **AI Providers**: OpenAI SDK + OpenRouter API
72
- - **File Processing**: PyMuPDF, python-docx, pandas, BeautifulSoup, and more
73
-
74
- ## 📊 Supported File Formats
75
-
76
- | Category | Formats |
77
- |----------|---------|
78
- | **Text** | .txt, .md, .rtf |
79
- | **Documents** | .pdf, .docx |
80
- | **Presentations** | .pptx |
81
- | **Data** | .xlsx, .csv, .json, .xml |
82
- | **Web** | .html, .htm |
83
-
84
- ## 💡 Tips for Best Results
85
-
86
- - **Clear Questions**: Ask specific questions about your documents
87
- - **Context Matters**: The more relevant text you provide, the better the answers
88
- - **Chunk Size**: Large documents are automatically split into manageable chunks
89
- - **Model Selection**:
90
- - Use OpenRouter's DeepSeek R1 (FREE) for excellent reasoning at no cost
91
- - Use OpenAI's GPT-4o for production workloads
92
- - Default DeepSeek model is completely free - no credit card needed!
93
-
94
- ## 🤝 Open Source
95
-
96
- This project is open source! Check out the code on GitHub:
97
- [github.com/Ab-Romia/ContextIQ-RAG](https://github.com/Ab-Romia/ContextIQ-RAG)
98
-
99
- ## 📬 Feedback
100
-
101
- Found a bug or have a feature request?
102
- [Open an issue on GitHub](https://github.com/Ab-Romia/ContextIQ-RAG/issues)
103
-
104
- ---
105
-
106
- Made with ❤️ by Ab-Romia (Abdelrahman Abouroumia)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/__init__.py DELETED
File without changes
app/config.py CHANGED
@@ -9,45 +9,35 @@ class Settings(BaseSettings):
9
  # OpenRouter Configuration
10
  OPENROUTER_API_KEY: str = ""
11
  OPENROUTER_URL: str = "https://openrouter.ai/api/v1"
12
- OPENROUTER_MODEL: str = "deepseek/deepseek-r1-0528:free"
13
-
14
- # OpenAI Configuration
15
- OPENAI_API_KEY: str = ""
16
- OPENAI_URL: str = "https://api.openai.com/v1"
17
- OPENAI_MODEL: str = "gpt-4o-mini" # Default to GPT-4o-mini for cost efficiency
18
-
19
- # Legacy field for backward compatibility
20
  MODEL_NAME: str = "deepseek/deepseek-r1-0528:free"
21
-
22
  # Token Limits Configuration
23
- MAX_TOKENS_CHAT: int = 4000 # For Q&A responses
24
- MAX_TOKENS_SUMMARIZE: int = 3000 # For summaries
25
- MAX_TOKENS_PLAN: int = 5000 # For action plans
26
- MAX_TOKENS_CREATIVE: int = 6000 # For creative writing
27
- MAX_TOKENS_TEST: int = 50 # For API key testing
28
-
29
- # Context Limits - Optimized for better retrieval
30
- MAX_CONTEXT_LENGTH_CHAT: int = 12000
31
- MAX_CONTEXT_LENGTH_TASK: int = 16000
32
- MAX_CHUNKS_RETRIEVE: int = 5
33
- CHUNK_SIZE: int = 500
34
- CHUNK_OVERLAP: int = 100
35
-
36
  # Performance Settings
37
- REQUEST_TIMEOUT_BASE: int = 120 # Base timeout in seconds
38
- REQUEST_TIMEOUT_PER_1K_TOKENS: int = 4 # Additional seconds per 1000 tokens
39
-
40
  # New setting to control fallback behavior
41
  REQUIRE_USER_API_KEY: bool = True
42
-
43
  class Config:
44
  env_file = ".env"
45
  case_sensitive = True
46
 
47
-
48
  # Create settings instance
49
  settings = Settings()
50
 
 
51
  # Debug logging for API key configuration
52
  logger.info("=" * 80)
53
  logger.info("🔧 CONFIGURATION DEBUG")
@@ -82,10 +72,9 @@ else:
82
 
83
  # Check if API key starts with expected prefix (only if present)
84
  if settings.OPENROUTER_API_KEY:
85
- api_key_preview = settings.OPENROUTER_API_KEY[:20] + "..." if len(
86
- settings.OPENROUTER_API_KEY) > 20 else settings.OPENROUTER_API_KEY
87
  logger.info(f"🔑 Server API Key Preview: {api_key_preview}")
88
-
89
  # OpenRouter API keys typically start with "sk-or-"
90
  if settings.OPENROUTER_API_KEY.startswith("sk-or-"):
91
  logger.info("✅ Server API key format looks correct (starts with 'sk-or-')")
@@ -114,56 +103,27 @@ def get_max_tokens_for_task(task_type: str) -> int:
114
  }
115
  return token_map.get(task_type, settings.MAX_TOKENS_CHAT)
116
 
117
-
118
  def get_timeout_for_tokens(max_tokens: int) -> int:
119
  """Calculate appropriate timeout based on token count."""
120
  additional_time = (max_tokens // 1000) * settings.REQUEST_TIMEOUT_PER_1K_TOKENS
121
  return settings.REQUEST_TIMEOUT_BASE + additional_time
122
 
123
-
124
- def validate_api_key(api_key: str, provider: str = "openrouter") -> bool:
125
- """Validate API key format for OpenRouter or OpenAI"""
126
  if not api_key:
127
  return False
128
-
129
- provider = provider.lower()
130
-
131
- if provider == "openrouter":
132
- # OpenRouter keys should start with "sk-or-" and be at least 40 characters
133
- if not api_key.startswith("sk-or-"):
134
- logger.warning("⚠️ OpenRouter API key should start with 'sk-or-'")
135
- return False
136
- if len(api_key) < 40:
137
- logger.warning("⚠️ OpenRouter API key seems too short")
138
- return False
139
- elif provider == "openai":
140
- # OpenAI keys should start with "sk-" and be at least 40 characters
141
- if not api_key.startswith("sk-"):
142
- logger.warning("⚠️ OpenAI API key should start with 'sk-'")
143
- return False
144
- if len(api_key) < 40:
145
- logger.warning("⚠️ OpenAI API key seems too short")
146
- return False
147
- else:
148
- logger.warning(f"⚠️ Unknown provider: {provider}")
149
  return False
150
-
 
 
 
 
151
  return True
152
 
153
-
154
- def detect_provider_from_key(api_key: str) -> str:
155
- """Detect provider from API key format"""
156
- if not api_key:
157
- return "unknown"
158
-
159
- if api_key.startswith("sk-or-"):
160
- return "openrouter"
161
- elif api_key.startswith("sk-proj-") or api_key.startswith("sk-"):
162
- return "openai"
163
- else:
164
- return "unknown"
165
-
166
-
167
  # Validate the current server API key (if present)
168
  if settings.OPENROUTER_API_KEY:
169
  is_valid = validate_api_key(settings.OPENROUTER_API_KEY)
@@ -172,4 +132,4 @@ else:
172
  logger.info("🔍 No server API key to validate")
173
 
174
  # Export settings
175
- __all__ = ['settings', 'validate_api_key', 'detect_provider_from_key', 'get_max_tokens_for_task', 'get_timeout_for_tokens']
 
9
  # OpenRouter Configuration
10
  OPENROUTER_API_KEY: str = ""
11
  OPENROUTER_URL: str = "https://openrouter.ai/api/v1"
 
 
 
 
 
 
 
 
12
  MODEL_NAME: str = "deepseek/deepseek-r1-0528:free"
13
+
14
  # Token Limits Configuration
15
+ MAX_TOKENS_CHAT: int = 4000 # For Q&A responses
16
+ MAX_TOKENS_SUMMARIZE: int = 3000 # For summaries
17
+ MAX_TOKENS_PLAN: int = 5000 # For action plans
18
+ MAX_TOKENS_CREATIVE: int = 6000 # For creative writing
19
+ MAX_TOKENS_TEST: int = 50 # For API key testing
20
+
21
+ # Context Limits
22
+ MAX_CONTEXT_LENGTH_CHAT: int = 8000 # For chat context
23
+ MAX_CONTEXT_LENGTH_TASK: int = 12000 # For task context
24
+ MAX_CHUNKS_RETRIEVE: int = 3 # Number of chunks to retrieve
25
+
 
 
26
  # Performance Settings
27
+ REQUEST_TIMEOUT_BASE: int = 120 # Base timeout in seconds
28
+ REQUEST_TIMEOUT_PER_1K_TOKENS: int = 4 # Additional seconds per 1000 tokens
29
+
30
  # New setting to control fallback behavior
31
  REQUIRE_USER_API_KEY: bool = True
32
+
33
  class Config:
34
  env_file = ".env"
35
  case_sensitive = True
36
 
 
37
  # Create settings instance
38
  settings = Settings()
39
 
40
+
41
  # Debug logging for API key configuration
42
  logger.info("=" * 80)
43
  logger.info("🔧 CONFIGURATION DEBUG")
 
72
 
73
  # Check if API key starts with expected prefix (only if present)
74
  if settings.OPENROUTER_API_KEY:
75
+ api_key_preview = settings.OPENROUTER_API_KEY[:20] + "..." if len(settings.OPENROUTER_API_KEY) > 20 else settings.OPENROUTER_API_KEY
 
76
  logger.info(f"🔑 Server API Key Preview: {api_key_preview}")
77
+
78
  # OpenRouter API keys typically start with "sk-or-"
79
  if settings.OPENROUTER_API_KEY.startswith("sk-or-"):
80
  logger.info("✅ Server API key format looks correct (starts with 'sk-or-')")
 
103
  }
104
  return token_map.get(task_type, settings.MAX_TOKENS_CHAT)
105
 
 
106
  def get_timeout_for_tokens(max_tokens: int) -> int:
107
  """Calculate appropriate timeout based on token count."""
108
  additional_time = (max_tokens // 1000) * settings.REQUEST_TIMEOUT_PER_1K_TOKENS
109
  return settings.REQUEST_TIMEOUT_BASE + additional_time
110
 
111
+ def validate_api_key(api_key: str) -> bool:
112
+ """Validate OpenRouter API key format"""
 
113
  if not api_key:
114
  return False
115
+
116
+ # OpenRouter keys should start with "sk-or-" and be at least 40 characters
117
+ if not api_key.startswith("sk-or-"):
118
+ logger.warning("⚠️ API key should start with 'sk-or-'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  return False
120
+
121
+ if len(api_key) < 40:
122
+ logger.warning("⚠️ API key seems too short")
123
+ return False
124
+
125
  return True
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  # Validate the current server API key (if present)
128
  if settings.OPENROUTER_API_KEY:
129
  is_valid = validate_api_key(settings.OPENROUTER_API_KEY)
 
132
  logger.info("🔍 No server API key to validate")
133
 
134
  # Export settings
135
+ __all__ = ['settings', 'validate_api_key']
app/main.py CHANGED
@@ -13,39 +13,33 @@ from typing import Optional
13
  # Get the base directory (works both locally and on Hugging Face)
14
  if os.path.exists("/app"): # Hugging Face environment
15
  BASE_DIR = Path("/app")
16
- STATIC_DIR = BASE_DIR / "static"
17
- TEMPLATES_DIR = BASE_DIR / "templates"
18
  else: # Local environment
19
  BASE_DIR = Path(__file__).resolve().parent
20
- STATIC_DIR = BASE_DIR.parent / "static"
21
- TEMPLATES_DIR = BASE_DIR.parent / "templates"
22
 
23
  app = FastAPI(
24
  title="ContextIQ RAG - Intelligent Context-Aware Assistant",
25
  description="A sophisticated RAG-powered backend using FastAPI and OpenRouter.",
26
- version="2.2.0" # Version updated for new feature
27
  )
28
 
29
  # Mount static files and templates
30
- app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
31
- templates = Jinja2Templates(directory=TEMPLATES_DIR)
32
-
33
 
34
  def get_api_key(x_api_key: Optional[str] = Header(None)) -> str:
35
  """Extract API key from header or use default."""
36
  if x_api_key and x_api_key.strip():
37
  return x_api_key.strip()
38
-
39
  # Fall back to server default if available
40
  if config.settings.OPENROUTER_API_KEY:
41
  return config.settings.OPENROUTER_API_KEY
42
-
43
  raise HTTPException(
44
  status_code=400,
45
  detail="No API key provided. Please provide your OpenRouter API key via the X-API-Key header."
46
  )
47
 
48
-
49
  @app.get("/debug")
50
  async def debug_config():
51
  """Debug endpoint to check configuration."""
@@ -57,23 +51,21 @@ async def debug_config():
57
  "accepts_user_keys": True
58
  }
59
 
60
-
61
  @app.post("/api/v1/test-api-key", response_model=schemas.ApiKeyTestResponse)
62
  async def test_api_key_endpoint(api_key_request: schemas.ApiKeyRequest):
63
  """
64
- Test if the provided API key is valid (OpenRouter or OpenAI).
65
  """
66
  try:
67
- result = await services.test_api_key(api_key_request.api_key, api_key_request.provider)
68
  return schemas.ApiKeyTestResponse(**result)
69
  except Exception as e:
70
  raise HTTPException(status_code=500, detail=str(e))
71
 
72
-
73
  @app.post("/api/v1/task", response_model=schemas.TaskResponse)
74
  async def execute_task(
75
- task_request: schemas.TaskRequest,
76
- x_api_key: Optional[str] = Header(None)
77
  ):
78
  """
79
  Executes a specific task (e.g., summarize, plan) based on the provided context
@@ -86,7 +78,6 @@ async def execute_task(
86
  except Exception as e:
87
  raise HTTPException(status_code=500, detail=str(e))
88
 
89
-
90
  @app.get("/", response_class=HTMLResponse)
91
  async def read_root(request: Request):
92
  """
@@ -94,17 +85,15 @@ async def read_root(request: Request):
94
  """
95
  return templates.TemplateResponse("index.html", {"request": request})
96
 
97
-
98
  @app.get("/health")
99
  async def health_check():
100
  """Health check endpoint for Hugging Face."""
101
  return {"status": "healthy", "message": "ContextIQ RAG is running!"}
102
 
103
-
104
  @app.post("/api/v1/generate", response_model=schemas.ChatResponse)
105
  async def generate_response(
106
- chat_request: schemas.ChatRequest,
107
- x_api_key: Optional[str] = Header(None)
108
  ):
109
  """
110
  Receives a prompt, retrieves relevant context from the vector DB,
@@ -117,7 +106,6 @@ async def generate_response(
117
  except Exception as e:
118
  raise HTTPException(status_code=500, detail=str(e))
119
 
120
-
121
  @app.post("/api/v1/clear_index", response_model=schemas.GeneralResponse)
122
  async def clear_context_index(x_api_key: Optional[str] = Header(None)):
123
  """
@@ -129,11 +117,10 @@ async def clear_context_index(x_api_key: Optional[str] = Header(None)):
129
  except Exception as e:
130
  raise HTTPException(status_code=500, detail=f"Failed to clear index: {e}")
131
 
132
-
133
  @app.post("/api/v1/index", response_model=schemas.IndexResponse)
134
  async def index_context(
135
- document_request: schemas.DocumentRequest,
136
- x_api_key: Optional[str] = Header(None)
137
  ):
138
  """
139
  Receives text, clears the old index, chunks the new text,
@@ -142,22 +129,21 @@ async def index_context(
142
  try:
143
  # Validate API key access (but indexing doesn't require API calls)
144
  get_api_key(x_api_key)
145
-
146
  docs_added = services.index_document(document_request)
147
  return schemas.IndexResponse(
148
  message="Context has been successfully indexed.",
149
  documents_added=docs_added,
150
- extracted_text=document_request.context # Return the provided text
151
  )
152
  except Exception as e:
153
  raise HTTPException(status_code=500, detail=f"Failed to index document: {e}")
154
 
155
-
156
  # ✨ UPDATED: File Upload Endpoint now returns the extracted text
157
  @app.post("/api/v1/index-file", response_model=schemas.IndexResponse)
158
  async def index_file(
159
- x_api_key: Optional[str] = Header(None),
160
- file: UploadFile = File(...)
161
  ):
162
  """
163
  Receives a file (.txt, .pdf), extracts text, and indexes it.
@@ -180,3 +166,7 @@ async def index_file(
180
  except Exception as e:
181
  raise HTTPException(status_code=500, detail=f"Failed to process file: {str(e)}")
182
 
 
 
 
 
 
13
  # Get the base directory (works both locally and on Hugging Face)
14
  if os.path.exists("/app"): # Hugging Face environment
15
  BASE_DIR = Path("/app")
 
 
16
  else: # Local environment
17
  BASE_DIR = Path(__file__).resolve().parent
 
 
18
 
19
  app = FastAPI(
20
  title="ContextIQ RAG - Intelligent Context-Aware Assistant",
21
  description="A sophisticated RAG-powered backend using FastAPI and OpenRouter.",
22
+ version="2.2.0" # Version updated for new feature
23
  )
24
 
25
  # Mount static files and templates
26
+ app.mount("/static", StaticFiles(directory=BASE_DIR / "static"), name="static")
27
+ templates = Jinja2Templates(directory=BASE_DIR / "templates")
 
28
 
29
  def get_api_key(x_api_key: Optional[str] = Header(None)) -> str:
30
  """Extract API key from header or use default."""
31
  if x_api_key and x_api_key.strip():
32
  return x_api_key.strip()
33
+
34
  # Fall back to server default if available
35
  if config.settings.OPENROUTER_API_KEY:
36
  return config.settings.OPENROUTER_API_KEY
37
+
38
  raise HTTPException(
39
  status_code=400,
40
  detail="No API key provided. Please provide your OpenRouter API key via the X-API-Key header."
41
  )
42
 
 
43
  @app.get("/debug")
44
  async def debug_config():
45
  """Debug endpoint to check configuration."""
 
51
  "accepts_user_keys": True
52
  }
53
 
 
54
  @app.post("/api/v1/test-api-key", response_model=schemas.ApiKeyTestResponse)
55
  async def test_api_key_endpoint(api_key_request: schemas.ApiKeyRequest):
56
  """
57
+ Test if the provided API key is valid.
58
  """
59
  try:
60
+ result = await services.test_api_key(api_key_request.api_key)
61
  return schemas.ApiKeyTestResponse(**result)
62
  except Exception as e:
63
  raise HTTPException(status_code=500, detail=str(e))
64
 
 
65
  @app.post("/api/v1/task", response_model=schemas.TaskResponse)
66
  async def execute_task(
67
+ task_request: schemas.TaskRequest,
68
+ x_api_key: Optional[str] = Header(None)
69
  ):
70
  """
71
  Executes a specific task (e.g., summarize, plan) based on the provided context
 
78
  except Exception as e:
79
  raise HTTPException(status_code=500, detail=str(e))
80
 
 
81
  @app.get("/", response_class=HTMLResponse)
82
  async def read_root(request: Request):
83
  """
 
85
  """
86
  return templates.TemplateResponse("index.html", {"request": request})
87
 
 
88
  @app.get("/health")
89
  async def health_check():
90
  """Health check endpoint for Hugging Face."""
91
  return {"status": "healthy", "message": "ContextIQ RAG is running!"}
92
 
 
93
  @app.post("/api/v1/generate", response_model=schemas.ChatResponse)
94
  async def generate_response(
95
+ chat_request: schemas.ChatRequest,
96
+ x_api_key: Optional[str] = Header(None)
97
  ):
98
  """
99
  Receives a prompt, retrieves relevant context from the vector DB,
 
106
  except Exception as e:
107
  raise HTTPException(status_code=500, detail=str(e))
108
 
 
109
  @app.post("/api/v1/clear_index", response_model=schemas.GeneralResponse)
110
  async def clear_context_index(x_api_key: Optional[str] = Header(None)):
111
  """
 
117
  except Exception as e:
118
  raise HTTPException(status_code=500, detail=f"Failed to clear index: {e}")
119
 
 
120
  @app.post("/api/v1/index", response_model=schemas.IndexResponse)
121
  async def index_context(
122
+ document_request: schemas.DocumentRequest,
123
+ x_api_key: Optional[str] = Header(None)
124
  ):
125
  """
126
  Receives text, clears the old index, chunks the new text,
 
129
  try:
130
  # Validate API key access (but indexing doesn't require API calls)
131
  get_api_key(x_api_key)
132
+
133
  docs_added = services.index_document(document_request)
134
  return schemas.IndexResponse(
135
  message="Context has been successfully indexed.",
136
  documents_added=docs_added,
137
+ extracted_text=document_request.context # Return the provided text
138
  )
139
  except Exception as e:
140
  raise HTTPException(status_code=500, detail=f"Failed to index document: {e}")
141
 
 
142
  # ✨ UPDATED: File Upload Endpoint now returns the extracted text
143
  @app.post("/api/v1/index-file", response_model=schemas.IndexResponse)
144
  async def index_file(
145
+ x_api_key: Optional[str] = Header(None),
146
+ file: UploadFile = File(...)
147
  ):
148
  """
149
  Receives a file (.txt, .pdf), extracts text, and indexes it.
 
166
  except Exception as e:
167
  raise HTTPException(status_code=500, detail=f"Failed to process file: {str(e)}")
168
 
169
+
170
+ if __name__ == "__main__":
171
+ port = int(os.environ.get("PORT", 7860))
172
+ uvicorn.run(app, host="0.0.0.0", port=port)
app/rag_setup.py CHANGED
@@ -2,15 +2,14 @@ import chromadb
2
  import logging
3
  import requests
4
  import json
5
- from config import settings, detect_provider_from_key # Fixed: removed 'app.' prefix
6
  import time
7
  import os
8
  import numpy as np
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
- from typing import List, Optional
11
  import hashlib
12
  import re
13
- from openai import OpenAI
14
 
15
  # Disable ChromaDB telemetry to avoid errors
16
  os.environ["ANONYMIZED_TELEMETRY"] = "False"
@@ -18,7 +17,6 @@ os.environ["ANONYMIZED_TELEMETRY"] = "False"
18
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
19
  logger = logging.getLogger("rag-setup")
20
 
21
-
22
  # Custom TF-IDF based embedding function
23
  class TFIDFEmbeddingFunction:
24
  def __init__(self, max_features=384):
@@ -30,37 +28,34 @@ class TFIDFEmbeddingFunction:
30
  )
31
  self.is_fitted = False
32
  self.max_features = max_features
33
-
34
- def name(self):
35
- """Return a name identifier for this embedding function."""
36
- return "tfidf_embedder"
37
  def _preprocess_text(self, text: str) -> str:
38
  """Clean and preprocess text."""
39
  text = re.sub(r'\s+', ' ', text)
40
  text = re.sub(r'[^\w\s]', ' ', text)
41
  return text.strip().lower()
42
-
43
  def __call__(self, input: List[str]) -> List[List[float]]:
44
  """Generate TF-IDF based embeddings."""
45
  try:
46
  logger.info(f"🔢 Generating embeddings for {len(input)} texts")
47
  processed_texts = [self._preprocess_text(text) for text in input]
48
-
49
  if not self.is_fitted and processed_texts:
50
  # Fit the vectorizer on the input texts
51
  self.vectorizer.fit(processed_texts)
52
  self.is_fitted = True
53
  logger.info("✅ TF-IDF vectorizer fitted on input texts")
54
-
55
  if not self.is_fitted:
56
  # Return simple fallback if no data to fit on
57
  logger.warning("⚠️ Using fallback embeddings - vectorizer not fitted")
58
  return self._fallback_embeddings(input)
59
-
60
  # Transform texts to vectors
61
  tfidf_matrix = self.vectorizer.transform(processed_texts)
62
  embeddings = tfidf_matrix.toarray()
63
-
64
  # Ensure consistent dimensions
65
  result_embeddings = []
66
  for embedding in embeddings:
@@ -70,14 +65,14 @@ class TFIDFEmbeddingFunction:
70
  result_embeddings.append(padded.tolist())
71
  else:
72
  result_embeddings.append(embedding[:self.max_features].tolist())
73
-
74
  logger.info(f"✅ Generated {len(result_embeddings)} embeddings of dimension {self.max_features}")
75
  return result_embeddings
76
-
77
  except Exception as e:
78
  logger.error(f"❌ Error generating TF-IDF embeddings: {e}")
79
  return self._fallback_embeddings(input)
80
-
81
  def _fallback_embeddings(self, input: List[str]) -> List[List[float]]:
82
  """Simple fallback embedding method."""
83
  logger.info(f"🔧 Using fallback embeddings for {len(input)} texts")
@@ -85,28 +80,27 @@ class TFIDFEmbeddingFunction:
85
  for text in input:
86
  text_hash = hashlib.md5(text.encode()).hexdigest()
87
  embedding = []
88
-
89
  # Convert hash to numbers
90
  for i in range(0, min(len(text_hash), 32), 2):
91
- hex_pair = text_hash[i:i + 2]
92
  embedding.append(int(hex_pair, 16) / 255.0)
93
-
94
  # Add text features
95
  embedding.extend([
96
  len(text) / 1000.0,
97
  len(text.split()) / 100.0,
98
  text.count('.') / 10.0,
99
  ])
100
-
101
  # Pad to desired size
102
  while len(embedding) < self.max_features:
103
  embedding.extend(embedding[:min(len(embedding), self.max_features - len(embedding))])
104
-
105
  embeddings.append(embedding[:self.max_features])
106
-
107
  return embeddings
108
 
109
-
110
  # Simple ChromaDB setup - use in-memory storage for Hugging Face
111
  logger.info("🔧 Initializing ChromaDB with in-memory storage for Hugging Face compatibility")
112
 
@@ -114,14 +108,14 @@ try:
114
  # Use in-memory client to avoid permission issues
115
  client = chromadb.Client()
116
  embedding_function = TFIDFEmbeddingFunction()
117
-
118
  collection = client.get_or_create_collection(
119
  name="context_aware_collection",
120
  embedding_function=embedding_function
121
  )
122
-
123
  logger.info("✅ ChromaDB collection initialized successfully with in-memory storage")
124
-
125
  except Exception as e:
126
  logger.error(f"❌ Error initializing ChromaDB: {e}")
127
  raise RuntimeError(f"Failed to initialize ChromaDB: {e}")
@@ -133,7 +127,7 @@ class OpenRouterLLM:
133
  self.base_url = base_url
134
  self.model = model
135
  self.api_url = f"{base_url.rstrip('/')}/chat/completions"
136
-
137
  logger.info("=" * 60)
138
  logger.info("🚀 INITIALIZING OPENROUTER LLM")
139
  logger.info("=" * 60)
@@ -141,12 +135,12 @@ class OpenRouterLLM:
141
  logger.info(f"🔑 API Key present: {'Yes' if api_key else 'No'}")
142
  logger.info(f"📏 API Key length: {len(api_key) if api_key else 0}")
143
  logger.info(f"🌐 API URL: {self.api_url}")
144
-
145
  if not api_key or not api_key.strip():
146
  logger.error("❌ OpenRouter API key is missing or empty")
147
  self.client_ready = False
148
  return
149
-
150
  # Test the connection with minimal tokens
151
  try:
152
  logger.info("🔍 Testing OpenRouter connection...")
@@ -160,12 +154,12 @@ class OpenRouterLLM:
160
  except Exception as e:
161
  logger.error(f"❌ OpenRouter connection test failed: {e}")
162
  self.client_ready = False
163
-
164
  logger.info("=" * 60)
165
 
166
  def _make_api_request(self, prompt: str, max_tokens: int = 2000, timeout: int = None) -> dict:
167
  """Make a direct HTTP request to OpenRouter API with configurable token limits."""
168
-
169
  # Calculate dynamic timeout based on max_tokens and prompt length
170
  if timeout is None:
171
  base_timeout = 120
@@ -173,19 +167,19 @@ class OpenRouterLLM:
173
  token_timeout = max(20, max_tokens // 100) # ~1 second per 100 tokens
174
  prompt_timeout = max(10, len(prompt) // 1000) # ~1 second per 2000 characters
175
  timeout = min(base_timeout + token_timeout + prompt_timeout, 600) # Cap at 5 minutes
176
-
177
  logger.info(f"🌐 Making API request to OpenRouter")
178
  logger.info(f"📏 Prompt length: {len(prompt)} characters")
179
  logger.info(f"🎯 Max tokens: {max_tokens}")
180
  logger.info(f"⏱️ Timeout: {timeout}s")
181
-
182
  headers = {
183
  "Authorization": f"Bearer {self.api_key}",
184
  "Content-Type": "application/json",
185
  "HTTP-Referer": "https://github.com/Ab-Romia/ContextIQ-RAG",
186
  "X-Title": "Context Aware AI"
187
  }
188
-
189
  # Optimize payload for longer responses
190
  payload = {
191
  "model": self.model,
@@ -198,15 +192,15 @@ class OpenRouterLLM:
198
  "presence_penalty": 0.1, # Slight penalty for repetition
199
  "frequency_penalty": 0.1, # Slight penalty for frequency
200
  }
201
-
202
  # Log the request payload (without sensitive data)
203
  safe_payload = payload.copy()
204
  safe_payload["messages"] = [{"role": "user", "content": f"[CONTENT: {len(prompt)} chars]"}]
205
  logger.info(f"📤 Request payload: {json.dumps(safe_payload, indent=2)}")
206
-
207
  try:
208
  start_time = time.time()
209
-
210
  with requests.Session() as session:
211
  response = session.post(
212
  self.api_url,
@@ -214,40 +208,39 @@ class OpenRouterLLM:
214
  json=payload,
215
  timeout=timeout
216
  )
217
-
218
  request_time = time.time() - start_time
219
-
220
  logger.info(f"⏱️ API request completed in {request_time:.2f}s")
221
  logger.info(f"📊 Response status: {response.status_code}")
222
-
223
  if response.status_code == 200:
224
  response_data = response.json()
225
  logger.info("✅ API request successful")
226
-
227
  # Log response details
228
  if "choices" in response_data and response_data["choices"]:
229
  content = response_data["choices"][0]["message"]["content"]
230
  logger.info(f"📝 Response content length: {len(content)} characters")
231
-
232
  # Check if response was truncated
233
  if "usage" in response_data:
234
  usage = response_data["usage"]
235
  completion_tokens = usage.get("completion_tokens", 0)
236
  logger.info(f"📊 Token usage: {usage}")
237
-
238
  if completion_tokens >= max_tokens * 0.95: # If we used 95% of max tokens
239
- logger.warning(
240
- f"⚠️ Response may be truncated (used {completion_tokens}/{max_tokens} tokens)")
241
-
242
  content_preview = content[:300] + "..." if len(content) > 300 else content
243
  logger.info(f"📄 Response preview: {content_preview}")
244
-
245
  return response_data
246
  else:
247
  logger.error(f"❌ API request failed with status {response.status_code}")
248
  logger.error(f"📄 Response text: {response.text}")
249
  return {"error": f"HTTP {response.status_code}: {response.text}"}
250
-
251
  except requests.exceptions.Timeout:
252
  logger.error(f"⏱️ API request timed out after {timeout}s")
253
  return {"error": f"Request timed out after {timeout}s. Try reducing the context length or max tokens."}
@@ -267,11 +260,11 @@ class OpenRouterLLM:
267
  logger.info(f"🎯 Requested max tokens: {max_tokens}")
268
  logger.info(f"🔧 Client status: {'Ready' if self.client_ready else 'Not ready'}")
269
  logger.info(f"🔑 API key status: {'Present' if self.api_key else 'Missing'}")
270
-
271
  # Dynamic prompt optimization based on max_tokens
272
  original_length = len(prompt)
273
  max_prompt_length = 12000 if max_tokens > 3000 else 8000 # Allow longer prompts for longer responses
274
-
275
  if len(prompt) > max_prompt_length:
276
  logger.warning(f"⚠️ Prompt is quite long ({original_length} chars), truncating for better performance")
277
  # Intelligent truncation that preserves structure
@@ -280,37 +273,36 @@ class OpenRouterLLM:
280
  if len(parts) == 2:
281
  context_part = parts[0]
282
  question_part = "Question:" + parts[1]
283
-
284
  # Keep the question and instructions, truncate context if needed
285
  available_for_context = max_prompt_length - len(question_part) - 500 # Reserve space
286
  if len(context_part) > available_for_context:
287
- context_part = context_part[
288
- :available_for_context] + "\n\n[... content truncated for performance ...]"
289
-
290
  prompt = context_part + question_part
291
  logger.info(f"📏 Prompt intelligently truncated from {original_length} to {len(prompt)} characters")
292
  else:
293
  prompt = prompt[:max_prompt_length] + "\n\n[... content truncated for performance ...]"
294
  logger.info(f"📏 Prompt truncated from {original_length} to {len(prompt)} characters")
295
-
296
  # Log prompt preview
297
  prompt_preview = prompt[:400] + "..." if len(prompt) > 400 else prompt
298
  logger.info(f"📝 PROMPT PREVIEW:")
299
  logger.info(f" {prompt_preview}")
300
  logger.info("-" * 60)
301
-
302
  # Check API key first
303
  if not self.api_key or not self.api_key.strip():
304
  error_msg = "❌ OpenRouter API key is not configured. Please set the OPENROUTER_API_KEY environment variable."
305
  logger.error(error_msg)
306
  return error_msg
307
-
308
  # Check client readiness
309
  if not self.client_ready:
310
  error_msg = "❌ OpenRouter client is not ready. Please check your API key and connection."
311
  logger.error(error_msg)
312
  return error_msg
313
-
314
  max_retries = 3
315
  retry_count = 0
316
  base_wait_time = 2
@@ -318,21 +310,21 @@ class OpenRouterLLM:
318
  while retry_count <= max_retries:
319
  try:
320
  logger.info(f"🔄 API call attempt {retry_count + 1}/{max_retries + 1}")
321
-
322
  # Adjust parameters based on retry attempt
323
  current_max_tokens = max_tokens
324
  timeout = None # Let _make_api_request calculate dynamic timeout
325
-
326
  if retry_count > 0:
327
  # Reduce max_tokens on retries for faster responses
328
  current_max_tokens = max(1000, max_tokens - (retry_count * 500))
329
  logger.info(f"🔧 Retry attempt - reducing max_tokens to {current_max_tokens}")
330
-
331
  response = self._make_api_request(prompt, max_tokens=current_max_tokens, timeout=timeout)
332
-
333
  if "error" in response:
334
  error_msg = response["error"]
335
-
336
  # Handle specific error types
337
  if "timeout" in error_msg.lower() or "408" in error_msg:
338
  logger.warning(f"⏱️ Timeout error on attempt {retry_count + 1}")
@@ -348,29 +340,28 @@ class OpenRouterLLM:
348
  elif "401" in error_msg or "403" in error_msg:
349
  logger.error(f"🔑 Authentication error: {error_msg}")
350
  return f"❌ Authentication error: {error_msg}"
351
-
352
  raise Exception(error_msg)
353
-
354
  if "choices" in response and len(response["choices"]) > 0:
355
  content = response["choices"][0]["message"]["content"]
356
  if content:
357
  logger.info(f"✅ Successfully generated response")
358
  logger.info(f"📏 Response length: {len(content)} characters")
359
-
360
  # Check if response seems complete
361
  if "usage" in response:
362
  usage = response["usage"]
363
  completion_tokens = usage.get("completion_tokens", 0)
364
  if completion_tokens >= current_max_tokens * 0.95:
365
- logger.warning(
366
- f"⚠️ Response may be incomplete (used {completion_tokens}/{current_max_tokens} tokens)")
367
  content += "\n\n[Note: Response may be truncated due to token limits. Consider asking for specific parts if needed.]"
368
-
369
  response_preview = content[:400] + "..." if len(content) > 400 else content
370
  logger.info(f"📤 RESPONSE PREVIEW:")
371
  logger.info(f" {response_preview}")
372
  logger.info("=" * 80)
373
-
374
  return content
375
  else:
376
  logger.error("❌ Received empty response from AI model")
@@ -385,253 +376,45 @@ class OpenRouterLLM:
385
  retry_count += 1
386
  continue
387
  return "❌ Invalid response format from AI model."
388
-
389
  except Exception as e:
390
  error_type = type(e).__name__
391
  error_msg = str(e)
392
-
393
  logger.error(f"❌ API call failed (attempt {retry_count + 1}): {error_type}: {error_msg}")
394
-
395
  retry_count += 1
396
  if retry_count > max_retries:
397
  final_error = f"❌ Error: Failed to get response from AI model after {max_retries + 1} attempts. Final error: {error_msg}"
398
  logger.error(final_error)
399
  logger.info("=" * 80)
400
  return final_error
401
-
402
  wait_time = base_wait_time * retry_count + (retry_count * 0.5)
403
  logger.info(f"⏳ Waiting {wait_time:.1f}s before retry...")
404
  time.sleep(wait_time)
405
 
406
-
407
- class OpenAILLM:
408
- """OpenAI LLM client for GPT models"""
409
-
410
- def __init__(self, api_key: str, base_url: str, model: str):
411
- self.api_key = api_key
412
- self.base_url = base_url
413
- self.model = model
414
- self.client = None
415
-
416
- logger.info("=" * 60)
417
- logger.info("🚀 INITIALIZING OPENAI LLM")
418
- logger.info("=" * 60)
419
- logger.info(f"🤖 Model: {model}")
420
- logger.info(f"🔑 API Key present: {'Yes' if api_key else 'No'}")
421
- logger.info(f"📏 API Key length: {len(api_key) if api_key else 0}")
422
- logger.info(f"🌐 API URL: {base_url}")
423
-
424
- if not api_key or not api_key.strip():
425
- logger.error("❌ OpenAI API key is missing or empty")
426
- self.client_ready = False
427
- return
428
-
429
- # Initialize OpenAI client
430
- try:
431
- self.client = OpenAI(api_key=api_key, base_url=base_url)
432
- logger.info("✅ OpenAI client initialized")
433
-
434
- # Test the connection with minimal tokens
435
- test_response = self.client.chat.completions.create(
436
- model=self.model,
437
- messages=[{"role": "user", "content": "Hello"}],
438
- max_tokens=5
439
- )
440
- if test_response:
441
- logger.info("✅ OpenAI connection test successful")
442
- self.client_ready = True
443
- else:
444
- logger.error("❌ OpenAI connection test failed")
445
- self.client_ready = False
446
- except Exception as e:
447
- logger.error(f"❌ OpenAI initialization failed: {e}")
448
- self.client_ready = False
449
-
450
- logger.info("=" * 60)
451
-
452
- def generate_content(self, prompt: str, max_tokens: int = 2000) -> str:
453
- """Generate content using OpenAI API"""
454
- logger.info("=" * 80)
455
- logger.info("🧠 OPENAI CONTENT GENERATION STARTED")
456
- logger.info("=" * 80)
457
- logger.info(f"📏 Input prompt length: {len(prompt)} characters")
458
- logger.info(f"🎯 Requested max tokens: {max_tokens}")
459
- logger.info(f"🔧 Client status: {'Ready' if self.client_ready else 'Not ready'}")
460
-
461
- # Check client readiness
462
- if not self.client_ready or not self.client:
463
- error_msg = "❌ OpenAI client is not ready. Please check your API key and connection."
464
- logger.error(error_msg)
465
- return error_msg
466
-
467
- # Optimize prompt length
468
- original_length = len(prompt)
469
- max_prompt_length = 12000 if max_tokens > 3000 else 8000
470
-
471
- if len(prompt) > max_prompt_length:
472
- logger.warning(f"⚠️ Prompt is quite long ({original_length} chars), truncating for better performance")
473
- if "Context:" in prompt and "Question:" in prompt:
474
- parts = prompt.split("Question:")
475
- if len(parts) == 2:
476
- context_part = parts[0]
477
- question_part = "Question:" + parts[1]
478
- available_for_context = max_prompt_length - len(question_part) - 500
479
- if len(context_part) > available_for_context:
480
- context_part = context_part[:available_for_context] + "\n\n[... content truncated ...]"
481
- prompt = context_part + question_part
482
- logger.info(f"📏 Prompt intelligently truncated from {original_length} to {len(prompt)} characters")
483
- else:
484
- prompt = prompt[:max_prompt_length] + "\n\n[... content truncated ...]"
485
- logger.info(f"📏 Prompt truncated from {original_length} to {len(prompt)} characters")
486
-
487
- max_retries = 3
488
- retry_count = 0
489
-
490
- while retry_count <= max_retries:
491
- try:
492
- logger.info(f"🔄 API call attempt {retry_count + 1}/{max_retries + 1}")
493
-
494
- current_max_tokens = max_tokens
495
- if retry_count > 0:
496
- current_max_tokens = max(1000, max_tokens - (retry_count * 500))
497
- logger.info(f"🔧 Retry attempt - reducing max_tokens to {current_max_tokens}")
498
-
499
- start_time = time.time()
500
-
501
- response = self.client.chat.completions.create(
502
- model=self.model,
503
- messages=[{"role": "user", "content": prompt}],
504
- max_tokens=current_max_tokens,
505
- temperature=0.7,
506
- top_p=0.9,
507
- )
508
-
509
- request_time = time.time() - start_time
510
- logger.info(f"⏱️ API request completed in {request_time:.2f}s")
511
-
512
- if response and response.choices:
513
- content = response.choices[0].message.content
514
- if content:
515
- logger.info(f"✅ Successfully generated response")
516
- logger.info(f"📏 Response length: {len(content)} characters")
517
-
518
- # Check token usage
519
- if hasattr(response, 'usage') and response.usage:
520
- logger.info(f"📊 Token usage: {response.usage}")
521
- if response.usage.completion_tokens >= current_max_tokens * 0.95:
522
- logger.warning(f"⚠️ Response may be truncated")
523
- content += "\n\n[Note: Response may be truncated due to token limits.]"
524
-
525
- response_preview = content[:400] + "..." if len(content) > 400 else content
526
- logger.info(f"📤 RESPONSE PREVIEW: {response_preview}")
527
- logger.info("=" * 80)
528
- return content
529
- else:
530
- logger.error("❌ Received empty response")
531
- if retry_count < max_retries:
532
- retry_count += 1
533
- continue
534
- return "❌ Received empty response from OpenAI."
535
- else:
536
- logger.error("❌ Invalid response format")
537
- if retry_count < max_retries:
538
- retry_count += 1
539
- continue
540
- return "❌ Invalid response format from OpenAI."
541
-
542
- except Exception as e:
543
- error_msg = str(e)
544
- logger.error(f"❌ API call failed (attempt {retry_count + 1}): {error_msg}")
545
-
546
- # Handle rate limits
547
- if "rate_limit" in error_msg.lower() or "429" in error_msg:
548
- wait_time = 2 ** retry_count
549
- logger.info(f"⏳ Rate limit hit, waiting {wait_time}s...")
550
- time.sleep(wait_time)
551
-
552
- retry_count += 1
553
- if retry_count > max_retries:
554
- final_error = f"❌ Error: Failed after {max_retries + 1} attempts. Final error: {error_msg}"
555
- logger.error(final_error)
556
- logger.info("=" * 80)
557
- return final_error
558
-
559
- time.sleep(2 * retry_count)
560
-
561
-
562
- def create_llm(api_key: str, provider: Optional[str] = None) -> 'OpenRouterLLM | OpenAILLM':
563
- """
564
- Factory function to create the appropriate LLM client based on API key or provider.
565
-
566
- Args:
567
- api_key: The API key to use
568
- provider: Optional provider name ('openrouter' or 'openai'). If not provided, will auto-detect.
569
-
570
- Returns:
571
- An instance of OpenRouterLLM or OpenAILLM
572
- """
573
- # Auto-detect provider if not specified
574
- if provider is None:
575
- provider = detect_provider_from_key(api_key)
576
- logger.info(f"🔍 Auto-detected provider: {provider}")
577
-
578
- provider = provider.lower()
579
-
580
- if provider == "openai":
581
- logger.info("🎯 Creating OpenAI LLM client")
582
- return OpenAILLM(
583
- api_key=api_key,
584
- base_url=settings.OPENAI_URL,
585
- model=settings.OPENAI_MODEL
586
- )
587
- elif provider == "openrouter":
588
- logger.info("🎯 Creating OpenRouter LLM client")
589
- return OpenRouterLLM(
590
- api_key=api_key,
591
- base_url=settings.OPENROUTER_URL,
592
- model=settings.OPENROUTER_MODEL
593
- )
594
- else:
595
- logger.warning(f"⚠️ Unknown provider '{provider}', defaulting to OpenRouter")
596
- return OpenRouterLLM(
597
- api_key=api_key,
598
- base_url=settings.OPENROUTER_URL,
599
- model=settings.OPENROUTER_MODEL
600
- )
601
-
602
-
603
- # Initialize the generation model with default settings
604
- logger.info("🚀 Creating default LLM instance...")
605
  try:
606
- # Use OpenRouter by default if API key is available
607
- if settings.OPENROUTER_API_KEY:
608
- generation_model = OpenRouterLLM(
609
- api_key=settings.OPENROUTER_API_KEY,
610
- base_url=settings.OPENROUTER_URL,
611
- model=settings.OPENROUTER_MODEL
612
- )
613
- elif settings.OPENAI_API_KEY:
614
- generation_model = OpenAILLM(
615
- api_key=settings.OPENAI_API_KEY,
616
- base_url=settings.OPENAI_URL,
617
- model=settings.OPENAI_MODEL
618
- )
619
- else:
620
- raise ValueError("No API key configured")
621
-
622
  if generation_model.client_ready:
623
- logger.info("✅ RAG setup completed successfully - LLM client is ready")
624
  else:
625
- logger.error("❌ RAG setup completed but LLM client is not ready")
626
-
627
  except Exception as e:
628
- logger.error(f"❌ Error creating LLM: {e}")
629
-
630
  # Create a dummy model for graceful degradation
631
  class DummyLLM:
632
- def generate_content(self, prompt: str, max_tokens: int = 2000) -> str:
633
  return f"❌ AI model is not available. Initialization error: {str(e)}"
634
-
635
  generation_model = DummyLLM()
636
  logger.warning("⚠️ Using dummy LLM due to initialization failure")
637
 
 
2
  import logging
3
  import requests
4
  import json
5
+ from config import settings # Fixed: removed 'app.' prefix
6
  import time
7
  import os
8
  import numpy as np
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
+ from typing import List
11
  import hashlib
12
  import re
 
13
 
14
  # Disable ChromaDB telemetry to avoid errors
15
  os.environ["ANONYMIZED_TELEMETRY"] = "False"
 
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
18
  logger = logging.getLogger("rag-setup")
19
 
 
20
  # Custom TF-IDF based embedding function
21
  class TFIDFEmbeddingFunction:
22
  def __init__(self, max_features=384):
 
28
  )
29
  self.is_fitted = False
30
  self.max_features = max_features
31
+
 
 
 
32
  def _preprocess_text(self, text: str) -> str:
33
  """Clean and preprocess text."""
34
  text = re.sub(r'\s+', ' ', text)
35
  text = re.sub(r'[^\w\s]', ' ', text)
36
  return text.strip().lower()
37
+
38
  def __call__(self, input: List[str]) -> List[List[float]]:
39
  """Generate TF-IDF based embeddings."""
40
  try:
41
  logger.info(f"🔢 Generating embeddings for {len(input)} texts")
42
  processed_texts = [self._preprocess_text(text) for text in input]
43
+
44
  if not self.is_fitted and processed_texts:
45
  # Fit the vectorizer on the input texts
46
  self.vectorizer.fit(processed_texts)
47
  self.is_fitted = True
48
  logger.info("✅ TF-IDF vectorizer fitted on input texts")
49
+
50
  if not self.is_fitted:
51
  # Return simple fallback if no data to fit on
52
  logger.warning("⚠️ Using fallback embeddings - vectorizer not fitted")
53
  return self._fallback_embeddings(input)
54
+
55
  # Transform texts to vectors
56
  tfidf_matrix = self.vectorizer.transform(processed_texts)
57
  embeddings = tfidf_matrix.toarray()
58
+
59
  # Ensure consistent dimensions
60
  result_embeddings = []
61
  for embedding in embeddings:
 
65
  result_embeddings.append(padded.tolist())
66
  else:
67
  result_embeddings.append(embedding[:self.max_features].tolist())
68
+
69
  logger.info(f"✅ Generated {len(result_embeddings)} embeddings of dimension {self.max_features}")
70
  return result_embeddings
71
+
72
  except Exception as e:
73
  logger.error(f"❌ Error generating TF-IDF embeddings: {e}")
74
  return self._fallback_embeddings(input)
75
+
76
  def _fallback_embeddings(self, input: List[str]) -> List[List[float]]:
77
  """Simple fallback embedding method."""
78
  logger.info(f"🔧 Using fallback embeddings for {len(input)} texts")
 
80
  for text in input:
81
  text_hash = hashlib.md5(text.encode()).hexdigest()
82
  embedding = []
83
+
84
  # Convert hash to numbers
85
  for i in range(0, min(len(text_hash), 32), 2):
86
+ hex_pair = text_hash[i:i+2]
87
  embedding.append(int(hex_pair, 16) / 255.0)
88
+
89
  # Add text features
90
  embedding.extend([
91
  len(text) / 1000.0,
92
  len(text.split()) / 100.0,
93
  text.count('.') / 10.0,
94
  ])
95
+
96
  # Pad to desired size
97
  while len(embedding) < self.max_features:
98
  embedding.extend(embedding[:min(len(embedding), self.max_features - len(embedding))])
99
+
100
  embeddings.append(embedding[:self.max_features])
101
+
102
  return embeddings
103
 
 
104
  # Simple ChromaDB setup - use in-memory storage for Hugging Face
105
  logger.info("🔧 Initializing ChromaDB with in-memory storage for Hugging Face compatibility")
106
 
 
108
  # Use in-memory client to avoid permission issues
109
  client = chromadb.Client()
110
  embedding_function = TFIDFEmbeddingFunction()
111
+
112
  collection = client.get_or_create_collection(
113
  name="context_aware_collection",
114
  embedding_function=embedding_function
115
  )
116
+
117
  logger.info("✅ ChromaDB collection initialized successfully with in-memory storage")
118
+
119
  except Exception as e:
120
  logger.error(f"❌ Error initializing ChromaDB: {e}")
121
  raise RuntimeError(f"Failed to initialize ChromaDB: {e}")
 
127
  self.base_url = base_url
128
  self.model = model
129
  self.api_url = f"{base_url.rstrip('/')}/chat/completions"
130
+
131
  logger.info("=" * 60)
132
  logger.info("🚀 INITIALIZING OPENROUTER LLM")
133
  logger.info("=" * 60)
 
135
  logger.info(f"🔑 API Key present: {'Yes' if api_key else 'No'}")
136
  logger.info(f"📏 API Key length: {len(api_key) if api_key else 0}")
137
  logger.info(f"🌐 API URL: {self.api_url}")
138
+
139
  if not api_key or not api_key.strip():
140
  logger.error("❌ OpenRouter API key is missing or empty")
141
  self.client_ready = False
142
  return
143
+
144
  # Test the connection with minimal tokens
145
  try:
146
  logger.info("🔍 Testing OpenRouter connection...")
 
154
  except Exception as e:
155
  logger.error(f"❌ OpenRouter connection test failed: {e}")
156
  self.client_ready = False
157
+
158
  logger.info("=" * 60)
159
 
160
  def _make_api_request(self, prompt: str, max_tokens: int = 2000, timeout: int = None) -> dict:
161
  """Make a direct HTTP request to OpenRouter API with configurable token limits."""
162
+
163
  # Calculate dynamic timeout based on max_tokens and prompt length
164
  if timeout is None:
165
  base_timeout = 120
 
167
  token_timeout = max(20, max_tokens // 100) # ~1 second per 100 tokens
168
  prompt_timeout = max(10, len(prompt) // 1000) # ~1 second per 2000 characters
169
  timeout = min(base_timeout + token_timeout + prompt_timeout, 600) # Cap at 5 minutes
170
+
171
  logger.info(f"🌐 Making API request to OpenRouter")
172
  logger.info(f"📏 Prompt length: {len(prompt)} characters")
173
  logger.info(f"🎯 Max tokens: {max_tokens}")
174
  logger.info(f"⏱️ Timeout: {timeout}s")
175
+
176
  headers = {
177
  "Authorization": f"Bearer {self.api_key}",
178
  "Content-Type": "application/json",
179
  "HTTP-Referer": "https://github.com/Ab-Romia/ContextIQ-RAG",
180
  "X-Title": "Context Aware AI"
181
  }
182
+
183
  # Optimize payload for longer responses
184
  payload = {
185
  "model": self.model,
 
192
  "presence_penalty": 0.1, # Slight penalty for repetition
193
  "frequency_penalty": 0.1, # Slight penalty for frequency
194
  }
195
+
196
  # Log the request payload (without sensitive data)
197
  safe_payload = payload.copy()
198
  safe_payload["messages"] = [{"role": "user", "content": f"[CONTENT: {len(prompt)} chars]"}]
199
  logger.info(f"📤 Request payload: {json.dumps(safe_payload, indent=2)}")
200
+
201
  try:
202
  start_time = time.time()
203
+
204
  with requests.Session() as session:
205
  response = session.post(
206
  self.api_url,
 
208
  json=payload,
209
  timeout=timeout
210
  )
211
+
212
  request_time = time.time() - start_time
213
+
214
  logger.info(f"⏱️ API request completed in {request_time:.2f}s")
215
  logger.info(f"📊 Response status: {response.status_code}")
216
+
217
  if response.status_code == 200:
218
  response_data = response.json()
219
  logger.info("✅ API request successful")
220
+
221
  # Log response details
222
  if "choices" in response_data and response_data["choices"]:
223
  content = response_data["choices"][0]["message"]["content"]
224
  logger.info(f"📝 Response content length: {len(content)} characters")
225
+
226
  # Check if response was truncated
227
  if "usage" in response_data:
228
  usage = response_data["usage"]
229
  completion_tokens = usage.get("completion_tokens", 0)
230
  logger.info(f"📊 Token usage: {usage}")
231
+
232
  if completion_tokens >= max_tokens * 0.95: # If we used 95% of max tokens
233
+ logger.warning(f"⚠️ Response may be truncated (used {completion_tokens}/{max_tokens} tokens)")
234
+
 
235
  content_preview = content[:300] + "..." if len(content) > 300 else content
236
  logger.info(f"📄 Response preview: {content_preview}")
237
+
238
  return response_data
239
  else:
240
  logger.error(f"❌ API request failed with status {response.status_code}")
241
  logger.error(f"📄 Response text: {response.text}")
242
  return {"error": f"HTTP {response.status_code}: {response.text}"}
243
+
244
  except requests.exceptions.Timeout:
245
  logger.error(f"⏱️ API request timed out after {timeout}s")
246
  return {"error": f"Request timed out after {timeout}s. Try reducing the context length or max tokens."}
 
260
  logger.info(f"🎯 Requested max tokens: {max_tokens}")
261
  logger.info(f"🔧 Client status: {'Ready' if self.client_ready else 'Not ready'}")
262
  logger.info(f"🔑 API key status: {'Present' if self.api_key else 'Missing'}")
263
+
264
  # Dynamic prompt optimization based on max_tokens
265
  original_length = len(prompt)
266
  max_prompt_length = 12000 if max_tokens > 3000 else 8000 # Allow longer prompts for longer responses
267
+
268
  if len(prompt) > max_prompt_length:
269
  logger.warning(f"⚠️ Prompt is quite long ({original_length} chars), truncating for better performance")
270
  # Intelligent truncation that preserves structure
 
273
  if len(parts) == 2:
274
  context_part = parts[0]
275
  question_part = "Question:" + parts[1]
276
+
277
  # Keep the question and instructions, truncate context if needed
278
  available_for_context = max_prompt_length - len(question_part) - 500 # Reserve space
279
  if len(context_part) > available_for_context:
280
+ context_part = context_part[:available_for_context] + "\n\n[... content truncated for performance ...]"
281
+
 
282
  prompt = context_part + question_part
283
  logger.info(f"📏 Prompt intelligently truncated from {original_length} to {len(prompt)} characters")
284
  else:
285
  prompt = prompt[:max_prompt_length] + "\n\n[... content truncated for performance ...]"
286
  logger.info(f"📏 Prompt truncated from {original_length} to {len(prompt)} characters")
287
+
288
  # Log prompt preview
289
  prompt_preview = prompt[:400] + "..." if len(prompt) > 400 else prompt
290
  logger.info(f"📝 PROMPT PREVIEW:")
291
  logger.info(f" {prompt_preview}")
292
  logger.info("-" * 60)
293
+
294
  # Check API key first
295
  if not self.api_key or not self.api_key.strip():
296
  error_msg = "❌ OpenRouter API key is not configured. Please set the OPENROUTER_API_KEY environment variable."
297
  logger.error(error_msg)
298
  return error_msg
299
+
300
  # Check client readiness
301
  if not self.client_ready:
302
  error_msg = "❌ OpenRouter client is not ready. Please check your API key and connection."
303
  logger.error(error_msg)
304
  return error_msg
305
+
306
  max_retries = 3
307
  retry_count = 0
308
  base_wait_time = 2
 
310
  while retry_count <= max_retries:
311
  try:
312
  logger.info(f"🔄 API call attempt {retry_count + 1}/{max_retries + 1}")
313
+
314
  # Adjust parameters based on retry attempt
315
  current_max_tokens = max_tokens
316
  timeout = None # Let _make_api_request calculate dynamic timeout
317
+
318
  if retry_count > 0:
319
  # Reduce max_tokens on retries for faster responses
320
  current_max_tokens = max(1000, max_tokens - (retry_count * 500))
321
  logger.info(f"🔧 Retry attempt - reducing max_tokens to {current_max_tokens}")
322
+
323
  response = self._make_api_request(prompt, max_tokens=current_max_tokens, timeout=timeout)
324
+
325
  if "error" in response:
326
  error_msg = response["error"]
327
+
328
  # Handle specific error types
329
  if "timeout" in error_msg.lower() or "408" in error_msg:
330
  logger.warning(f"⏱️ Timeout error on attempt {retry_count + 1}")
 
340
  elif "401" in error_msg or "403" in error_msg:
341
  logger.error(f"🔑 Authentication error: {error_msg}")
342
  return f"❌ Authentication error: {error_msg}"
343
+
344
  raise Exception(error_msg)
345
+
346
  if "choices" in response and len(response["choices"]) > 0:
347
  content = response["choices"][0]["message"]["content"]
348
  if content:
349
  logger.info(f"✅ Successfully generated response")
350
  logger.info(f"📏 Response length: {len(content)} characters")
351
+
352
  # Check if response seems complete
353
  if "usage" in response:
354
  usage = response["usage"]
355
  completion_tokens = usage.get("completion_tokens", 0)
356
  if completion_tokens >= current_max_tokens * 0.95:
357
+ logger.warning(f"⚠️ Response may be incomplete (used {completion_tokens}/{current_max_tokens} tokens)")
 
358
  content += "\n\n[Note: Response may be truncated due to token limits. Consider asking for specific parts if needed.]"
359
+
360
  response_preview = content[:400] + "..." if len(content) > 400 else content
361
  logger.info(f"📤 RESPONSE PREVIEW:")
362
  logger.info(f" {response_preview}")
363
  logger.info("=" * 80)
364
+
365
  return content
366
  else:
367
  logger.error("❌ Received empty response from AI model")
 
376
  retry_count += 1
377
  continue
378
  return "❌ Invalid response format from AI model."
379
+
380
  except Exception as e:
381
  error_type = type(e).__name__
382
  error_msg = str(e)
383
+
384
  logger.error(f"❌ API call failed (attempt {retry_count + 1}): {error_type}: {error_msg}")
385
+
386
  retry_count += 1
387
  if retry_count > max_retries:
388
  final_error = f"❌ Error: Failed to get response from AI model after {max_retries + 1} attempts. Final error: {error_msg}"
389
  logger.error(final_error)
390
  logger.info("=" * 80)
391
  return final_error
392
+
393
  wait_time = base_wait_time * retry_count + (retry_count * 0.5)
394
  logger.info(f"⏳ Waiting {wait_time:.1f}s before retry...")
395
  time.sleep(wait_time)
396
 
397
+ # Initialize the generation model
398
+ logger.info("🚀 Creating OpenRouter LLM instance...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  try:
400
+ generation_model = OpenRouterLLM(
401
+ api_key=settings.OPENROUTER_API_KEY,
402
+ base_url=settings.OPENROUTER_URL,
403
+ model=settings.MODEL_NAME
404
+ )
405
+
 
 
 
 
 
 
 
 
 
 
406
  if generation_model.client_ready:
407
+ logger.info("✅ RAG setup completed successfully - OpenRouter client is ready")
408
  else:
409
+ logger.error("❌ RAG setup completed but OpenRouter client is not ready")
410
+
411
  except Exception as e:
412
+ logger.error(f"❌ Error creating OpenRouter LLM: {e}")
 
413
  # Create a dummy model for graceful degradation
414
  class DummyLLM:
415
+ def generate_content(self, prompt: str) -> str:
416
  return f"❌ AI model is not available. Initialization error: {str(e)}"
417
+
418
  generation_model = DummyLLM()
419
  logger.warning("⚠️ Using dummy LLM due to initialization failure")
420
 
app/schemas.py CHANGED
@@ -1,12 +1,5 @@
1
  from pydantic import BaseModel, Field
2
- from typing import Optional, List
3
-
4
-
5
- class ConversationMessage(BaseModel):
6
- """Schema for a single message in the conversation history"""
7
- role: str = Field(..., description="Role of the message sender ('user' or 'assistant')")
8
- content: str = Field(..., description="Content of the message")
9
-
10
 
11
  class DocumentRequest(BaseModel):
12
  """
@@ -18,7 +11,6 @@ class DocumentRequest(BaseModel):
18
  description="The full document or text to be indexed."
19
  )
20
 
21
-
22
  class ChatRequest(BaseModel):
23
  """
24
  Schema for the request to generate a response.
@@ -28,10 +20,6 @@ class ChatRequest(BaseModel):
28
  min_length=2,
29
  description="The user's question to be answered based on the indexed context."
30
  )
31
- conversation_history: Optional[List[ConversationMessage]] = Field(
32
- default=None,
33
- description="Optional conversation history for context-aware responses"
34
- )
35
 
36
  class TaskRequest(BaseModel):
37
  """
@@ -48,11 +36,7 @@ class ApiKeyRequest(BaseModel):
48
  api_key: str = Field(
49
  ...,
50
  min_length=10,
51
- description="The API key to test (OpenRouter or OpenAI)."
52
- )
53
- provider: Optional[str] = Field(
54
- None,
55
- description="The provider for the API key ('openrouter' or 'openai'). Auto-detected if not provided."
56
  )
57
 
58
  class ChatResponse(BaseModel):
 
1
  from pydantic import BaseModel, Field
2
+ from typing import Optional
 
 
 
 
 
 
 
3
 
4
  class DocumentRequest(BaseModel):
5
  """
 
11
  description="The full document or text to be indexed."
12
  )
13
 
 
14
  class ChatRequest(BaseModel):
15
  """
16
  Schema for the request to generate a response.
 
20
  min_length=2,
21
  description="The user's question to be answered based on the indexed context."
22
  )
 
 
 
 
23
 
24
  class TaskRequest(BaseModel):
25
  """
 
36
  api_key: str = Field(
37
  ...,
38
  min_length=10,
39
+ description="The OpenRouter API key to test."
 
 
 
 
40
  )
41
 
42
  class ChatResponse(BaseModel):
app/services.py CHANGED
@@ -6,13 +6,12 @@ import time
6
  import rag_setup
7
  from schemas import ChatRequest, DocumentRequest, TaskRequest
8
  from typing import Optional, Tuple
9
- from config import settings, detect_provider_from_key # Fixed: removed 'app.' prefix
10
  from fastapi import UploadFile, HTTPException
11
  import json
12
  import xml.etree.ElementTree as ET
13
  from striprtf.striprtf import rtf_to_text
14
  import markdown
15
-
16
  try:
17
  import fitz # PyMuPDF
18
  except ImportError:
@@ -22,31 +21,28 @@ except ImportError:
22
  try:
23
  import docx # python-docx for Word documents
24
  except ImportError:
25
- logging.error(
26
- "python-docx is not installed. Word document processing will not work. Please run 'pip install python-docx'")
27
  docx = None
28
 
29
  try:
30
  from pptx import Presentation # python-pptx for PowerPoint
31
  except ImportError:
32
- logging.error(
33
- "python-pptx is not installed. PowerPoint processing will not work. Please run 'pip install python-pptx'")
34
  Presentation = None
35
 
36
  try:
37
  import pandas as pd # For Excel and CSV files
38
  except ImportError:
39
- logging.error(
40
- "pandas is not installed. Excel/CSV processing will not work. Please run 'pip install pandas openpyxl'")
41
  pd = None
42
 
43
  try:
44
  from bs4 import BeautifulSoup # For HTML parsing
45
  except ImportError:
46
- logging.error(
47
- "BeautifulSoup is not installed. HTML processing will not work. Please run 'pip install beautifulsoup4'")
48
  BeautifulSoup = None
49
 
 
50
  logging.basicConfig(
51
  level=logging.INFO,
52
  format='%(asctime)s [%(levelname)s] %(message)s',
@@ -59,16 +55,18 @@ logger = logging.getLogger("rag-service")
59
  _response_cache = {}
60
  CACHE_EXPIRATION_SECONDS = 600 # 10 minutes
61
 
 
 
 
 
 
 
 
62
 
63
- def create_llm_instance(api_key: str, provider: Optional[str] = None):
64
- """Create a new LLM instance with the provided API key (OpenRouter or OpenAI)."""
65
- return rag_setup.create_llm(api_key=api_key, provider=provider)
66
-
67
-
68
- async def test_api_key(api_key: str, provider: Optional[str] = None) -> dict:
69
- """Test if the provided API key is valid (OpenRouter or OpenAI)."""
70
  logger.info(f"🔍 Testing API key: {api_key[:10]}...")
71
-
72
  try:
73
  # Validate API key format first
74
  if not api_key or not api_key.strip():
@@ -78,37 +76,15 @@ async def test_api_key(api_key: str, provider: Optional[str] = None) -> dict:
78
  "message": "API key cannot be empty",
79
  "model_info": None
80
  }
81
-
82
- # Auto-detect provider if not specified
83
- if provider is None:
84
- provider = detect_provider_from_key(api_key)
85
- logger.info(f"🔍 Auto-detected provider: {provider}")
86
-
87
- # Validate based on provider
88
- if provider == "openrouter":
89
- if not api_key.startswith('sk-or-'):
90
- logger.error("❌ API key has incorrect format for OpenRouter")
91
- return {
92
- "valid": False,
93
- "message": "OpenRouter API keys should start with 'sk-or-'",
94
- "model_info": None
95
- }
96
- elif provider == "openai":
97
- if not api_key.startswith('sk-'):
98
- logger.error("❌ API key has incorrect format for OpenAI")
99
- return {
100
- "valid": False,
101
- "message": "OpenAI API keys should start with 'sk-'",
102
- "model_info": None
103
- }
104
- else:
105
- logger.error("❌ Unknown provider")
106
  return {
107
  "valid": False,
108
- "message": f"Unknown provider: {provider}. Please use OpenRouter or OpenAI.",
109
  "model_info": None
110
  }
111
-
112
  if len(api_key) < 40:
113
  logger.error("❌ API key is too short")
114
  return {
@@ -116,51 +92,18 @@ async def test_api_key(api_key: str, provider: Optional[str] = None) -> dict:
116
  "message": "API key appears to be too short",
117
  "model_info": None
118
  }
119
-
120
  # Create a temporary LLM instance
121
- test_llm = create_llm_instance(api_key, provider)
122
-
123
  # Test with a minimal prompt to avoid quota usage
124
- if provider == "openai":
125
- # OpenAI uses the SDK, so we test differently
126
- try:
127
- test_content = test_llm.generate_content("Hi", max_tokens=5)
128
- if test_content and not test_content.startswith("❌"):
129
- logger.info("✅ OpenAI API key test successful")
130
- return {
131
- "valid": True,
132
- "message": "OpenAI API key is valid and working!",
133
- "model_info": {"model": settings.OPENAI_MODEL, "provider": "openai"}
134
- }
135
- else:
136
- return {
137
- "valid": False,
138
- "message": test_content or "API key test failed",
139
- "model_info": None
140
- }
141
- except Exception as e:
142
- error_msg = str(e)
143
- if "401" in error_msg or "Incorrect API key" in error_msg:
144
- return {
145
- "valid": False,
146
- "message": "Invalid OpenAI API key: Authentication failed",
147
- "model_info": None
148
- }
149
- else:
150
- return {
151
- "valid": False,
152
- "message": f"OpenAI API key test failed: {error_msg}",
153
- "model_info": None
154
- }
155
-
156
- # For OpenRouter, use the existing logic
157
  test_response = test_llm._make_api_request("Hi", max_tokens=1)
158
-
159
  # Check for explicit errors first
160
  if "error" in test_response:
161
  error_msg = test_response["error"]
162
  logger.error(f"❌ API key test failed: {error_msg}")
163
-
164
  # Parse specific error types
165
  if "401" in str(error_msg) or "403" in str(error_msg) or "Unauthorized" in str(error_msg):
166
  return {
@@ -188,7 +131,7 @@ async def test_api_key(api_key: str, provider: Optional[str] = None) -> dict:
188
  "message": f"API key test failed: {error_msg}",
189
  "model_info": None
190
  }
191
-
192
  # Check for successful response with proper structure
193
  if "choices" in test_response and test_response["choices"]:
194
  choice = test_response["choices"][0]
@@ -204,7 +147,7 @@ async def test_api_key(api_key: str, provider: Optional[str] = None) -> dict:
204
  "message": "API key is valid and working!",
205
  "model_info": model_info
206
  }
207
-
208
  # If we get here, the response format is unexpected
209
  logger.error(f"❌ API key test failed: Unexpected response format - {test_response}")
210
  return {
@@ -212,11 +155,11 @@ async def test_api_key(api_key: str, provider: Optional[str] = None) -> dict:
212
  "message": "API key test failed: Unexpected response format from OpenRouter",
213
  "model_info": None
214
  }
215
-
216
  except Exception as e:
217
  logger.error(f"❌ API key test failed with exception: {str(e)}")
218
  error_msg = str(e)
219
-
220
  # Parse common error patterns
221
  if "401" in error_msg or "403" in error_msg or "Unauthorized" in error_msg:
222
  return {
@@ -243,7 +186,6 @@ async def test_api_key(api_key: str, provider: Optional[str] = None) -> dict:
243
  "model_info": None
244
  }
245
 
246
-
247
  async def process_and_index_file(file: UploadFile) -> Tuple[int, str]:
248
  """
249
  Processes an uploaded file, extracts text, calls the indexing function,
@@ -251,7 +193,7 @@ async def process_and_index_file(file: UploadFile) -> Tuple[int, str]:
251
  Supports: .txt, .pdf, .docx, .pptx, .xlsx, .csv, .json, .xml, .html, .md, .rtf
252
  """
253
  logger.info(f"📄 Processing file '{file.filename}' with content type '{file.content_type}'")
254
-
255
  # Read file content
256
  file_content = await file.read()
257
  text = ""
@@ -260,43 +202,42 @@ async def process_and_index_file(file: UploadFile) -> Tuple[int, str]:
260
  try:
261
  if file_extension == "txt":
262
  text = await _process_txt_file(file_content)
263
-
264
  elif file_extension == "pdf":
265
  text = await _process_pdf_file(file_content)
266
-
267
  elif file_extension == "docx":
268
  text = await _process_docx_file(file_content)
269
-
270
  elif file_extension in ["ppt", "pptx"]:
271
  text = await _process_pptx_file(file_content)
272
-
273
  elif file_extension in ["xls", "xlsx"]:
274
  text = await _process_excel_file(file_content, file.filename)
275
-
276
  elif file_extension == "csv":
277
  text = await _process_csv_file(file_content)
278
-
279
  elif file_extension == "json":
280
  text = await _process_json_file(file_content)
281
-
282
  elif file_extension == "xml":
283
  text = await _process_xml_file(file_content)
284
-
285
  elif file_extension in ["html", "htm"]:
286
  text = await _process_html_file(file_content)
287
-
288
  elif file_extension in ["md", "markdown"]:
289
  text = await _process_markdown_file(file_content)
290
-
291
  elif file_extension == "rtf":
292
  text = await _process_rtf_file(file_content)
293
-
294
  else:
295
- supported_extensions = ['.txt', '.pdf', '.docx', '.pptx', '.xlsx', '.csv', '.json', '.xml', '.html', '.md',
296
- '.rtf']
297
  logger.error(f"❌ Unsupported file type: {file.filename}")
298
  raise HTTPException(
299
- status_code=400,
300
  detail=f"Unsupported file type. Please upload one of: {', '.join(supported_extensions)}"
301
  )
302
 
@@ -309,24 +250,23 @@ async def process_and_index_file(file: UploadFile) -> Tuple[int, str]:
309
  # Validate extracted text
310
  if not text or not text.strip():
311
  logger.error("❌ Extracted text is empty or whitespace only")
312
- raise HTTPException(status_code=400,
313
- detail="Extracted text is empty. The file might be empty, corrupted, or unreadable.")
314
-
315
  # Clean up the text
316
  text = text.strip()
317
-
318
  # Log processing stats
319
  word_count = len(text.split())
320
  logger.info(f"📊 Text processing complete: {len(text)} characters, {word_count} words")
321
-
322
  # Index the extracted text using existing logic
323
  try:
324
  doc_request = DocumentRequest(context=text)
325
  docs_added = index_document(doc_request)
326
  logger.info(f"✅ Successfully indexed {docs_added} document chunks from file")
327
-
328
  return docs_added, text
329
-
330
  except Exception as e:
331
  logger.error(f"❌ Failed to index extracted text: {e}")
332
  raise HTTPException(status_code=500, detail=f"Failed to index extracted text: {str(e)}")
@@ -350,14 +290,13 @@ async def _process_txt_file(file_content: bytes) -> str:
350
  continue
351
  else:
352
  raise UnicodeDecodeError("Unable to decode file with any common encoding")
353
-
354
  logger.info(f"✅ Extracted {len(text)} characters from .txt file")
355
  return text
356
-
357
  except UnicodeDecodeError as e:
358
  logger.error(f"❌ Could not decode .txt file: {e}")
359
- raise HTTPException(status_code=400,
360
- detail="Could not decode .txt file. Please ensure it uses UTF-8, Latin-1, or CP1252 encoding.")
361
 
362
 
363
  async def _process_pdf_file(file_content: bytes) -> str:
@@ -365,34 +304,34 @@ async def _process_pdf_file(file_content: bytes) -> str:
365
  if fitz is None:
366
  logger.error("❌ PyMuPDF not available for PDF processing")
367
  raise HTTPException(status_code=501, detail="PDF processing is not available. PyMuPDF is not installed.")
368
-
369
  logger.info("📖 Opening PDF document...")
370
  doc = fitz.open(stream=file_content, filetype="pdf")
371
-
372
  try:
373
  text_parts = []
374
  page_count = len(doc)
375
  logger.info(f"📑 PDF has {page_count} pages")
376
-
377
  for page_num in range(page_count):
378
  try:
379
  page = doc[page_num]
380
  page_text = page.get_text()
381
-
382
  if page_text and page_text.strip():
383
  text_parts.append(f"--- Page {page_num + 1} ---\n{page_text.strip()}")
384
  logger.info(f"📄 Extracted text from page {page_num + 1}: {len(page_text)} characters")
385
  else:
386
  logger.info(f"📄 Page {page_num + 1} is empty or contains no extractable text")
387
-
388
  except Exception as page_error:
389
  logger.warning(f"⚠️ Could not extract text from page {page_num + 1}: {page_error}")
390
  continue
391
-
392
  text = "\n\n".join(text_parts)
393
  logger.info(f"✅ Extracted text from {len(text_parts)} pages of the PDF file ({len(text)} characters)")
394
  return text
395
-
396
  finally:
397
  doc.close()
398
  logger.info("📕 PDF document closed successfully")
@@ -401,17 +340,16 @@ async def _process_pdf_file(file_content: bytes) -> str:
401
  async def _process_docx_file(file_content: bytes) -> str:
402
  """Process .docx files using python-docx."""
403
  if docx is None:
404
- raise HTTPException(status_code=501,
405
- detail="Word document processing is not available. python-docx is not installed.")
406
-
407
  from io import BytesIO
408
  doc = docx.Document(BytesIO(file_content))
409
-
410
  text_parts = []
411
  for paragraph in doc.paragraphs:
412
  if paragraph.text.strip():
413
  text_parts.append(paragraph.text.strip())
414
-
415
  # Also extract text from tables
416
  for table in doc.tables:
417
  for row in table.rows:
@@ -421,7 +359,7 @@ async def _process_docx_file(file_content: bytes) -> str:
421
  row_text.append(cell.text.strip())
422
  if row_text:
423
  text_parts.append(" | ".join(row_text))
424
-
425
  text = "\n\n".join(text_parts)
426
  logger.info(f"✅ Extracted {len(text)} characters from Word document")
427
  return text
@@ -430,23 +368,22 @@ async def _process_docx_file(file_content: bytes) -> str:
430
  async def _process_pptx_file(file_content: bytes) -> str:
431
  """Process .pptx files using python-pptx."""
432
  if Presentation is None:
433
- raise HTTPException(status_code=501,
434
- detail="PowerPoint processing is not available. python-pptx is not installed.")
435
-
436
  from io import BytesIO
437
  prs = Presentation(BytesIO(file_content))
438
-
439
  text_parts = []
440
  for slide_num, slide in enumerate(prs.slides, 1):
441
  slide_text = [f"--- Slide {slide_num} ---"]
442
-
443
  for shape in slide.shapes:
444
  if hasattr(shape, "text") and shape.text.strip():
445
  slide_text.append(shape.text.strip())
446
-
447
  if len(slide_text) > 1: # More than just the slide header
448
  text_parts.append("\n".join(slide_text))
449
-
450
  text = "\n\n".join(text_parts)
451
  logger.info(f"✅ Extracted text from {len(prs.slides)} PowerPoint slides ({len(text)} characters)")
452
  return text
@@ -456,38 +393,37 @@ async def _process_excel_file(file_content: bytes, filename: str) -> str:
456
  """Process .xlsx/.xls files using pandas."""
457
  if pd is None:
458
  raise HTTPException(status_code=501, detail="Excel processing is not available. pandas is not installed.")
459
-
460
  from io import BytesIO
461
-
462
  try:
463
  # Read all sheets
464
  excel_file = pd.ExcelFile(BytesIO(file_content))
465
  text_parts = [f"Excel File: {filename}"]
466
-
467
  for sheet_name in excel_file.sheet_names:
468
  df = pd.read_excel(excel_file, sheet_name=sheet_name)
469
-
470
  if not df.empty:
471
  text_parts.append(f"\n--- Sheet: {sheet_name} ---")
472
-
473
  # Convert DataFrame to readable text
474
  # Include column headers
475
  text_parts.append("Columns: " + " | ".join(str(col) for col in df.columns))
476
-
477
  # Add row data (limit to first 100 rows to avoid huge files)
478
  for idx, row in df.head(100).iterrows():
479
  row_text = " | ".join(str(val) for val in row.values if pd.notna(val))
480
  if row_text.strip():
481
  text_parts.append(row_text)
482
-
483
  if len(df) > 100:
484
  text_parts.append(f"... and {len(df) - 100} more rows")
485
-
486
  text = "\n".join(text_parts)
487
- logger.info(
488
- f"✅ Extracted data from Excel file with {len(excel_file.sheet_names)} sheets ({len(text)} characters)")
489
  return text
490
-
491
  except Exception as e:
492
  raise HTTPException(status_code=400, detail=f"Could not process Excel file: {str(e)}")
493
 
@@ -496,9 +432,9 @@ async def _process_csv_file(file_content: bytes) -> str:
496
  """Process .csv files using pandas."""
497
  if pd is None:
498
  raise HTTPException(status_code=501, detail="CSV processing is not available. pandas is not installed.")
499
-
500
  from io import StringIO
501
-
502
  try:
503
  # Try different encodings for CSV
504
  for encoding in ['utf-8', 'latin-1', 'cp1252']:
@@ -510,26 +446,26 @@ async def _process_csv_file(file_content: bytes) -> str:
510
  continue
511
  else:
512
  raise ValueError("Could not decode CSV file with any common encoding")
513
-
514
  if df.empty:
515
  raise ValueError("CSV file is empty")
516
-
517
  text_parts = ["CSV Data:"]
518
  text_parts.append("Columns: " + " | ".join(str(col) for col in df.columns))
519
-
520
  # Add row data (limit to first 200 rows)
521
  for idx, row in df.head(200).iterrows():
522
  row_text = " | ".join(str(val) for val in row.values if pd.notna(val))
523
  if row_text.strip():
524
  text_parts.append(row_text)
525
-
526
  if len(df) > 200:
527
  text_parts.append(f"... and {len(df) - 200} more rows")
528
-
529
  text = "\n".join(text_parts)
530
  logger.info(f"✅ Extracted data from CSV file with {len(df)} rows ({len(text)} characters)")
531
  return text
532
-
533
  except Exception as e:
534
  raise HTTPException(status_code=400, detail=f"Could not process CSV file: {str(e)}")
535
 
@@ -539,12 +475,12 @@ async def _process_json_file(file_content: bytes) -> str:
539
  try:
540
  json_text = file_content.decode('utf-8')
541
  data = json.loads(json_text)
542
-
543
  # Convert JSON to readable text format
544
  def json_to_text(obj, indent=0):
545
  lines = []
546
  prefix = " " * indent
547
-
548
  if isinstance(obj, dict):
549
  for key, value in obj.items():
550
  if isinstance(value, (dict, list)):
@@ -561,15 +497,15 @@ async def _process_json_file(file_content: bytes) -> str:
561
  lines.append(f"{prefix}[{i}]: {item}")
562
  else:
563
  lines.append(f"{prefix}{obj}")
564
-
565
  return lines
566
-
567
  text_lines = ["JSON Data:"] + json_to_text(data)
568
  text = "\n".join(text_lines)
569
-
570
  logger.info(f"✅ Extracted data from JSON file ({len(text)} characters)")
571
  return text
572
-
573
  except json.JSONDecodeError as e:
574
  raise HTTPException(status_code=400, detail=f"Invalid JSON file: {str(e)}")
575
  except Exception as e:
@@ -581,34 +517,34 @@ async def _process_xml_file(file_content: bytes) -> str:
581
  try:
582
  xml_text = file_content.decode('utf-8')
583
  root = ET.fromstring(xml_text)
584
-
585
  def xml_to_text(element, indent=0):
586
  lines = []
587
  prefix = " " * indent
588
-
589
  # Add element name and attributes
590
  if element.attrib:
591
  attrs = " ".join(f'{k}="{v}"' for k, v in element.attrib.items())
592
  lines.append(f"{prefix}{element.tag} ({attrs}):")
593
  else:
594
  lines.append(f"{prefix}{element.tag}:")
595
-
596
  # Add text content
597
  if element.text and element.text.strip():
598
  lines.append(f"{prefix} {element.text.strip()}")
599
-
600
  # Add child elements
601
  for child in element:
602
  lines.extend(xml_to_text(child, indent + 1))
603
-
604
  return lines
605
-
606
  text_lines = ["XML Data:"] + xml_to_text(root)
607
  text = "\n".join(text_lines)
608
-
609
  logger.info(f"✅ Extracted data from XML file ({len(text)} characters)")
610
  return text
611
-
612
  except ET.ParseError as e:
613
  raise HTTPException(status_code=400, detail=f"Invalid XML file: {str(e)}")
614
  except Exception as e:
@@ -619,26 +555,26 @@ async def _process_html_file(file_content: bytes) -> str:
619
  """Process .html files using BeautifulSoup."""
620
  if BeautifulSoup is None:
621
  raise HTTPException(status_code=501, detail="HTML processing is not available. BeautifulSoup is not installed.")
622
-
623
  try:
624
  html_text = file_content.decode('utf-8')
625
  soup = BeautifulSoup(html_text, 'html.parser')
626
-
627
  # Remove script and style elements
628
  for script in soup(["script", "style"]):
629
  script.decompose()
630
-
631
  # Get text content
632
  text = soup.get_text()
633
-
634
  # Clean up whitespace
635
  lines = (line.strip() for line in text.splitlines())
636
  chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
637
  text = '\n'.join(chunk for chunk in chunks if chunk)
638
-
639
  logger.info(f"✅ Extracted text from HTML file ({len(text)} characters)")
640
  return text
641
-
642
  except Exception as e:
643
  raise HTTPException(status_code=400, detail=f"Could not process HTML file: {str(e)}")
644
 
@@ -647,7 +583,7 @@ async def _process_markdown_file(file_content: bytes) -> str:
647
  """Process .md files."""
648
  try:
649
  md_text = file_content.decode('utf-8')
650
-
651
  # Convert markdown to HTML then to plain text for better readability
652
  html = markdown.markdown(md_text)
653
  if BeautifulSoup:
@@ -656,10 +592,10 @@ async def _process_markdown_file(file_content: bytes) -> str:
656
  else:
657
  # Fallback: use raw markdown
658
  text = md_text
659
-
660
  logger.info(f"✅ Extracted text from Markdown file ({len(text)} characters)")
661
  return text
662
-
663
  except Exception as e:
664
  raise HTTPException(status_code=400, detail=f"Could not process Markdown file: {str(e)}")
665
 
@@ -669,66 +605,18 @@ async def _process_rtf_file(file_content: bytes) -> str:
669
  try:
670
  rtf_text = file_content.decode('utf-8')
671
  text = rtf_to_text(rtf_text)
672
-
673
  logger.info(f"✅ Extracted text from RTF file ({len(text)} characters)")
674
  return text
675
-
676
  except Exception as e:
677
  raise HTTPException(status_code=400, detail=f"Could not process RTF file: {str(e)}")
678
 
679
-
680
- def _create_overlapping_chunks(text: str, chunk_size: int = None, overlap: int = None) -> list:
681
- """
682
- Create overlapping text chunks with smart boundary detection.
683
- """
684
- if chunk_size is None:
685
- chunk_size = settings.CHUNK_SIZE
686
- if overlap is None:
687
- overlap = settings.CHUNK_OVERLAP
688
-
689
- chunks = []
690
- start = 0
691
- text_length = len(text)
692
-
693
- while start < text_length:
694
- end = start + chunk_size
695
-
696
- if end < text_length:
697
- # Try sentence boundaries first
698
- sentence_end = max(
699
- text.rfind('.', start, end),
700
- text.rfind('!', start, end),
701
- text.rfind('?', start, end)
702
- )
703
-
704
- if sentence_end > start + chunk_size // 2:
705
- end = sentence_end + 1
706
- else:
707
- # Try paragraph or line break
708
- newline_pos = text.rfind('\n', start, end)
709
- if newline_pos > start + chunk_size // 2:
710
- end = newline_pos + 1
711
- else:
712
- # Fall back to word boundary
713
- space_pos = text.rfind(' ', start, end)
714
- if space_pos > start + chunk_size // 2:
715
- end = space_pos
716
-
717
- chunk = text[start:end].strip()
718
- if chunk and len(chunk) > 20:
719
- chunks.append(chunk)
720
-
721
- next_start = end - overlap if end < text_length else text_length
722
- start = max(next_start, start + 1)
723
-
724
- return chunks
725
-
726
-
727
  def index_document(request_data: DocumentRequest) -> int:
728
  logger.info("=" * 80)
729
- logger.info("📚 STARTING ENHANCED DOCUMENT INDEXING PROCESS")
730
  logger.info("=" * 80)
731
-
732
  # Log the incoming context
733
  context_preview = request_data.context[:200] + "..." if len(request_data.context) > 200 else request_data.context
734
  logger.info(f"📝 CONTEXT TO INDEX (length: {len(request_data.context)} chars):")
@@ -745,55 +633,64 @@ def index_document(request_data: DocumentRequest) -> int:
745
  else:
746
  logger.info("📂 No existing documents to clear.")
747
 
748
- # Step 2: Enhanced chunking with overlap for better context preservation
749
- logger.info(f"✂️ Creating overlapping chunks ({settings.CHUNK_SIZE} chars, {settings.CHUNK_OVERLAP} overlap)...")
750
- text_chunks = _create_overlapping_chunks(request_data.context)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
751
 
752
  if not text_chunks:
753
  logger.warning("⚠️ No text chunks were generated.")
754
  return 0
755
 
756
- logger.info(f"Document split into {len(text_chunks)} overlapping chunks")
757
-
758
- # Log chunk statistics
759
- avg_chunk_size = sum(len(chunk) for chunk in text_chunks) / len(text_chunks)
760
- logger.info(f"📊 Average chunk size: {avg_chunk_size:.0f} characters")
761
-
762
- # Step 3: Add chunks to ChromaDB with enhanced metadata
763
- timestamp = int(time.time())
764
- chunk_ids = [f"doc_chunk_{i}_{timestamp}" for i in range(len(text_chunks))]
765
- logger.info(f"💾 Adding {len(chunk_ids)} chunks to ChromaDB with enhanced metadata...")
766
-
767
- # Enhanced metadata for better retrieval
768
- metadatas = [
769
- {
770
- "chunk_index": i,
771
- "timestamp": timestamp,
772
- "chunk_length": len(chunk),
773
- "position": "start" if i == 0 else "end" if i == len(text_chunks) - 1 else "middle",
774
- "total_chunks": len(text_chunks)
775
- }
776
- for i, chunk in enumerate(text_chunks)
777
- ]
778
 
 
 
 
 
 
 
 
779
  rag_setup.collection.add(
780
- documents=text_chunks,
781
  ids=chunk_ids,
782
  metadatas=metadatas
783
  )
784
-
785
- logger.info("✅ ENHANCED DOCUMENT INDEXING COMPLETED SUCCESSFULLY")
786
  logger.info(f"📊 Total chunks indexed: {len(text_chunks)}")
787
- logger.info(f"🔗 Chunks have 100-char overlap for better context continuity")
788
  logger.info("=" * 80)
789
-
790
  return len(text_chunks)
791
-
792
  except Exception as e:
793
  logger.error(f"❌ Error during indexing: {str(e)}", exc_info=True)
794
  raise
795
 
796
-
797
  def clear_index():
798
  """Clears all documents from the vector database."""
799
  logger.info("🗑️ Clearing vector index...")
@@ -810,194 +707,97 @@ def clear_index():
810
  logger.error(f"❌ Error clearing vector index: {e}")
811
  raise
812
 
813
-
814
- def _expand_query(query: str) -> list:
815
- """
816
- Expand the query with synonyms and related terms for better retrieval.
817
- Returns list of query variations.
818
- """
819
- queries = [query]
820
-
821
- # Add query without question words for better matching
822
- question_words = ['what', 'where', 'when', 'who', 'why', 'how', 'is', 'are', 'can', 'do', 'does']
823
- words = query.lower().split()
824
- filtered_words = [w for w in words if w not in question_words and len(w) > 2]
825
-
826
- if len(filtered_words) >= 2:
827
- # Create a version with just the key terms
828
- key_terms_query = ' '.join(filtered_words)
829
- if key_terms_query != query.lower():
830
- queries.append(key_terms_query)
831
-
832
- return queries[:2] # Limit to 2 variations to avoid too many retrievals
833
-
834
-
835
- def _deduplicate_and_rank_chunks(chunks_list: list, metadatas_list: list) -> tuple:
836
- """
837
- Deduplicate chunks and rank them by relevance.
838
- Returns (unique_chunks, unique_metadatas)
839
- """
840
- seen = set()
841
- unique_chunks = []
842
- unique_metadatas = []
843
-
844
- for chunks, metadatas in zip(chunks_list, metadatas_list):
845
- for chunk, metadata in zip(chunks, metadatas):
846
- # Use first 100 chars as fingerprint
847
- fingerprint = chunk[:100]
848
- if fingerprint not in seen:
849
- seen.add(fingerprint)
850
- unique_chunks.append(chunk)
851
- unique_metadatas.append(metadata)
852
-
853
- return unique_chunks, unique_metadatas
854
-
855
-
856
  async def get_rag_response(request_data: ChatRequest, api_key: Optional[str] = None) -> str:
857
  """
858
- Enhanced RAG pipeline with conversation history, query expansion, and better retrieval.
 
859
  """
860
  start_total = time.time()
861
-
862
  logger.info("=" * 80)
863
- logger.info("🤖 STARTING ENHANCED RAG PIPELINE")
864
  logger.info("=" * 80)
865
  logger.info(f"❓ USER PROMPT: '{request_data.prompt}'")
866
  logger.info(f"📏 Prompt length: {len(request_data.prompt)} characters")
867
- logger.info(f"💬 Conversation history: {len(request_data.conversation_history or [])} messages")
868
  logger.info(f"🔑 Using custom API key: {'Yes' if api_key else 'No'}")
869
  logger.info("-" * 60)
870
 
871
  try:
872
- # Step 1: Build cache key including conversation context
873
- history_hash = ""
874
- if request_data.conversation_history:
875
- # Create a hash of recent conversation for cache key
876
- recent_msgs = request_data.conversation_history[-3:] # Last 3 messages
877
- history_hash = str(hash("".join([m.content[:50] for m in recent_msgs])))
878
-
879
- cache_key = f"{api_key or 'default'}:{history_hash}:{request_data.prompt}"
880
  cached_response = _get_cached_response(cache_key)
881
  if cached_response:
882
  logger.info("💾 CACHE HIT! Returning cached response.")
883
- return cached_response
884
 
885
- logger.info("🔍 Cache miss. Proceeding with enhanced RAG pipeline.")
886
 
887
  # Step 2: Check if the vector database has any content
888
  doc_count = rag_setup.collection.count()
889
  logger.info(f"📚 Vector DB contains {doc_count} documents")
890
-
891
  if doc_count == 0:
892
  logger.warning("⚠️ Vector DB is empty. Cannot answer query.")
893
  return "I don't have any specific context loaded right now. Please provide some context in the Knowledge Base and click 'Index Context' before asking questions. However, I'd be happy to help with general questions using my built-in knowledge!"
894
 
895
- # Step 3: Query expansion for better retrieval
896
- logger.info("🔍 Expanding query for better retrieval...")
897
- query_variations = _expand_query(request_data.prompt)
898
- logger.info(f"📝 Generated {len(query_variations)} query variations")
899
-
900
- # Step 4: Retrieve chunks for each query variation
901
- all_chunks = []
902
- all_metadatas = []
903
-
904
- for query_var in query_variations:
905
- logger.info(f"🔎 Retrieving chunks for: '{query_var[:50]}...'")
906
- retrieved = await _retrieve_chunks_async(
907
- query_var,
908
- n_results=settings.MAX_CHUNKS_RETRIEVE + 2 # Retrieve more for better ranking
909
- )
910
-
911
- if retrieved and retrieved.get('documents') and retrieved['documents'][0]:
912
- all_chunks.append(retrieved['documents'][0])
913
- all_metadatas.append(retrieved.get('metadatas', [[]])[0])
914
 
915
- if not all_chunks:
916
  logger.warning("❌ No relevant chunks found in the vector DB for this query.")
917
- return "I couldn't find specific information about that in the provided context. Let me help you with what I know from my general knowledge:\n\n" + await _generate_fallback_response(
918
- request_data.prompt, api_key)
919
-
920
- # Step 5: Deduplicate and rank chunks
921
- logger.info("🎯 Deduplicating and ranking retrieved chunks...")
922
- unique_chunks, unique_metadatas = _deduplicate_and_rank_chunks(all_chunks, all_metadatas)
923
-
924
- # Limit to best chunks
925
- final_chunks = unique_chunks[:settings.MAX_CHUNKS_RETRIEVE + 1]
926
- logger.info(f"📋 Using {len(final_chunks)} unique, ranked chunks for context")
927
-
928
- # Log chunk details
929
- for i, (chunk, meta) in enumerate(zip(final_chunks, unique_metadatas[:len(final_chunks)])):
930
- logger.info(f" Chunk {i + 1}: {len(chunk)} chars, position: {meta.get('position', 'unknown')}")
931
-
932
- context_for_prompt = "\n\n---\n\n".join(final_chunks)
933
-
934
  # Limit context length to prevent timeouts
935
  max_context_length = settings.MAX_CONTEXT_LENGTH_CHAT
936
  if len(context_for_prompt) > max_context_length:
937
  logger.warning(f"⚠️ Context too long, truncating to {max_context_length}")
938
  context_for_prompt = context_for_prompt[:max_context_length] + "\n\n[... content truncated ...]"
939
 
940
- # Step 6: Build conversation history context
941
- history_context = ""
942
- if request_data.conversation_history and len(request_data.conversation_history) > 0:
943
- logger.info(f"💬 Including {len(request_data.conversation_history)} previous messages for context")
944
- history_messages = request_data.conversation_history[-6:] # Last 6 messages (3 exchanges)
945
- history_parts = []
946
- for msg in history_messages:
947
- role_label = "User" if msg.role == "user" else "Assistant"
948
- # Truncate very long messages
949
- content = msg.content[:300] + "..." if len(msg.content) > 300 else msg.content
950
- history_parts.append(f"{role_label}: {content}")
951
-
952
- history_context = (
953
- "\n\nPREVIOUS CONVERSATION:\n"
954
- + "\n".join(history_parts)
955
- + "\n"
956
- )
957
-
958
- # Step 7: Construct enhanced prompt with conversation history
959
  full_prompt = (
960
- "You are an intelligent assistant with access to specific context information and conversation history. "
961
- "Your goal is to provide comprehensive, helpful answers that:\n"
962
- "• Take into account the previous conversation flow\n"
963
- "• Use the provided context as your PRIMARY source when relevant\n"
964
- "• Build naturally on previous exchanges\n"
965
- "• Provide coherent, contextually appropriate responses\n\n"
966
-
967
  "INSTRUCTIONS:\n"
968
- "• Reference previous conversation when relevant for continuity\n"
969
- "• Use the document context as your primary source for factual information\n"
970
- "• If the user asks follow-up questions, refer back to previous answers\n"
971
- "• Be natural and conversational - maintain the conversation flow\n"
972
- "• Provide detailed, well-structured responses\n"
973
- "• If information isn't in the context, acknowledge it and provide general knowledge help\n"
974
- )
975
-
976
- if history_context:
977
- full_prompt += history_context + "\n"
978
-
979
- full_prompt += (
980
- "DOCUMENT CONTEXT:\n"
981
  f"{context_for_prompt}\n\n"
982
-
983
- f"CURRENT USER QUESTION: {request_data.prompt}\n\n"
984
-
985
- "Please provide a comprehensive, contextually appropriate response:"
986
  )
987
 
988
- # Step 8: Generate the response using the LLM
989
- logger.info("🧠 Generating response with conversation context...")
990
  response_text = await _generate_response_async(full_prompt, api_key)
991
-
992
- # Step 9: Cache the newly generated response
993
  _cache_response(cache_key, response_text)
994
  logger.info("💾 Response cached for future use")
995
 
996
  total_time = time.time() - start_total
997
  logger.info(f"⏱️ Total processing time: {total_time:.2f}s")
998
- logger.info("✅ ENHANCED RAG PIPELINE COMPLETED SUCCESSFULLY")
999
  logger.info("=" * 80)
1000
-
1001
  return response_text
1002
 
1003
  except asyncio.TimeoutError:
@@ -1015,21 +815,20 @@ async def _generate_fallback_response(prompt: str, api_key: Optional[str] = None
1015
  f"Question: {prompt}\n\n"
1016
  f"Answer:"
1017
  )
1018
-
1019
  try:
1020
  return await _generate_response_async(fallback_prompt, api_key)
1021
  except Exception as e:
1022
  logger.error(f"❌ Fallback response generation failed: {e}")
1023
  return "I'm having trouble generating a response right now. Please try again or rephrase your question."
1024
 
1025
-
1026
  async def execute_task(request_data: TaskRequest, api_key: Optional[str] = None) -> str:
1027
  """
1028
  Executes a specific task on the given context.
1029
  Uses provided API key or falls back to default.
1030
  """
1031
  start_total = time.time()
1032
-
1033
  logger.info("=" * 80)
1034
  logger.info("🎯 STARTING TASK EXECUTION")
1035
  logger.info("=" * 80)
@@ -1072,7 +871,7 @@ async def execute_task(request_data: TaskRequest, api_key: Optional[str] = None)
1072
  logger.info(f"⏱️ Task execution time: {total_time:.2f}s")
1073
  logger.info("✅ TASK EXECUTION COMPLETED SUCCESSFULLY")
1074
  logger.info("=" * 80)
1075
-
1076
  return response_text
1077
 
1078
  except asyncio.TimeoutError:
@@ -1082,7 +881,6 @@ async def execute_task(request_data: TaskRequest, api_key: Optional[str] = None)
1082
  logger.error(f"❌ An unexpected error occurred during task execution: {e}", exc_info=True)
1083
  return f"An unexpected error occurred: {e}"
1084
 
1085
-
1086
  # --- ASYNC WRAPPERS & CACHE HELPERS ---
1087
 
1088
  async def _retrieve_chunks_async(prompt: str, n_results: int = 2):
@@ -1096,12 +894,11 @@ async def _retrieve_chunks_async(prompt: str, n_results: int = 2):
1096
  logger.info(f"📊 ChromaDB query returned {len(result.get('documents', [[]])[0])} chunks")
1097
  return result
1098
 
1099
-
1100
  async def _generate_response_async(full_prompt: str, api_key: Optional[str] = None):
1101
  """Asynchronously calls the LLM to generate content."""
1102
  logger.info("🤖 Calling LLM for content generation...")
1103
  logger.info(f"📏 Prompt length sent to LLM: {len(full_prompt)} characters")
1104
-
1105
  # Use custom API key if provided, otherwise use default
1106
  if api_key:
1107
  llm_instance = create_llm_instance(api_key)
@@ -1109,18 +906,17 @@ async def _generate_response_async(full_prompt: str, api_key: Optional[str] = No
1109
  else:
1110
  llm_instance = rag_setup.generation_model
1111
  logger.info("�� Using default API key")
1112
-
1113
  loop = asyncio.get_event_loop()
1114
  response = await loop.run_in_executor(
1115
  None,
1116
  llm_instance.generate_content,
1117
  full_prompt
1118
  )
1119
-
1120
  logger.info(f"✅ LLM response received (length: {len(response)} chars)")
1121
  return response
1122
 
1123
-
1124
  def _get_cached_response(key: str):
1125
  """Checks the cache for a valid (non-expired) entry."""
1126
  if key in _response_cache:
@@ -1134,7 +930,6 @@ def _get_cached_response(key: str):
1134
  logger.info(f"🗑️ Expired cache entry removed for key: '{key[:50]}...'")
1135
  return None
1136
 
1137
-
1138
  def _cache_response(key: str, response: str):
1139
  """Adds a response to the cache with the current timestamp."""
1140
  _response_cache[key] = (time.time(), response)
 
6
  import rag_setup
7
  from schemas import ChatRequest, DocumentRequest, TaskRequest
8
  from typing import Optional, Tuple
9
+ from config import settings # Fixed: removed 'app.' prefix
10
  from fastapi import UploadFile, HTTPException
11
  import json
12
  import xml.etree.ElementTree as ET
13
  from striprtf.striprtf import rtf_to_text
14
  import markdown
 
15
  try:
16
  import fitz # PyMuPDF
17
  except ImportError:
 
21
  try:
22
  import docx # python-docx for Word documents
23
  except ImportError:
24
+ logging.error("python-docx is not installed. Word document processing will not work. Please run 'pip install python-docx'")
 
25
  docx = None
26
 
27
  try:
28
  from pptx import Presentation # python-pptx for PowerPoint
29
  except ImportError:
30
+ logging.error("python-pptx is not installed. PowerPoint processing will not work. Please run 'pip install python-pptx'")
 
31
  Presentation = None
32
 
33
  try:
34
  import pandas as pd # For Excel and CSV files
35
  except ImportError:
36
+ logging.error("pandas is not installed. Excel/CSV processing will not work. Please run 'pip install pandas openpyxl'")
 
37
  pd = None
38
 
39
  try:
40
  from bs4 import BeautifulSoup # For HTML parsing
41
  except ImportError:
42
+ logging.error("BeautifulSoup is not installed. HTML processing will not work. Please run 'pip install beautifulsoup4'")
 
43
  BeautifulSoup = None
44
 
45
+
46
  logging.basicConfig(
47
  level=logging.INFO,
48
  format='%(asctime)s [%(levelname)s] %(message)s',
 
55
  _response_cache = {}
56
  CACHE_EXPIRATION_SECONDS = 600 # 10 minutes
57
 
58
def create_llm_instance(api_key: str) -> rag_setup.OpenRouterLLM:
    """
    Build a new ``rag_setup.OpenRouterLLM`` client for the given API key.

    The endpoint URL and model name are read from ``settings``
    (``OPENROUTER_URL`` and ``MODEL_NAME``); only the key varies per call.
    """
    llm_options = {
        "api_key": api_key,
        "base_url": settings.OPENROUTER_URL,
        "model": settings.MODEL_NAME,
    }
    return rag_setup.OpenRouterLLM(**llm_options)
65
 
66
+ async def test_api_key(api_key: str) -> dict:
67
+ """Test if the provided API key is valid."""
 
 
 
 
 
68
  logger.info(f"🔍 Testing API key: {api_key[:10]}...")
69
+
70
  try:
71
  # Validate API key format first
72
  if not api_key or not api_key.strip():
 
76
  "message": "API key cannot be empty",
77
  "model_info": None
78
  }
79
+
80
+ if not api_key.startswith('sk-or-'):
81
+ logger.error("❌ API key has incorrect format")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  return {
83
  "valid": False,
84
+ "message": "OpenRouter API keys should start with 'sk-or-'",
85
  "model_info": None
86
  }
87
+
88
  if len(api_key) < 40:
89
  logger.error("❌ API key is too short")
90
  return {
 
92
  "message": "API key appears to be too short",
93
  "model_info": None
94
  }
95
+
96
  # Create a temporary LLM instance
97
+ test_llm = create_llm_instance(api_key)
98
+
99
  # Test with a minimal prompt to avoid quota usage
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  test_response = test_llm._make_api_request("Hi", max_tokens=1)
101
+
102
  # Check for explicit errors first
103
  if "error" in test_response:
104
  error_msg = test_response["error"]
105
  logger.error(f"❌ API key test failed: {error_msg}")
106
+
107
  # Parse specific error types
108
  if "401" in str(error_msg) or "403" in str(error_msg) or "Unauthorized" in str(error_msg):
109
  return {
 
131
  "message": f"API key test failed: {error_msg}",
132
  "model_info": None
133
  }
134
+
135
  # Check for successful response with proper structure
136
  if "choices" in test_response and test_response["choices"]:
137
  choice = test_response["choices"][0]
 
147
  "message": "API key is valid and working!",
148
  "model_info": model_info
149
  }
150
+
151
  # If we get here, the response format is unexpected
152
  logger.error(f"❌ API key test failed: Unexpected response format - {test_response}")
153
  return {
 
155
  "message": "API key test failed: Unexpected response format from OpenRouter",
156
  "model_info": None
157
  }
158
+
159
  except Exception as e:
160
  logger.error(f"❌ API key test failed with exception: {str(e)}")
161
  error_msg = str(e)
162
+
163
  # Parse common error patterns
164
  if "401" in error_msg or "403" in error_msg or "Unauthorized" in error_msg:
165
  return {
 
186
  "model_info": None
187
  }
188
 
 
189
  async def process_and_index_file(file: UploadFile) -> Tuple[int, str]:
190
  """
191
  Processes an uploaded file, extracts text, calls the indexing function,
 
193
  Supports: .txt, .pdf, .docx, .pptx, .xlsx, .csv, .json, .xml, .html, .md, .rtf
194
  """
195
  logger.info(f"📄 Processing file '{file.filename}' with content type '{file.content_type}'")
196
+
197
  # Read file content
198
  file_content = await file.read()
199
  text = ""
 
202
  try:
203
  if file_extension == "txt":
204
  text = await _process_txt_file(file_content)
205
+
206
  elif file_extension == "pdf":
207
  text = await _process_pdf_file(file_content)
208
+
209
  elif file_extension == "docx":
210
  text = await _process_docx_file(file_content)
211
+
212
  elif file_extension in ["ppt", "pptx"]:
213
  text = await _process_pptx_file(file_content)
214
+
215
  elif file_extension in ["xls", "xlsx"]:
216
  text = await _process_excel_file(file_content, file.filename)
217
+
218
  elif file_extension == "csv":
219
  text = await _process_csv_file(file_content)
220
+
221
  elif file_extension == "json":
222
  text = await _process_json_file(file_content)
223
+
224
  elif file_extension == "xml":
225
  text = await _process_xml_file(file_content)
226
+
227
  elif file_extension in ["html", "htm"]:
228
  text = await _process_html_file(file_content)
229
+
230
  elif file_extension in ["md", "markdown"]:
231
  text = await _process_markdown_file(file_content)
232
+
233
  elif file_extension == "rtf":
234
  text = await _process_rtf_file(file_content)
235
+
236
  else:
237
+ supported_extensions = ['.txt', '.pdf', '.docx', '.pptx', '.xlsx', '.csv', '.json', '.xml', '.html', '.md', '.rtf']
 
238
  logger.error(f"❌ Unsupported file type: {file.filename}")
239
  raise HTTPException(
240
+ status_code=400,
241
  detail=f"Unsupported file type. Please upload one of: {', '.join(supported_extensions)}"
242
  )
243
 
 
250
  # Validate extracted text
251
  if not text or not text.strip():
252
  logger.error("❌ Extracted text is empty or whitespace only")
253
+ raise HTTPException(status_code=400, detail="Extracted text is empty. The file might be empty, corrupted, or unreadable.")
254
+
 
255
  # Clean up the text
256
  text = text.strip()
257
+
258
  # Log processing stats
259
  word_count = len(text.split())
260
  logger.info(f"📊 Text processing complete: {len(text)} characters, {word_count} words")
261
+
262
  # Index the extracted text using existing logic
263
  try:
264
  doc_request = DocumentRequest(context=text)
265
  docs_added = index_document(doc_request)
266
  logger.info(f"✅ Successfully indexed {docs_added} document chunks from file")
267
+
268
  return docs_added, text
269
+
270
  except Exception as e:
271
  logger.error(f"❌ Failed to index extracted text: {e}")
272
  raise HTTPException(status_code=500, detail=f"Failed to index extracted text: {str(e)}")
 
290
  continue
291
  else:
292
  raise UnicodeDecodeError("Unable to decode file with any common encoding")
293
+
294
  logger.info(f"✅ Extracted {len(text)} characters from .txt file")
295
  return text
296
+
297
  except UnicodeDecodeError as e:
298
  logger.error(f"❌ Could not decode .txt file: {e}")
299
+ raise HTTPException(status_code=400, detail="Could not decode .txt file. Please ensure it uses UTF-8, Latin-1, or CP1252 encoding.")
 
300
 
301
 
302
  async def _process_pdf_file(file_content: bytes) -> str:
 
304
  if fitz is None:
305
  logger.error("❌ PyMuPDF not available for PDF processing")
306
  raise HTTPException(status_code=501, detail="PDF processing is not available. PyMuPDF is not installed.")
307
+
308
  logger.info("📖 Opening PDF document...")
309
  doc = fitz.open(stream=file_content, filetype="pdf")
310
+
311
  try:
312
  text_parts = []
313
  page_count = len(doc)
314
  logger.info(f"📑 PDF has {page_count} pages")
315
+
316
  for page_num in range(page_count):
317
  try:
318
  page = doc[page_num]
319
  page_text = page.get_text()
320
+
321
  if page_text and page_text.strip():
322
  text_parts.append(f"--- Page {page_num + 1} ---\n{page_text.strip()}")
323
  logger.info(f"📄 Extracted text from page {page_num + 1}: {len(page_text)} characters")
324
  else:
325
  logger.info(f"📄 Page {page_num + 1} is empty or contains no extractable text")
326
+
327
  except Exception as page_error:
328
  logger.warning(f"⚠️ Could not extract text from page {page_num + 1}: {page_error}")
329
  continue
330
+
331
  text = "\n\n".join(text_parts)
332
  logger.info(f"✅ Extracted text from {len(text_parts)} pages of the PDF file ({len(text)} characters)")
333
  return text
334
+
335
  finally:
336
  doc.close()
337
  logger.info("📕 PDF document closed successfully")
 
340
  async def _process_docx_file(file_content: bytes) -> str:
341
  """Process .docx files using python-docx."""
342
  if docx is None:
343
+ raise HTTPException(status_code=501, detail="Word document processing is not available. python-docx is not installed.")
344
+
 
345
  from io import BytesIO
346
  doc = docx.Document(BytesIO(file_content))
347
+
348
  text_parts = []
349
  for paragraph in doc.paragraphs:
350
  if paragraph.text.strip():
351
  text_parts.append(paragraph.text.strip())
352
+
353
  # Also extract text from tables
354
  for table in doc.tables:
355
  for row in table.rows:
 
359
  row_text.append(cell.text.strip())
360
  if row_text:
361
  text_parts.append(" | ".join(row_text))
362
+
363
  text = "\n\n".join(text_parts)
364
  logger.info(f"✅ Extracted {len(text)} characters from Word document")
365
  return text
 
368
  async def _process_pptx_file(file_content: bytes) -> str:
369
  """Process .pptx files using python-pptx."""
370
  if Presentation is None:
371
+ raise HTTPException(status_code=501, detail="PowerPoint processing is not available. python-pptx is not installed.")
372
+
 
373
  from io import BytesIO
374
  prs = Presentation(BytesIO(file_content))
375
+
376
  text_parts = []
377
  for slide_num, slide in enumerate(prs.slides, 1):
378
  slide_text = [f"--- Slide {slide_num} ---"]
379
+
380
  for shape in slide.shapes:
381
  if hasattr(shape, "text") and shape.text.strip():
382
  slide_text.append(shape.text.strip())
383
+
384
  if len(slide_text) > 1: # More than just the slide header
385
  text_parts.append("\n".join(slide_text))
386
+
387
  text = "\n\n".join(text_parts)
388
  logger.info(f"✅ Extracted text from {len(prs.slides)} PowerPoint slides ({len(text)} characters)")
389
  return text
 
393
  """Process .xlsx/.xls files using pandas."""
394
  if pd is None:
395
  raise HTTPException(status_code=501, detail="Excel processing is not available. pandas is not installed.")
396
+
397
  from io import BytesIO
398
+
399
  try:
400
  # Read all sheets
401
  excel_file = pd.ExcelFile(BytesIO(file_content))
402
  text_parts = [f"Excel File: {filename}"]
403
+
404
  for sheet_name in excel_file.sheet_names:
405
  df = pd.read_excel(excel_file, sheet_name=sheet_name)
406
+
407
  if not df.empty:
408
  text_parts.append(f"\n--- Sheet: {sheet_name} ---")
409
+
410
  # Convert DataFrame to readable text
411
  # Include column headers
412
  text_parts.append("Columns: " + " | ".join(str(col) for col in df.columns))
413
+
414
  # Add row data (limit to first 100 rows to avoid huge files)
415
  for idx, row in df.head(100).iterrows():
416
  row_text = " | ".join(str(val) for val in row.values if pd.notna(val))
417
  if row_text.strip():
418
  text_parts.append(row_text)
419
+
420
  if len(df) > 100:
421
  text_parts.append(f"... and {len(df) - 100} more rows")
422
+
423
  text = "\n".join(text_parts)
424
+ logger.info(f"✅ Extracted data from Excel file with {len(excel_file.sheet_names)} sheets ({len(text)} characters)")
 
425
  return text
426
+
427
  except Exception as e:
428
  raise HTTPException(status_code=400, detail=f"Could not process Excel file: {str(e)}")
429
 
 
432
  """Process .csv files using pandas."""
433
  if pd is None:
434
  raise HTTPException(status_code=501, detail="CSV processing is not available. pandas is not installed.")
435
+
436
  from io import StringIO
437
+
438
  try:
439
  # Try different encodings for CSV
440
  for encoding in ['utf-8', 'latin-1', 'cp1252']:
 
446
  continue
447
  else:
448
  raise ValueError("Could not decode CSV file with any common encoding")
449
+
450
  if df.empty:
451
  raise ValueError("CSV file is empty")
452
+
453
  text_parts = ["CSV Data:"]
454
  text_parts.append("Columns: " + " | ".join(str(col) for col in df.columns))
455
+
456
  # Add row data (limit to first 200 rows)
457
  for idx, row in df.head(200).iterrows():
458
  row_text = " | ".join(str(val) for val in row.values if pd.notna(val))
459
  if row_text.strip():
460
  text_parts.append(row_text)
461
+
462
  if len(df) > 200:
463
  text_parts.append(f"... and {len(df) - 200} more rows")
464
+
465
  text = "\n".join(text_parts)
466
  logger.info(f"✅ Extracted data from CSV file with {len(df)} rows ({len(text)} characters)")
467
  return text
468
+
469
  except Exception as e:
470
  raise HTTPException(status_code=400, detail=f"Could not process CSV file: {str(e)}")
471
 
 
475
  try:
476
  json_text = file_content.decode('utf-8')
477
  data = json.loads(json_text)
478
+
479
  # Convert JSON to readable text format
480
  def json_to_text(obj, indent=0):
481
  lines = []
482
  prefix = " " * indent
483
+
484
  if isinstance(obj, dict):
485
  for key, value in obj.items():
486
  if isinstance(value, (dict, list)):
 
497
  lines.append(f"{prefix}[{i}]: {item}")
498
  else:
499
  lines.append(f"{prefix}{obj}")
500
+
501
  return lines
502
+
503
  text_lines = ["JSON Data:"] + json_to_text(data)
504
  text = "\n".join(text_lines)
505
+
506
  logger.info(f"✅ Extracted data from JSON file ({len(text)} characters)")
507
  return text
508
+
509
  except json.JSONDecodeError as e:
510
  raise HTTPException(status_code=400, detail=f"Invalid JSON file: {str(e)}")
511
  except Exception as e:
 
517
  try:
518
  xml_text = file_content.decode('utf-8')
519
  root = ET.fromstring(xml_text)
520
+
521
  def xml_to_text(element, indent=0):
522
  lines = []
523
  prefix = " " * indent
524
+
525
  # Add element name and attributes
526
  if element.attrib:
527
  attrs = " ".join(f'{k}="{v}"' for k, v in element.attrib.items())
528
  lines.append(f"{prefix}{element.tag} ({attrs}):")
529
  else:
530
  lines.append(f"{prefix}{element.tag}:")
531
+
532
  # Add text content
533
  if element.text and element.text.strip():
534
  lines.append(f"{prefix} {element.text.strip()}")
535
+
536
  # Add child elements
537
  for child in element:
538
  lines.extend(xml_to_text(child, indent + 1))
539
+
540
  return lines
541
+
542
  text_lines = ["XML Data:"] + xml_to_text(root)
543
  text = "\n".join(text_lines)
544
+
545
  logger.info(f"✅ Extracted data from XML file ({len(text)} characters)")
546
  return text
547
+
548
  except ET.ParseError as e:
549
  raise HTTPException(status_code=400, detail=f"Invalid XML file: {str(e)}")
550
  except Exception as e:
 
555
  """Process .html files using BeautifulSoup."""
556
  if BeautifulSoup is None:
557
  raise HTTPException(status_code=501, detail="HTML processing is not available. BeautifulSoup is not installed.")
558
+
559
  try:
560
  html_text = file_content.decode('utf-8')
561
  soup = BeautifulSoup(html_text, 'html.parser')
562
+
563
  # Remove script and style elements
564
  for script in soup(["script", "style"]):
565
  script.decompose()
566
+
567
  # Get text content
568
  text = soup.get_text()
569
+
570
  # Clean up whitespace
571
  lines = (line.strip() for line in text.splitlines())
572
  chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
573
  text = '\n'.join(chunk for chunk in chunks if chunk)
574
+
575
  logger.info(f"✅ Extracted text from HTML file ({len(text)} characters)")
576
  return text
577
+
578
  except Exception as e:
579
  raise HTTPException(status_code=400, detail=f"Could not process HTML file: {str(e)}")
580
 
 
583
  """Process .md files."""
584
  try:
585
  md_text = file_content.decode('utf-8')
586
+
587
  # Convert markdown to HTML then to plain text for better readability
588
  html = markdown.markdown(md_text)
589
  if BeautifulSoup:
 
592
  else:
593
  # Fallback: use raw markdown
594
  text = md_text
595
+
596
  logger.info(f"✅ Extracted text from Markdown file ({len(text)} characters)")
597
  return text
598
+
599
  except Exception as e:
600
  raise HTTPException(status_code=400, detail=f"Could not process Markdown file: {str(e)}")
601
 
 
605
  try:
606
  rtf_text = file_content.decode('utf-8')
607
  text = rtf_to_text(rtf_text)
608
+
609
  logger.info(f"✅ Extracted text from RTF file ({len(text)} characters)")
610
  return text
611
+
612
  except Exception as e:
613
  raise HTTPException(status_code=400, detail=f"Could not process RTF file: {str(e)}")
614
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
615
  def index_document(request_data: DocumentRequest) -> int:
616
  logger.info("=" * 80)
617
+ logger.info("📚 STARTING DOCUMENT INDEXING PROCESS")
618
  logger.info("=" * 80)
619
+
620
  # Log the incoming context
621
  context_preview = request_data.context[:200] + "..." if len(request_data.context) > 200 else request_data.context
622
  logger.info(f"📝 CONTEXT TO INDEX (length: {len(request_data.context)} chars):")
 
633
  else:
634
  logger.info("📂 No existing documents to clear.")
635
 
636
+ # Step 2: Chunk document with better chunking strategy
637
+ text_chunks = textwrap.wrap(
638
+ request_data.context,
639
+ width=600,
640
+ break_long_words=False,
641
+ replace_whitespace=False,
642
+ break_on_hyphens=False
643
+ )
644
+
645
+ # If chunks are still too few, try splitting on sentences/paragraphs
646
+ if len(text_chunks) < 3 and len(request_data.context) > 1200:
647
+ logger.info("🔧 Using sentence-based chunking for better granularity")
648
+ paragraphs = request_data.context.split('\n\n')
649
+ text_chunks = []
650
+
651
+ for para in paragraphs:
652
+ para = para.strip()
653
+ if not para:
654
+ continue
655
+
656
+ if len(para) <= 600:
657
+ text_chunks.append(para)
658
+ else:
659
+ sub_chunks = textwrap.wrap(para, width=600, break_long_words=False)
660
+ text_chunks.extend(sub_chunks)
661
+
662
+ # Filter out empty chunks
663
+ text_chunks = [chunk.strip() for chunk in text_chunks if chunk.strip()]
664
 
665
  if not text_chunks:
666
  logger.warning("⚠️ No text chunks were generated.")
667
  return 0
668
 
669
+ logger.info(f"✂️ Document split into {len(text_chunks)} chunks")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
670
 
671
+ # Step 3: Add chunks to ChromaDB
672
+ chunk_ids = [f"doc_chunk_{i}_{int(time.time())}" for i in range(len(text_chunks))]
673
+ logger.info(f"💾 Adding {len(chunk_ids)} chunks to ChromaDB...")
674
+
675
+ # Add documents with metadata
676
+ metadatas = [{"chunk_index": i, "timestamp": int(time.time())} for i in range(len(text_chunks))]
677
+
678
  rag_setup.collection.add(
679
+ documents=text_chunks,
680
  ids=chunk_ids,
681
  metadatas=metadatas
682
  )
683
+
684
+ logger.info("✅ DOCUMENT INDEXING COMPLETED SUCCESSFULLY")
685
  logger.info(f"📊 Total chunks indexed: {len(text_chunks)}")
 
686
  logger.info("=" * 80)
687
+
688
  return len(text_chunks)
689
+
690
  except Exception as e:
691
  logger.error(f"❌ Error during indexing: {str(e)}", exc_info=True)
692
  raise
693
 
 
694
  def clear_index():
695
  """Clears all documents from the vector database."""
696
  logger.info("🗑️ Clearing vector index...")
 
707
  logger.error(f"❌ Error clearing vector index: {e}")
708
  raise
709
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710
  async def get_rag_response(request_data: ChatRequest, api_key: Optional[str] = None) -> str:
711
  """
712
+ Performs the RAG pipeline: checks cache, retrieves context, generates a response.
713
+ Uses provided API key or falls back to default.
714
  """
715
  start_total = time.time()
716
+
717
  logger.info("=" * 80)
718
+ logger.info("🤖 STARTING RAG PIPELINE")
719
  logger.info("=" * 80)
720
  logger.info(f"❓ USER PROMPT: '{request_data.prompt}'")
721
  logger.info(f"📏 Prompt length: {len(request_data.prompt)} characters")
 
722
  logger.info(f"🔑 Using custom API key: {'Yes' if api_key else 'No'}")
723
  logger.info("-" * 60)
724
 
725
  try:
726
+ # Step 1: Check cache for a recent, identical query
727
+ cache_key = f"{api_key or 'default'}:{request_data.prompt}"
 
 
 
 
 
 
728
  cached_response = _get_cached_response(cache_key)
729
  if cached_response:
730
  logger.info("💾 CACHE HIT! Returning cached response.")
731
+ return f"{cached_response}\n\n(This response was retrieved from cache)"
732
 
733
+ logger.info("🔍 Cache miss. Proceeding with RAG pipeline.")
734
 
735
  # Step 2: Check if the vector database has any content
736
  doc_count = rag_setup.collection.count()
737
  logger.info(f"📚 Vector DB contains {doc_count} documents")
738
+
739
  if doc_count == 0:
740
  logger.warning("⚠️ Vector DB is empty. Cannot answer query.")
741
  return "I don't have any specific context loaded right now. Please provide some context in the Knowledge Base and click 'Index Context' before asking questions. However, I'd be happy to help with general questions using my built-in knowledge!"
742
 
743
+ # Step 3: Retrieve relevant chunks from ChromaDB
744
+ logger.info("🔎 Retrieving relevant chunks from vector DB...")
745
+ retrieved_chunks = await _retrieve_chunks_async(
746
+ request_data.prompt,
747
+ n_results=settings.MAX_CHUNKS_RETRIEVE
748
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
749
 
750
+ if not retrieved_chunks or not retrieved_chunks.get('documents') or not retrieved_chunks['documents'][0]:
751
  logger.warning("❌ No relevant chunks found in the vector DB for this query.")
752
+ return "I couldn't find specific information about that in the provided context. Let me help you with what I know from my general knowledge:\n\n" + await _generate_fallback_response(request_data.prompt, api_key)
753
+
754
+ # Log retrieved chunks
755
+ chunks = retrieved_chunks['documents'][0]
756
+ logger.info(f"📋 Retrieved {len(chunks)} relevant chunks")
757
+
758
+ context_for_prompt = "\n\n---\n\n".join(chunks)
759
+
 
 
 
 
 
 
 
 
 
760
  # Limit context length to prevent timeouts
761
  max_context_length = settings.MAX_CONTEXT_LENGTH_CHAT
762
  if len(context_for_prompt) > max_context_length:
763
  logger.warning(f"⚠️ Context too long, truncating to {max_context_length}")
764
  context_for_prompt = context_for_prompt[:max_context_length] + "\n\n[... content truncated ...]"
765
 
766
+ # Step 4: Construct improved prompt for the LLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767
  full_prompt = (
768
+ "You are an intelligent assistant with access to specific context information. "
769
+ "Your goal is to provide comprehensive, helpful answers that combine the provided context with your expertise.\n\n"
770
+
 
 
 
 
771
  "INSTRUCTIONS:\n"
772
+ "• Use the provided context as your PRIMARY source when it's relevant\n"
773
+ "• If the context fully answers the question, focus on that information and enhance it with practical insights\n"
774
+ "• If the context only partially addresses the question, build upon it with your knowledge\n"
775
+ "• If the context isn't relevant to the question, briefly mention this and provide a helpful answer based on your expertise\n"
776
+ "• Be natural and conversational - avoid robotic phrases like 'based solely on the context'\n"
777
+ "• Provide actionable, practical advice when appropriate\n"
778
+ "• Structure your response clearly with headings or bullet points when helpful\n\n"
779
+
780
+ "CONTEXT INFORMATION:\n"
 
 
 
 
781
  f"{context_for_prompt}\n\n"
782
+
783
+ f"USER QUESTION: {request_data.prompt}\n\n"
784
+
785
+ "Please provide a comprehensive, helpful response:"
786
  )
787
 
788
+ # Step 5: Generate the response using the LLM
789
+ logger.info("🧠 Generating response from OpenRouter...")
790
  response_text = await _generate_response_async(full_prompt, api_key)
791
+
792
+ # Step 6: Cache the newly generated response
793
  _cache_response(cache_key, response_text)
794
  logger.info("💾 Response cached for future use")
795
 
796
  total_time = time.time() - start_total
797
  logger.info(f"⏱️ Total processing time: {total_time:.2f}s")
798
+ logger.info("✅ RAG PIPELINE COMPLETED SUCCESSFULLY")
799
  logger.info("=" * 80)
800
+
801
  return response_text
802
 
803
  except asyncio.TimeoutError:
 
815
  f"Question: {prompt}\n\n"
816
  f"Answer:"
817
  )
818
+
819
  try:
820
  return await _generate_response_async(fallback_prompt, api_key)
821
  except Exception as e:
822
  logger.error(f"❌ Fallback response generation failed: {e}")
823
  return "I'm having trouble generating a response right now. Please try again or rephrase your question."
824
 
 
825
  async def execute_task(request_data: TaskRequest, api_key: Optional[str] = None) -> str:
826
  """
827
  Executes a specific task on the given context.
828
  Uses provided API key or falls back to default.
829
  """
830
  start_total = time.time()
831
+
832
  logger.info("=" * 80)
833
  logger.info("🎯 STARTING TASK EXECUTION")
834
  logger.info("=" * 80)
 
871
  logger.info(f"⏱️ Task execution time: {total_time:.2f}s")
872
  logger.info("✅ TASK EXECUTION COMPLETED SUCCESSFULLY")
873
  logger.info("=" * 80)
874
+
875
  return response_text
876
 
877
  except asyncio.TimeoutError:
 
881
  logger.error(f"❌ An unexpected error occurred during task execution: {e}", exc_info=True)
882
  return f"An unexpected error occurred: {e}"
883
 
 
884
  # --- ASYNC WRAPPERS & CACHE HELPERS ---
885
 
886
  async def _retrieve_chunks_async(prompt: str, n_results: int = 2):
 
894
  logger.info(f"📊 ChromaDB query returned {len(result.get('documents', [[]])[0])} chunks")
895
  return result
896
 
 
897
  async def _generate_response_async(full_prompt: str, api_key: Optional[str] = None):
898
  """Asynchronously calls the LLM to generate content."""
899
  logger.info("🤖 Calling LLM for content generation...")
900
  logger.info(f"📏 Prompt length sent to LLM: {len(full_prompt)} characters")
901
+
902
  # Use custom API key if provided, otherwise use default
903
  if api_key:
904
  llm_instance = create_llm_instance(api_key)
 
906
  else:
907
  llm_instance = rag_setup.generation_model
908
  logger.info("�� Using default API key")
909
+
910
  loop = asyncio.get_event_loop()
911
  response = await loop.run_in_executor(
912
  None,
913
  llm_instance.generate_content,
914
  full_prompt
915
  )
916
+
917
  logger.info(f"✅ LLM response received (length: {len(response)} chars)")
918
  return response
919
 
 
920
  def _get_cached_response(key: str):
921
  """Checks the cache for a valid (non-expired) entry."""
922
  if key in _response_cache:
 
930
  logger.info(f"🗑️ Expired cache entry removed for key: '{key[:50]}...'")
931
  return None
932
 
 
933
  def _cache_response(key: str, response: str):
934
  """Adds a response to the cache with the current timestamp."""
935
  _response_cache[key] = (time.time(), response)
main.py CHANGED
@@ -24,19 +24,19 @@ try:
24
  # Import and run the FastAPI app
25
  from app.main import app
26
  import uvicorn
27
-
28
  logger.info("Successfully imported FastAPI app")
29
-
30
  if __name__ == "__main__":
31
  port = int(os.environ.get("PORT", 7860))
32
  logger.info(f"Starting server on port {port}")
33
  uvicorn.run(
34
- app,
35
- host="0.0.0.0",
36
  port=port,
37
  log_level="info"
38
  )
39
-
40
  except Exception as e:
41
  logger.error(f"Error starting application: {e}")
42
  raise
 
24
  # Import and run the FastAPI app
25
  from app.main import app
26
  import uvicorn
27
+
28
  logger.info("Successfully imported FastAPI app")
29
+
30
  if __name__ == "__main__":
31
  port = int(os.environ.get("PORT", 7860))
32
  logger.info(f"Starting server on port {port}")
33
  uvicorn.run(
34
+ app,
35
+ host="0.0.0.0",
36
  port=port,
37
  log_level="info"
38
  )
39
+
40
  except Exception as e:
41
  logger.error(f"Error starting application: {e}")
42
  raise
requirements.txt CHANGED
@@ -1,24 +1,35 @@
1
- # Core Framework
2
- fastapi~=0.116.1
3
- uvicorn~=0.35.0
4
- pydantic~=2.11.7
5
- pydantic-settings~=2.10.1
6
- python-multipart~=0.0.20
7
- jinja2~=3.1.2
8
 
9
- # AI/ML Libraries
10
- openai~=1.62.0
11
- chromadb~=1.0.15
12
- scikit-learn~=1.7.1
13
- numpy~=2.3.1
14
- requests~=2.32.4
15
 
16
- # File Processing Libraries
17
- PyMuPDF~=1.25.2
18
- python-docx~=1.1.2
19
- python-pptx~=1.0.2
20
- pandas~=2.2.3
21
- openpyxl~=3.1.5
22
- beautifulsoup4~=4.12.3
23
- striprtf~=0.0.26
24
- markdown~=3.7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core FastAPI dependencies
2
+ fastapi==0.104.1
3
+ uvicorn==0.24.0
4
+ python-multipart==0.0.6
5
+ jinja2==3.1.2
6
+ aiofiles==23.2.1
 
7
 
8
+ # Configuration management
9
+ pydantic==2.5.0
10
+ pydantic-settings==2.1.0
11
+ python-dotenv==1.0.0
 
 
12
 
13
+ # Vector database and embeddings
14
+ chromadb==0.4.18
15
+ sentence-transformers==2.2.2
16
+ scikit-learn==1.3.2
17
+ numpy==1.24.4
18
+
19
+ # HTTP client
20
+ requests==2.31.0
21
+
22
+ # Existing file processing
23
+ pymupdf==1.23.9 # PDF processing
24
+
25
+ # New file processing dependencies
26
+ python-docx==1.1.0 # Word documents (.docx)
27
+ python-pptx==0.6.23 # PowerPoint presentations (.pptx)
28
+ pandas==2.1.4 # Excel and CSV files
29
+ openpyxl==3.1.2 # Excel file support for pandas
30
+ xlrd==2.0.1 # Legacy Excel file support
31
+ beautifulsoup4==4.12.2 # HTML parsing
32
+ lxml==4.9.3 # XML parsing (faster than built-in)
33
+ markdown==3.5.1 # Markdown processing
34
+ striprtf==0.0.26 # RTF file processing
35
+ chardet==5.2.0 # Character encoding detection
static/app.js CHANGED
@@ -9,17 +9,13 @@ class ContextAwareApp {
9
  chatContainer: document.getElementById('chat-container'),
10
  statusIndicator: document.getElementById('status-indicator'),
11
  clearContextBtn: document.getElementById('clear-context-btn'),
12
- clearHistoryBtn: document.getElementById('clear-history-btn'),
13
  indexContextBtn: document.getElementById('index-context-btn'),
14
  taskSelect: document.getElementById('task-select'),
15
  charCount: document.getElementById('char-count'),
16
  wordCount: document.getElementById('word-count'),
17
-
18
  // API Key elements
19
  apiKeyInput: document.getElementById('api-key-input'),
20
- providerSelect: document.getElementById('provider-select'),
21
- providerLink: document.getElementById('provider-link'),
22
- providerModels: document.getElementById('provider-models'),
23
  testApiKeyBtn: document.getElementById('test-api-key'),
24
  saveApiKeyBtn: document.getElementById('save-api-key'),
25
  apiKeyStatus: document.getElementById('api-key-status'),
@@ -40,18 +36,6 @@ class ContextAwareApp {
40
  assistantHeader: document.getElementById('assistant-header'),
41
  assistantContent: document.getElementById('assistant-content'),
42
  assistantToggleIcon: document.getElementById('assistant-toggle-icon'),
43
-
44
- // RAG Info Panel elements
45
- ragInfoPanel: document.getElementById('rag-info-panel'),
46
- ragInfoContent: document.getElementById('rag-info-content'),
47
- toggleRagInfo: document.getElementById('toggle-rag-info'),
48
- ragToggleText: document.getElementById('rag-toggle-text'),
49
- ragStatus: document.getElementById('rag-status'),
50
- ragChunks: document.getElementById('rag-chunks'),
51
- ragContextSize: document.getElementById('rag-context-size'),
52
- ragModel: document.getElementById('rag-model'),
53
- ragRetrieval: document.getElementById('rag-retrieval'),
54
- ragHistory: document.getElementById('rag-history'),
55
  };
56
 
57
  // Application state
@@ -62,11 +46,7 @@ class ContextAwareApp {
62
  apiKeyValidated: false,
63
  isTestingApiKey: false,
64
  userApiKey: '',
65
- provider: 'openrouter',
66
- conversationHistory: [],
67
- ragInfoCollapsed: false,
68
- chunksIndexed: 0,
69
- contextSize: 0,
70
  apiSectionCollapsed: false,
71
  kbSectionCollapsed: false,
72
  assistantSectionCollapsed: false,
@@ -82,20 +62,18 @@ class ContextAwareApp {
82
  this.addEventListeners();
83
  this.loadStoredApiKey();
84
  this.setupResponsiveUI();
85
-
86
  // Show welcome message
87
  this.addMessageToChat(
88
  "👋 **Welcome to ContextIQ!**\n\n" +
89
  "To get started:\n" +
90
- "1. **Choose your AI provider** (OpenRouter or OpenAI) in the configuration section above.\n" +
91
- "2. **Enter your API key** for your chosen provider.\n" +
92
- "3. **Add your context** by uploading a file or pasting text in the Knowledge Base.\n" +
93
- "4. **Index the context** and start asking questions!\n\n" +
94
- "🆓 **OpenRouter** offers free access to 200+ models including Claude, GPT, and Gemini!\n" +
95
- "💡 **OpenAI** provides GPT-4o, GPT-4o-mini, and other cutting-edge models!",
96
  'system'
97
  );
98
-
99
  // Initial UI update
100
  this.updateUI();
101
  this.updateContextStats();
@@ -107,7 +85,6 @@ class ContextAwareApp {
107
  addEventListeners() {
108
  this.elements.indexContextBtn.addEventListener('click', () => this.handleIndexContext());
109
  this.elements.clearContextBtn.addEventListener('click', () => this.handleClearContext());
110
- this.elements.clearHistoryBtn.addEventListener('click', () => this.clearConversationHistory());
111
  this.elements.sendButton.addEventListener('click', () => this.handleSubmit());
112
  this.elements.chatInput.addEventListener('keydown', e => {
113
  if (e.key === 'Enter' && !e.shiftKey) {
@@ -125,16 +102,11 @@ class ContextAwareApp {
125
  this.updateUI();
126
  });
127
  this.elements.chatInput.addEventListener('input', () => this.autoResizeTextarea(this.elements.chatInput));
128
-
129
  // File input listener
130
  this.elements.fileInput.addEventListener('change', () => this.handleFileSelection());
131
 
132
 
133
- // Provider selection listener
134
- this.elements.providerSelect.addEventListener('change', () => {
135
- this.handleProviderChange();
136
- });
137
-
138
  // API Key listeners
139
  this.elements.testApiKeyBtn.addEventListener('click', (e) => {
140
  e.preventDefault();
@@ -154,17 +126,12 @@ class ContextAwareApp {
154
  this.testApiKey();
155
  }
156
  });
157
-
158
  // Toggle listeners for collapsible sections
159
  this.elements.toggleApiSection.addEventListener('click', () => this.toggleSection('api'));
160
  this.elements.kbHeader.addEventListener('click', () => this.toggleSection('kb'));
161
  this.elements.assistantHeader.addEventListener('click', () => this.toggleSection('assistant'));
162
 
163
- // RAG info panel toggle
164
- if (this.elements.toggleRagInfo) {
165
- this.elements.toggleRagInfo.addEventListener('click', () => this.toggleRagInfo());
166
- }
167
-
168
  // Listen for window resize to adjust UI
169
  window.addEventListener('resize', () => this.setupResponsiveUI());
170
  }
@@ -191,10 +158,10 @@ class ContextAwareApp {
191
  */
192
  setupResponsiveUI() {
193
  const isMobile = window.innerWidth < 1024;
194
-
195
  this.state.kbSectionCollapsed = isMobile;
196
  this.state.assistantSectionCollapsed = false;
197
-
198
  if (this.state.apiKeyValidated) {
199
  this.state.apiSectionCollapsed = true;
200
  }
@@ -237,23 +204,10 @@ class ContextAwareApp {
237
  */
238
  loadStoredApiKey() {
239
  try {
240
- const storedKey = localStorage.getItem('ai_api_key');
241
- const storedProvider = localStorage.getItem('ai_provider');
242
-
243
  if (storedKey) {
244
  this.elements.apiKeyInput.value = storedKey;
245
  this.state.userApiKey = storedKey;
246
- }
247
-
248
- if (storedProvider) {
249
- this.state.provider = storedProvider;
250
- this.elements.providerSelect.value = storedProvider;
251
- }
252
-
253
- // Update UI based on provider
254
- this.handleProviderChange();
255
-
256
- if (storedKey) {
257
  this.onApiKeyInputChange();
258
  }
259
  } catch (error) {
@@ -266,17 +220,14 @@ class ContextAwareApp {
266
  */
267
  onApiKeyInputChange() {
268
  const apiKey = this.elements.apiKeyInput.value.trim();
269
- const provider = this.state.provider;
270
-
271
  this.state.apiKeyValidated = false;
272
  this.state.userApiKey = '';
273
-
274
  if (!apiKey) {
275
  this.updateApiKeyStatus('pending', 'Enter API key and click Test');
276
- } else if (provider === 'openrouter' && !apiKey.startsWith('sk-or-')) {
277
- this.updateApiKeyStatus('error', 'OpenRouter keys should start with "sk-or-"');
278
- } else if (provider === 'openai' && !apiKey.startsWith('sk-')) {
279
- this.updateApiKeyStatus('error', 'OpenAI keys should start with "sk-"');
280
  } else if (apiKey.length < 40) {
281
  this.updateApiKeyStatus('error', 'API key appears too short');
282
  } else {
@@ -307,10 +258,7 @@ class ContextAwareApp {
307
  const response = await fetch('/api/v1/test-api-key', {
308
  method: 'POST',
309
  headers: { 'Content-Type': 'application/json' },
310
- body: JSON.stringify({
311
- api_key: apiKey,
312
- provider: this.state.provider
313
- }),
314
  signal: controller.signal
315
  });
316
 
@@ -322,7 +270,6 @@ class ContextAwareApp {
322
  this.state.apiKeyValidated = true;
323
  this.state.userApiKey = apiKey;
324
  this.updateApiKeyStatus('success', result.message || 'API key is valid');
325
- this.updateRagInfo();
326
  if (!silent) {
327
  this.addMessageToChat("✅ **API Key Validated!** You can now use the assistant.", 'system');
328
  this.state.apiSectionCollapsed = true;
@@ -338,7 +285,7 @@ class ContextAwareApp {
338
  console.error('API key test error:', error);
339
  this.state.apiKeyValidated = false;
340
  this.state.userApiKey = '';
341
-
342
  let errorMessage = (error.name === 'AbortError') ? 'Request timed out.' : error.message;
343
  this.updateApiKeyStatus('error', errorMessage);
344
  if (!silent) this.addMessageToChat(`❌ **Connection Error**: ${errorMessage}`, 'system');
@@ -358,56 +305,29 @@ class ContextAwareApp {
358
  return;
359
  }
360
  try {
361
- localStorage.setItem('ai_api_key', apiKey);
362
- localStorage.setItem('ai_provider', this.state.provider);
363
  this.updateApiKeyStatus('success', 'API key saved locally!');
364
- const providerName = this.state.provider === 'openai' ? 'OpenAI' : 'OpenRouter';
365
- this.addMessageToChat(`💾 **API Key Saved!** Your ${providerName} key will be remembered for future sessions.`, 'system');
366
  } catch (error) {
367
  console.error('Save error:', error);
368
  this.addMessageToChat("❌ **Save Failed**: Could not save API key to local storage.", 'system');
369
  }
370
  }
371
 
372
- /**
373
- * Handle provider selection change
374
- */
375
- handleProviderChange() {
376
- this.state.provider = this.elements.providerSelect.value;
377
-
378
- // Update placeholder text
379
- if (this.state.provider === 'openai') {
380
- this.elements.apiKeyInput.placeholder = 'sk-your-openai-api-key-here';
381
- this.elements.providerLink.innerHTML = '• Get your OpenAI API key from <a href="https://platform.openai.com/api-keys" target="_blank" class="text-indigo-400 hover:text-indigo-300">platform.openai.com</a>';
382
- this.elements.providerModels.textContent = '• Access GPT-4o, GPT-4o-mini, GPT-4, GPT-3.5-turbo, and more models';
383
- } else {
384
- this.elements.apiKeyInput.placeholder = 'sk-or-your-openrouter-api-key-here';
385
- this.elements.providerLink.innerHTML = '• Get your free API key from <a href="https://openrouter.ai/" target="_blank" class="text-indigo-400 hover:text-indigo-300">openrouter.ai</a>';
386
- this.elements.providerModels.textContent = '• OpenRouter provides access to 200+ models including Claude, GPT, Gemini, and more';
387
- }
388
-
389
- // Reset validation state when provider changes
390
- this.state.apiKeyValidated = false;
391
- this.state.userApiKey = '';
392
- this.onApiKeyInputChange();
393
- this.updateUI();
394
- this.updateRagInfo();
395
- }
396
-
397
  /**
398
  * Update API key status display
399
  */
400
  updateApiKeyStatus(status, message) {
401
  const statusEl = this.elements.apiStatusText;
402
  const iconEl = this.elements.apiStatusIcon;
403
-
404
  const statusConfig = {
405
  testing: { icon: 'bg-blue-500 animate-pulse', text: 'Testing...' },
406
  success: { icon: 'bg-green-500', text: 'API Key Valid' },
407
  error: { icon: 'bg-red-500', text: 'API Key Invalid' },
408
  pending: { icon: 'bg-yellow-500', text: 'API Key Pending' },
409
  };
410
-
411
  iconEl.className = `w-3 h-3 ${statusConfig[status].icon} rounded-full flex-shrink-0`;
412
  statusEl.textContent = statusConfig[status].text;
413
 
@@ -446,7 +366,7 @@ class ContextAwareApp {
446
  this.handleExecuteTask();
447
  }
448
  }
449
-
450
  /**
451
  * ✨ REFACTORED: Unified logic for indexing from file or text.
452
  */
@@ -501,20 +421,14 @@ class ContextAwareApp {
501
  }
502
 
503
  this.state.isIndexed = true;
504
- this.state.chunksIndexed = result.documents_added || 0;
505
- this.state.contextSize = textContext.length || result.extracted_text?.length || 0;
506
  this.showStatus(result.message || 'Successfully indexed context.', 'success');
507
 
508
- // Populate textarea with extracted text if available
509
  if (result.extracted_text) {
510
  this.elements.contextInput.value = result.extracted_text;
511
- this.state.contextSize = result.extracted_text.length;
512
  this.updateContextStats();
513
  }
514
-
515
- // Update RAG info panel
516
- this.updateRagInfo();
517
-
518
  } catch (error) {
519
  console.error('Indexing error:', error);
520
  this.showStatus(`Error: ${error.message}`, 'error');
@@ -529,44 +443,36 @@ class ContextAwareApp {
529
  }
530
 
531
  /**
532
- * Handles sending a user's prompt to the backend for a response with conversation history.
533
  */
534
  async handleSendPrompt() {
535
  const prompt = this.elements.chatInput.value.trim();
536
  if (prompt.length < 2 || this.state.isGenerating) return;
537
-
538
  if (!this.state.isIndexed) {
539
  this.showStatus('Please index your context before asking questions.', 'error');
540
  return;
541
  }
542
 
543
- // Add user message to chat and conversation history
544
  this.addMessageToChat(prompt, 'user');
545
- this.state.conversationHistory.push({ role: 'user', content: prompt });
546
-
547
  this.elements.chatInput.value = '';
548
  this.autoResizeTextarea(this.elements.chatInput);
549
 
550
  this.state.isGenerating = true;
551
  this.updateUI();
552
- this.updateRagInfo();
553
- this.showStatus('AI is thinking with full conversation context...', 'loading');
554
 
555
  try {
556
  const controller = new AbortController();
557
- const timeoutId = setTimeout(() => controller.abort(), 90000); // Increased timeout for better responses
558
 
559
- // Send prompt with conversation history for context-aware responses
560
  const response = await fetch('/api/v1/generate', {
561
  method: 'POST',
562
- headers: {
563
  'Content-Type': 'application/json',
564
  'X-API-Key': this.state.userApiKey
565
  },
566
- body: JSON.stringify({
567
- prompt,
568
- conversation_history: this.state.conversationHistory.slice(-20) // Last 20 messages (10 exchanges)
569
- }),
570
  signal: controller.signal
571
  });
572
 
@@ -574,15 +480,7 @@ class ContextAwareApp {
574
  const result = await response.json();
575
  if (!response.ok) throw new Error(result.detail || 'An unknown error occurred.');
576
 
577
- // Add AI response to chat and conversation history
578
  this.addMessageToChat(result.response, 'ai');
579
- this.state.conversationHistory.push({ role: 'assistant', content: result.response });
580
-
581
- // Limit history size to prevent memory issues (keep last 40 messages = 20 exchanges)
582
- if (this.state.conversationHistory.length > 40) {
583
- this.state.conversationHistory = this.state.conversationHistory.slice(-40);
584
- }
585
-
586
  this.showStatus('Ready for your next question.', 'success');
587
  } catch (error) {
588
  console.error('Generation error:', error);
@@ -592,7 +490,6 @@ class ContextAwareApp {
592
  } finally {
593
  this.state.isGenerating = false;
594
  this.updateUI();
595
- this.updateRagInfo();
596
  }
597
  }
598
 
@@ -626,7 +523,7 @@ class ContextAwareApp {
626
 
627
  const response = await fetch('/api/v1/task', {
628
  method: 'POST',
629
- headers: {
630
  'Content-Type': 'application/json',
631
  'X-API-Key': this.state.userApiKey
632
  },
@@ -656,95 +553,25 @@ class ContextAwareApp {
656
  */
657
  async handleClearContext() {
658
  this.elements.contextInput.value = '';
659
- this.elements.fileInput.value = '';
660
  this.elements.fileName.textContent = 'Choose a file...';
661
  this.updateContextStats();
662
  this.state.isIndexed = false;
663
- this.state.chunksIndexed = 0;
664
- this.state.contextSize = 0;
665
-
666
  this.showStatus('Clearing knowledge base...', 'loading');
667
 
668
  try {
669
- await fetch('/api/v1/clear_index', {
670
  method: 'POST',
671
  headers: { 'X-API-Key': this.state.userApiKey }
672
  });
673
  this.showStatus('Knowledge base cleared. Ready for new context.', 'success');
674
- if (this.elements.ragInfoPanel) {
675
- this.elements.ragInfoPanel.classList.add('hidden');
676
- }
677
  } catch (error) {
678
  console.error('Clear index error:', error);
679
  this.showStatus(`Error clearing index: ${error.message}`, 'error');
680
  } finally {
681
  this.updateUI();
682
- this.updateRagInfo();
683
- }
684
- }
685
-
686
- /**
687
- * Clear conversation history for a fresh start
688
- */
689
- clearConversationHistory() {
690
- if (this.state.conversationHistory.length === 0) {
691
- this.showStatus('Conversation history is already empty.', 'success');
692
- return;
693
  }
694
-
695
- if (confirm(`Clear ${this.state.conversationHistory.length} message(s) from conversation history?\n\nThis will reset the AI's memory of the conversation.`)) {
696
- this.state.conversationHistory = [];
697
- this.showStatus('Conversation history cleared. The AI will start fresh.', 'success');
698
- this.addMessageToChat('💭 **Conversation history cleared.** I\'ll start fresh with your next question!', 'system');
699
- this.updateRagInfo();
700
- }
701
- }
702
-
703
- toggleRagInfo() {
704
- this.state.ragInfoCollapsed = !this.state.ragInfoCollapsed;
705
- if (this.state.ragInfoCollapsed) {
706
- this.elements.ragInfoContent.classList.add('hidden');
707
- this.elements.ragToggleText.textContent = 'Show';
708
- } else {
709
- this.elements.ragInfoContent.classList.remove('hidden');
710
- this.elements.ragToggleText.textContent = 'Hide';
711
- }
712
- }
713
-
714
- updateRagInfo() {
715
- if (!this.elements.ragInfoPanel) return;
716
-
717
- // Show panel when indexed
718
- if (this.state.isIndexed) {
719
- this.elements.ragInfoPanel.classList.remove('hidden');
720
- }
721
-
722
- // Update status
723
- if (this.state.isGenerating) {
724
- this.elements.ragStatus.textContent = 'Processing...';
725
- this.elements.ragStatus.className = 'font-medium text-yellow-400';
726
- } else if (this.state.isIndexed) {
727
- this.elements.ragStatus.textContent = 'Active';
728
- this.elements.ragStatus.className = 'font-medium text-green-400';
729
- } else {
730
- this.elements.ragStatus.textContent = 'Ready';
731
- this.elements.ragStatus.className = 'font-medium text-slate-400';
732
- }
733
-
734
- // Update chunks and context size
735
- this.elements.ragChunks.textContent = this.state.chunksIndexed;
736
- this.elements.ragContextSize.textContent = this.state.contextSize.toLocaleString() + ' chars';
737
-
738
- // Update model info
739
- const modelMap = {
740
- 'openrouter': 'DeepSeek R1 (Free)',
741
- 'openai': 'GPT-4o-mini'
742
- };
743
- this.elements.ragModel.textContent = this.state.apiKeyValidated ?
744
- modelMap[this.state.provider] || this.state.provider : 'Not set';
745
-
746
- // Update conversation history count
747
- this.elements.ragHistory.textContent = this.state.conversationHistory.length + ' messages';
748
  }
749
 
750
  /**
 
9
  chatContainer: document.getElementById('chat-container'),
10
  statusIndicator: document.getElementById('status-indicator'),
11
  clearContextBtn: document.getElementById('clear-context-btn'),
 
12
  indexContextBtn: document.getElementById('index-context-btn'),
13
  taskSelect: document.getElementById('task-select'),
14
  charCount: document.getElementById('char-count'),
15
  wordCount: document.getElementById('word-count'),
16
+
17
  // API Key elements
18
  apiKeyInput: document.getElementById('api-key-input'),
 
 
 
19
  testApiKeyBtn: document.getElementById('test-api-key'),
20
  saveApiKeyBtn: document.getElementById('save-api-key'),
21
  apiKeyStatus: document.getElementById('api-key-status'),
 
36
  assistantHeader: document.getElementById('assistant-header'),
37
  assistantContent: document.getElementById('assistant-content'),
38
  assistantToggleIcon: document.getElementById('assistant-toggle-icon'),
 
 
 
 
 
 
 
 
 
 
 
 
39
  };
40
 
41
  // Application state
 
46
  apiKeyValidated: false,
47
  isTestingApiKey: false,
48
  userApiKey: '',
49
+ // Collapse states for mobile view
 
 
 
 
50
  apiSectionCollapsed: false,
51
  kbSectionCollapsed: false,
52
  assistantSectionCollapsed: false,
 
62
  this.addEventListeners();
63
  this.loadStoredApiKey();
64
  this.setupResponsiveUI();
65
+
66
  // Show welcome message
67
  this.addMessageToChat(
68
  "👋 **Welcome to ContextIQ!**\n\n" +
69
  "To get started:\n" +
70
+ "1. **Enter your OpenRouter API key** in the configuration section above.\n" +
71
+ "2. **Add your context** by uploading a file or pasting text in the Knowledge Base.\n" +
72
+ "3. **Index the context** and start asking questions!\n\n" +
73
+ "🆓 You can get a free API key from [openrouter.ai](https://openrouter.ai) - no credit card required!",
 
 
74
  'system'
75
  );
76
+
77
  // Initial UI update
78
  this.updateUI();
79
  this.updateContextStats();
 
85
  addEventListeners() {
86
  this.elements.indexContextBtn.addEventListener('click', () => this.handleIndexContext());
87
  this.elements.clearContextBtn.addEventListener('click', () => this.handleClearContext());
 
88
  this.elements.sendButton.addEventListener('click', () => this.handleSubmit());
89
  this.elements.chatInput.addEventListener('keydown', e => {
90
  if (e.key === 'Enter' && !e.shiftKey) {
 
102
  this.updateUI();
103
  });
104
  this.elements.chatInput.addEventListener('input', () => this.autoResizeTextarea(this.elements.chatInput));
105
+
106
  // File input listener
107
  this.elements.fileInput.addEventListener('change', () => this.handleFileSelection());
108
 
109
 
 
 
 
 
 
110
  // API Key listeners
111
  this.elements.testApiKeyBtn.addEventListener('click', (e) => {
112
  e.preventDefault();
 
126
  this.testApiKey();
127
  }
128
  });
129
+
130
  // Toggle listeners for collapsible sections
131
  this.elements.toggleApiSection.addEventListener('click', () => this.toggleSection('api'));
132
  this.elements.kbHeader.addEventListener('click', () => this.toggleSection('kb'));
133
  this.elements.assistantHeader.addEventListener('click', () => this.toggleSection('assistant'));
134
 
 
 
 
 
 
135
  // Listen for window resize to adjust UI
136
  window.addEventListener('resize', () => this.setupResponsiveUI());
137
  }
 
158
  */
159
  setupResponsiveUI() {
160
  const isMobile = window.innerWidth < 1024;
161
+
162
  this.state.kbSectionCollapsed = isMobile;
163
  this.state.assistantSectionCollapsed = false;
164
+
165
  if (this.state.apiKeyValidated) {
166
  this.state.apiSectionCollapsed = true;
167
  }
 
204
  */
205
  loadStoredApiKey() {
206
  try {
207
+ const storedKey = localStorage.getItem('openrouter_api_key');
 
 
208
  if (storedKey) {
209
  this.elements.apiKeyInput.value = storedKey;
210
  this.state.userApiKey = storedKey;
 
 
 
 
 
 
 
 
 
 
 
211
  this.onApiKeyInputChange();
212
  }
213
  } catch (error) {
 
220
  */
221
  onApiKeyInputChange() {
222
  const apiKey = this.elements.apiKeyInput.value.trim();
223
+
 
224
  this.state.apiKeyValidated = false;
225
  this.state.userApiKey = '';
226
+
227
  if (!apiKey) {
228
  this.updateApiKeyStatus('pending', 'Enter API key and click Test');
229
+ } else if (!apiKey.startsWith('sk-or-')) {
230
+ this.updateApiKeyStatus('error', 'Key should start with "sk-or-"');
 
 
231
  } else if (apiKey.length < 40) {
232
  this.updateApiKeyStatus('error', 'API key appears too short');
233
  } else {
 
258
  const response = await fetch('/api/v1/test-api-key', {
259
  method: 'POST',
260
  headers: { 'Content-Type': 'application/json' },
261
+ body: JSON.stringify({ api_key: apiKey }),
 
 
 
262
  signal: controller.signal
263
  });
264
 
 
270
  this.state.apiKeyValidated = true;
271
  this.state.userApiKey = apiKey;
272
  this.updateApiKeyStatus('success', result.message || 'API key is valid');
 
273
  if (!silent) {
274
  this.addMessageToChat("✅ **API Key Validated!** You can now use the assistant.", 'system');
275
  this.state.apiSectionCollapsed = true;
 
285
  console.error('API key test error:', error);
286
  this.state.apiKeyValidated = false;
287
  this.state.userApiKey = '';
288
+
289
  let errorMessage = (error.name === 'AbortError') ? 'Request timed out.' : error.message;
290
  this.updateApiKeyStatus('error', errorMessage);
291
  if (!silent) this.addMessageToChat(`❌ **Connection Error**: ${errorMessage}`, 'system');
 
305
  return;
306
  }
307
  try {
308
+ localStorage.setItem('openrouter_api_key', apiKey);
 
309
  this.updateApiKeyStatus('success', 'API key saved locally!');
310
+ this.addMessageToChat("💾 **API Key Saved!** It will be remembered for future sessions.", 'system');
 
311
  } catch (error) {
312
  console.error('Save error:', error);
313
  this.addMessageToChat("❌ **Save Failed**: Could not save API key to local storage.", 'system');
314
  }
315
  }
316
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  /**
318
  * Update API key status display
319
  */
320
  updateApiKeyStatus(status, message) {
321
  const statusEl = this.elements.apiStatusText;
322
  const iconEl = this.elements.apiStatusIcon;
323
+
324
  const statusConfig = {
325
  testing: { icon: 'bg-blue-500 animate-pulse', text: 'Testing...' },
326
  success: { icon: 'bg-green-500', text: 'API Key Valid' },
327
  error: { icon: 'bg-red-500', text: 'API Key Invalid' },
328
  pending: { icon: 'bg-yellow-500', text: 'API Key Pending' },
329
  };
330
+
331
  iconEl.className = `w-3 h-3 ${statusConfig[status].icon} rounded-full flex-shrink-0`;
332
  statusEl.textContent = statusConfig[status].text;
333
 
 
366
  this.handleExecuteTask();
367
  }
368
  }
369
+
370
  /**
371
  * ✨ REFACTORED: Unified logic for indexing from file or text.
372
  */
 
421
  }
422
 
423
  this.state.isIndexed = true;
 
 
424
  this.showStatus(result.message || 'Successfully indexed context.', 'success');
425
 
426
+ // NEW: Populate textarea with extracted text if available
427
  if (result.extracted_text) {
428
  this.elements.contextInput.value = result.extracted_text;
 
429
  this.updateContextStats();
430
  }
431
+
 
 
 
432
  } catch (error) {
433
  console.error('Indexing error:', error);
434
  this.showStatus(`Error: ${error.message}`, 'error');
 
443
  }
444
 
445
  /**
446
+ * Handles sending a user's prompt to the backend for a response.
447
  */
448
  async handleSendPrompt() {
449
  const prompt = this.elements.chatInput.value.trim();
450
  if (prompt.length < 2 || this.state.isGenerating) return;
451
+
452
  if (!this.state.isIndexed) {
453
  this.showStatus('Please index your context before asking questions.', 'error');
454
  return;
455
  }
456
 
 
457
  this.addMessageToChat(prompt, 'user');
 
 
458
  this.elements.chatInput.value = '';
459
  this.autoResizeTextarea(this.elements.chatInput);
460
 
461
  this.state.isGenerating = true;
462
  this.updateUI();
463
+ this.showStatus('AI is thinking...', 'loading');
 
464
 
465
  try {
466
  const controller = new AbortController();
467
+ const timeoutId = setTimeout(() => controller.abort(), 60000);
468
 
 
469
  const response = await fetch('/api/v1/generate', {
470
  method: 'POST',
471
+ headers: {
472
  'Content-Type': 'application/json',
473
  'X-API-Key': this.state.userApiKey
474
  },
475
+ body: JSON.stringify({ prompt }),
 
 
 
476
  signal: controller.signal
477
  });
478
 
 
480
  const result = await response.json();
481
  if (!response.ok) throw new Error(result.detail || 'An unknown error occurred.');
482
 
 
483
  this.addMessageToChat(result.response, 'ai');
 
 
 
 
 
 
 
484
  this.showStatus('Ready for your next question.', 'success');
485
  } catch (error) {
486
  console.error('Generation error:', error);
 
490
  } finally {
491
  this.state.isGenerating = false;
492
  this.updateUI();
 
493
  }
494
  }
495
 
 
523
 
524
  const response = await fetch('/api/v1/task', {
525
  method: 'POST',
526
+ headers: {
527
  'Content-Type': 'application/json',
528
  'X-API-Key': this.state.userApiKey
529
  },
 
553
  */
554
  async handleClearContext() {
555
  this.elements.contextInput.value = '';
556
+ this.elements.fileInput.value = ''; // Also clear the file input
557
  this.elements.fileName.textContent = 'Choose a file...';
558
  this.updateContextStats();
559
  this.state.isIndexed = false;
560
+
 
 
561
  this.showStatus('Clearing knowledge base...', 'loading');
562
 
563
  try {
564
+ await fetch('/api/v1/clear_index', {
565
  method: 'POST',
566
  headers: { 'X-API-Key': this.state.userApiKey }
567
  });
568
  this.showStatus('Knowledge base cleared. Ready for new context.', 'success');
 
 
 
569
  } catch (error) {
570
  console.error('Clear index error:', error);
571
  this.showStatus(`Error clearing index: ${error.message}`, 'error');
572
  } finally {
573
  this.updateUI();
 
 
 
 
 
 
 
 
 
 
 
574
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
575
  }
576
 
577
  /**
templates/index.html CHANGED
@@ -30,28 +30,28 @@
30
  </script>
31
  <style>
32
  /* General styling for a modern look and feel */
33
- body {
34
- font-family: 'Inter', sans-serif;
35
- background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%);
36
  overflow-x: hidden; /* Prevent horizontal scroll */
37
  }
38
- .glass-effect {
39
- background: rgba(30, 41, 59, 0.7);
40
- backdrop-filter: blur(12px);
41
- border: 1px solid rgba(148, 163, 184, 0.1);
42
  }
43
- .gradient-text {
44
- background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 50%, #06b6d4 100%);
45
- -webkit-background-clip: text;
46
- -webkit-text-fill-color: transparent;
47
- background-clip: text;
48
  }
49
-
50
  /* Custom scrollbar for a cleaner UI */
51
  .scroll-container::-webkit-scrollbar { width: 6px; }
52
  .scroll-container::-webkit-scrollbar-track { background: transparent; }
53
  .scroll-container::-webkit-scrollbar-thumb { background: #475569; border-radius: 3px; }
54
-
55
  /* Styling for markdown content rendered by marked.js */
56
  .markdown-content { word-wrap: break-word; }
57
  .markdown-content p { margin-bottom: 0.75rem; }
@@ -81,11 +81,11 @@
81
  <svg width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M12 2L2 7l10 5 10-5-10-5zM2 17l10 5 10-5M2 12l10 5 10-5"/></svg>
82
  </div>
83
  <div>
84
- <h1 class="text-xl sm:text-2xl font-bold gradient-text">Context AI by Ab-Romia</h1>
85
  <p class="text-xs sm:text-sm text-slate-400">Abdelrahman Abouroumia...</p>
86
  </div>
87
  </div>
88
-
89
  <div id="api-status" class="flex items-center space-x-2 mt-2 sm:mt-0">
90
  <div id="api-status-icon" class="w-3 h-3 bg-red-500 rounded-full flex-shrink-0"></div>
91
  <span id="api-status-text" class="text-sm text-slate-400">API Key Required</span>
@@ -104,21 +104,13 @@
104
  </svg>
105
  </button>
106
  </div>
107
-
108
  <div id="api-key-content" class="space-y-4 mt-4">
109
- <div class="mb-3">
110
- <label for="provider-select" class="block text-sm font-medium text-slate-300 mb-2">Choose AI Provider:</label>
111
- <select id="provider-select" class="w-full bg-slate-900/50 border border-slate-600/50 rounded-lg p-3 text-slate-200 focus:ring-2 focus:ring-indigo-500 focus:outline-none transition">
112
- <option value="openrouter">OpenRouter (Free & Multiple Models)</option>
113
- <option value="openai">OpenAI (GPT-4, GPT-3.5, etc.)</option>
114
- </select>
115
- </div>
116
-
117
  <div class="flex flex-col sm:flex-row space-y-2 sm:space-y-0 sm:space-x-3">
118
- <input
119
- type="password"
120
- id="api-key-input"
121
- placeholder="Enter your API key here..."
122
  class="flex-1 bg-slate-900/50 border border-slate-600/50 rounded-lg p-3 text-slate-200 placeholder-slate-500 focus:ring-2 focus:ring-indigo-500 focus:outline-none transition"
123
  >
124
  <div class="flex space-x-2">
@@ -130,13 +122,12 @@
130
  </button>
131
  </div>
132
  </div>
133
-
134
- <div id="provider-info" class="text-xs text-slate-400 space-y-1">
135
- <p id="provider-link">• Get your free API key from <a href="https://openrouter.ai/" target="_blank" class="text-indigo-400 hover:text-indigo-300">openrouter.ai</a></p>
136
  <p>• Your API key is stored locally in your browser and never sent to our servers</p>
137
- <p id="provider-models">• OpenRouter provides access to 200+ models including Claude, GPT, Gemini, and more</p>
138
  </div>
139
-
140
  <div id="api-key-status" class="hidden p-3 rounded-lg text-sm"></div>
141
  </div>
142
  </div>
@@ -147,11 +138,11 @@
147
  <h2 class="text-lg font-semibold text-slate-200">📚 Knowledge Base</h2>
148
  <svg id="kb-toggle-icon" class="w-5 h-5 transition-transform duration-300" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M6 9l6 6 6-6"/></svg>
149
  </div>
150
-
151
  <div id="kb-content" class="flex-1 p-4 sm:p-6 flex flex-col lg:flex">
152
  <h2 class="hidden lg:block text-lg font-semibold text-slate-200">📚 Knowledge Base</h2>
153
  <p class="hidden lg:block text-sm text-slate-400 mt-1 mb-4">Provide context for the AI to learn from.</p>
154
-
155
  <div class="mb-4">
156
  <label for="file-input" class="block text-sm font-medium text-slate-300 mb-2">Upload a document:</label>
157
  <div class="flex items-center space-x-2">
@@ -170,7 +161,7 @@
170
  <p class="text-slate-400 italic">Uploading a file will replace the text in the text area below.</p>
171
  </div>
172
  </div>
173
-
174
  <div class="flex-1 flex flex-col">
175
  <textarea id="context-input" class="w-full flex-1 bg-slate-900/50 border border-slate-600/50 rounded-xl p-4 text-slate-200 placeholder-slate-500 focus:ring-2 focus:ring-indigo-500 focus:outline-none resize-none transition scroll-container min-h-[250px] lg:min-h-0" placeholder="... or paste your documents, meeting notes, or any relevant context here"></textarea>
176
  </div>
@@ -206,43 +197,6 @@
206
  <option value="creative">Creative Writing</option>
207
  </select>
208
  </div>
209
-
210
- <!-- RAG System Info Panel -->
211
- <div id="rag-info-panel" class="hidden p-4 sm:p-4 border-b border-slate-600/30 bg-slate-900/30">
212
- <div class="flex items-center justify-between mb-3">
213
- <h3 class="text-sm font-semibold text-indigo-400">📊 RAG System Info</h3>
214
- <button id="toggle-rag-info" class="text-xs text-slate-400 hover:text-slate-300">
215
- <span id="rag-toggle-text">Hide</span>
216
- </button>
217
- </div>
218
- <div id="rag-info-content" class="space-y-2 text-xs text-slate-300">
219
- <div class="flex justify-between">
220
- <span class="text-slate-400">Status:</span>
221
- <span id="rag-status" class="font-medium text-green-400">Ready</span>
222
- </div>
223
- <div class="flex justify-between">
224
- <span class="text-slate-400">Indexed Chunks:</span>
225
- <span id="rag-chunks" class="font-medium">0</span>
226
- </div>
227
- <div class="flex justify-between">
228
- <span class="text-slate-400">Context Size:</span>
229
- <span id="rag-context-size" class="font-medium">0 chars</span>
230
- </div>
231
- <div class="flex justify-between">
232
- <span class="text-slate-400">AI Model:</span>
233
- <span id="rag-model" class="font-medium">Not set</span>
234
- </div>
235
- <div class="flex justify-between">
236
- <span class="text-slate-400">Retrieval Mode:</span>
237
- <span id="rag-retrieval" class="font-medium">Smart (Overlapping)</span>
238
- </div>
239
- <div class="flex justify-between">
240
- <span class="text-slate-400">Conversation:</span>
241
- <span id="rag-history" class="font-medium">0 messages</span>
242
- </div>
243
- </div>
244
- </div>
245
-
246
  <div id="chat-container" class="flex-1 overflow-y-auto scroll-container p-6 space-y-6 min-h-[300px] lg:min-h-0">
247
  </div>
248
  <div class="p-4 sm:p-6 border-t border-slate-600/30">
@@ -253,10 +207,8 @@
253
  </button>
254
  </div>
255
  <div class="flex items-center justify-between mt-3 text-xs text-slate-400 h-5">
256
- <div id="status-indicator" class="hidden"></div>
257
- <button id="clear-history-btn" class="px-3 py-1 text-xs bg-slate-700/50 text-slate-300 rounded-lg hover:bg-slate-600/50 transition-colors" title="Clear conversation history">
258
- 🗑️ Clear History
259
- </button>
260
  </div>
261
  </div>
262
  </div>
 
30
  </script>
31
  <style>
32
  /* General styling for a modern look and feel */
33
+ body {
34
+ font-family: 'Inter', sans-serif;
35
+ background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%);
36
  overflow-x: hidden; /* Prevent horizontal scroll */
37
  }
38
+ .glass-effect {
39
+ background: rgba(30, 41, 59, 0.7);
40
+ backdrop-filter: blur(12px);
41
+ border: 1px solid rgba(148, 163, 184, 0.1);
42
  }
43
+ .gradient-text {
44
+ background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 50%, #06b6d4 100%);
45
+ -webkit-background-clip: text;
46
+ -webkit-text-fill-color: transparent;
47
+ background-clip: text;
48
  }
49
+
50
  /* Custom scrollbar for a cleaner UI */
51
  .scroll-container::-webkit-scrollbar { width: 6px; }
52
  .scroll-container::-webkit-scrollbar-track { background: transparent; }
53
  .scroll-container::-webkit-scrollbar-thumb { background: #475569; border-radius: 3px; }
54
+
55
  /* Styling for markdown content rendered by marked.js */
56
  .markdown-content { word-wrap: break-word; }
57
  .markdown-content p { margin-bottom: 0.75rem; }
 
81
  <svg width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M12 2L2 7l10 5 10-5-10-5zM2 17l10 5 10-5M2 12l10 5 10-5"/></svg>
82
  </div>
83
  <div>
84
+ <h1 class="text-xl sm:text-2xl font-bold gradient-text">ContextIQ by Ab-Romia</h1>
85
  <p class="text-xs sm:text-sm text-slate-400">Abdelrahman Abouroumia...</p>
86
  </div>
87
  </div>
88
+
89
  <div id="api-status" class="flex items-center space-x-2 mt-2 sm:mt-0">
90
  <div id="api-status-icon" class="w-3 h-3 bg-red-500 rounded-full flex-shrink-0"></div>
91
  <span id="api-status-text" class="text-sm text-slate-400">API Key Required</span>
 
104
  </svg>
105
  </button>
106
  </div>
107
+
108
  <div id="api-key-content" class="space-y-4 mt-4">
 
 
 
 
 
 
 
 
109
  <div class="flex flex-col sm:flex-row space-y-2 sm:space-y-0 sm:space-x-3">
110
+ <input
111
+ type="password"
112
+ id="api-key-input"
113
+ placeholder="sk-or-your-openrouter-api-key-here"
114
  class="flex-1 bg-slate-900/50 border border-slate-600/50 rounded-lg p-3 text-slate-200 placeholder-slate-500 focus:ring-2 focus:ring-indigo-500 focus:outline-none transition"
115
  >
116
  <div class="flex space-x-2">
 
122
  </button>
123
  </div>
124
  </div>
125
+
126
+ <div class="text-xs text-slate-400 space-y-1">
127
+ <p>• Get your free API key from <a href="https://openrouter.ai/" target="_blank" class="text-indigo-400 hover:text-indigo-300">openrouter.ai</a></p>
128
  <p>• Your API key is stored locally in your browser and never sent to our servers</p>
 
129
  </div>
130
+
131
  <div id="api-key-status" class="hidden p-3 rounded-lg text-sm"></div>
132
  </div>
133
  </div>
 
138
  <h2 class="text-lg font-semibold text-slate-200">📚 Knowledge Base</h2>
139
  <svg id="kb-toggle-icon" class="w-5 h-5 transition-transform duration-300" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M6 9l6 6 6-6"/></svg>
140
  </div>
141
+
142
  <div id="kb-content" class="flex-1 p-4 sm:p-6 flex flex-col lg:flex">
143
  <h2 class="hidden lg:block text-lg font-semibold text-slate-200">📚 Knowledge Base</h2>
144
  <p class="hidden lg:block text-sm text-slate-400 mt-1 mb-4">Provide context for the AI to learn from.</p>
145
+
146
  <div class="mb-4">
147
  <label for="file-input" class="block text-sm font-medium text-slate-300 mb-2">Upload a document:</label>
148
  <div class="flex items-center space-x-2">
 
161
  <p class="text-slate-400 italic">Uploading a file will replace the text in the text area below.</p>
162
  </div>
163
  </div>
164
+
165
  <div class="flex-1 flex flex-col">
166
  <textarea id="context-input" class="w-full flex-1 bg-slate-900/50 border border-slate-600/50 rounded-xl p-4 text-slate-200 placeholder-slate-500 focus:ring-2 focus:ring-indigo-500 focus:outline-none resize-none transition scroll-container min-h-[250px] lg:min-h-0" placeholder="... or paste your documents, meeting notes, or any relevant context here"></textarea>
167
  </div>
 
197
  <option value="creative">Creative Writing</option>
198
  </select>
199
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  <div id="chat-container" class="flex-1 overflow-y-auto scroll-container p-6 space-y-6 min-h-[300px] lg:min-h-0">
201
  </div>
202
  <div class="p-4 sm:p-6 border-t border-slate-600/30">
 
207
  </button>
208
  </div>
209
  <div class="flex items-center justify-between mt-3 text-xs text-slate-400 h-5">
210
+ <div id="status-indicator" class="hidden">
211
+ </div>
 
 
212
  </div>
213
  </div>
214
  </div>