brickfrog committed on
Commit
f5605ad
·
verified ·
1 Parent(s): f403842

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -56,21 +56,21 @@ Generate flashcards directly from your terminal with intelligent auto-configurat
56
 
57
  ```bash
58
  # Quick generation (auto-detects best settings)
59
- uv run python -m ankigen_core.cli -p "Basic SQL"
60
 
61
  # Custom settings
62
- uv run python -m ankigen_core.cli -p "React Hooks" \
63
  --topics 5 \
64
  --cards-per-topic 8 \
65
  --output hooks.apkg
66
 
67
  # Export to CSV
68
- uv run python -m ankigen_core.cli -p "Docker basics" \
69
  --format csv \
70
  -o docker.csv
71
 
72
  # Skip confirmation prompt
73
- uv run python -m ankigen_core.cli -p "Python Lists" --no-confirm
74
  ```
75
 
76
  **CLI Options:**
@@ -100,7 +100,7 @@ uv run python -m ankigen_core.cli -p "Python Lists" --no-confirm
100
  ## Project Structure
101
 
102
  - `app.py`: Main Gradio web application
103
- - `ankigen_core/`: Core logic modules
104
  - `cli.py`: Command-line interface
105
  - `agents/`: Agent system implementation
106
  - `card_generator.py`: Card generation orchestration
@@ -123,7 +123,7 @@ uv run python -m ankigen_core.cli -p "Python Lists" --no-confirm
123
 
124
  3. Run with coverage:
125
  ```bash
126
- uv run pytest --cov=ankigen_core tests/
127
  ```
128
 
129
  ## License
 
56
 
57
  ```bash
58
  # Quick generation (auto-detects best settings)
59
+ uv run python -m ankigen.cli -p "Basic SQL"
60
 
61
  # Custom settings
62
+ uv run python -m ankigen.cli -p "React Hooks" \
63
  --topics 5 \
64
  --cards-per-topic 8 \
65
  --output hooks.apkg
66
 
67
  # Export to CSV
68
+ uv run python -m ankigen.cli -p "Docker basics" \
69
  --format csv \
70
  -o docker.csv
71
 
72
  # Skip confirmation prompt
73
+ uv run python -m ankigen.cli -p "Python Lists" --no-confirm
74
  ```
75
 
76
  **CLI Options:**
 
100
  ## Project Structure
101
 
102
  - `app.py`: Main Gradio web application
103
+ - `ankigen/`: Core logic modules
104
  - `cli.py`: Command-line interface
105
  - `agents/`: Agent system implementation
106
  - `card_generator.py`: Card generation orchestration
 
123
 
124
  3. Run with coverage:
125
  ```bash
126
+ uv run pytest --cov=ankigen tests/
127
  ```
128
 
129
  ## License
ankigen/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file marks ankigen as a Python package
ankigen/agents/.env.example ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AnkiGen Agent System Configuration
2
+ # Copy this file to .env and modify as needed
3
+
4
+ # =====================================
5
+ # AGENT OPERATING MODE
6
+ # =====================================
7
+
8
+ # Main operating mode: legacy, agent_only, hybrid, a_b_test
9
+ ANKIGEN_AGENT_MODE=hybrid
10
+
11
+ # A/B testing configuration (only used when mode=a_b_test)
12
+ ANKIGEN_AB_TEST_RATIO=0.5
13
+ ANKIGEN_AB_TEST_USER_HASH=
14
+
15
+ # =====================================
16
+ # GENERATION AGENTS
17
+ # =====================================
18
+
19
+ # Subject Expert Agent - domain-specific card generation
20
+ ANKIGEN_ENABLE_SUBJECT_EXPERT=true
21
+
22
+ # Pedagogical Agent - educational effectiveness review
23
+ ANKIGEN_ENABLE_PEDAGOGICAL_AGENT=false
24
+
25
+ # Content Structuring Agent - formatting and organization
26
+ ANKIGEN_ENABLE_CONTENT_STRUCTURING=false
27
+
28
+ # Generation Coordinator - orchestrates multi-agent workflows
29
+ ANKIGEN_ENABLE_GENERATION_COORDINATOR=false
30
+
31
+ # =====================================
32
+ # JUDGE AGENTS
33
+ # =====================================
34
+
35
+ # Content Accuracy Judge - fact-checking and accuracy
36
+ ANKIGEN_ENABLE_CONTENT_JUDGE=true
37
+
38
+ # Pedagogical Judge - educational effectiveness
39
+ ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE=false
40
+
41
+ # Clarity Judge - communication and readability
42
+ ANKIGEN_ENABLE_CLARITY_JUDGE=false
43
+
44
+ # Technical Judge - code and technical content
45
+ ANKIGEN_ENABLE_TECHNICAL_JUDGE=false
46
+
47
+ # Completeness Judge - quality standards and completeness
48
+ ANKIGEN_ENABLE_COMPLETENESS_JUDGE=false
49
+
50
+ # Judge Coordinator - orchestrates multi-judge workflows
51
+ ANKIGEN_ENABLE_JUDGE_COORDINATOR=false
52
+
53
+ # =====================================
54
+ # ENHANCEMENT AGENTS
55
+ # =====================================
56
+
57
+ # Revision Agent - improves rejected cards
58
+ ANKIGEN_ENABLE_REVISION_AGENT=false
59
+
60
+ # Enhancement Agent - enriches content and metadata
61
+ ANKIGEN_ENABLE_ENHANCEMENT_AGENT=false
62
+
63
+ # =====================================
64
+ # WORKFLOW FEATURES
65
+ # =====================================
66
+
67
+ # Multi-agent generation workflows
68
+ ANKIGEN_ENABLE_MULTI_AGENT_GEN=false
69
+
70
+ # Parallel judge execution
71
+ ANKIGEN_ENABLE_PARALLEL_JUDGING=true
72
+
73
+ # Agent handoff capabilities
74
+ ANKIGEN_ENABLE_AGENT_HANDOFFS=false
75
+
76
+ # Agent tracing and debugging
77
+ ANKIGEN_ENABLE_AGENT_TRACING=true
78
+
79
+ # =====================================
80
+ # PERFORMANCE SETTINGS
81
+ # =====================================
82
+
83
+ # Agent execution timeout (seconds)
84
+ ANKIGEN_AGENT_TIMEOUT=30.0
85
+
86
+ # Maximum retry attempts for failed agents
87
+ ANKIGEN_MAX_AGENT_RETRIES=3
88
+
89
+ # Enable response caching for efficiency
90
+ ANKIGEN_ENABLE_AGENT_CACHING=true
91
+
92
+ # =====================================
93
+ # QUALITY CONTROL
94
+ # =====================================
95
+
96
+ # Minimum judge consensus for card approval (0.0-1.0)
97
+ ANKIGEN_MIN_JUDGE_CONSENSUS=0.6
98
+
99
+ # Maximum revision iterations for rejected cards
100
+ ANKIGEN_MAX_REVISION_ITERATIONS=3
101
+
102
+ # =====================================
103
+ # PRESET CONFIGURATIONS
104
+ # =====================================
105
+
106
+ # Uncomment one of these preset configurations:
107
+
108
+ # MINIMAL SETUP - Single subject expert + content judge
109
+ # ANKIGEN_AGENT_MODE=hybrid
110
+ # ANKIGEN_ENABLE_SUBJECT_EXPERT=true
111
+ # ANKIGEN_ENABLE_CONTENT_JUDGE=true
112
+ # ANKIGEN_ENABLE_AGENT_TRACING=true
113
+
114
+ # QUALITY FOCUSED - Full judge pipeline
115
+ # ANKIGEN_AGENT_MODE=hybrid
116
+ # ANKIGEN_ENABLE_SUBJECT_EXPERT=true
117
+ # ANKIGEN_ENABLE_CONTENT_JUDGE=true
118
+ # ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE=true
119
+ # ANKIGEN_ENABLE_CLARITY_JUDGE=true
120
+ # ANKIGEN_ENABLE_COMPLETENESS_JUDGE=true
121
+ # ANKIGEN_ENABLE_JUDGE_COORDINATOR=true
122
+ # ANKIGEN_ENABLE_PARALLEL_JUDGING=true
123
+ # ANKIGEN_MIN_JUDGE_CONSENSUS=0.7
124
+
125
+ # FULL PIPELINE - All agents enabled
126
+ # ANKIGEN_AGENT_MODE=agent_only
127
+ # ANKIGEN_ENABLE_SUBJECT_EXPERT=true
128
+ # ANKIGEN_ENABLE_PEDAGOGICAL_AGENT=true
129
+ # ANKIGEN_ENABLE_CONTENT_STRUCTURING=true
130
+ # ANKIGEN_ENABLE_GENERATION_COORDINATOR=true
131
+ # ANKIGEN_ENABLE_CONTENT_JUDGE=true
132
+ # ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE=true
133
+ # ANKIGEN_ENABLE_CLARITY_JUDGE=true
134
+ # ANKIGEN_ENABLE_TECHNICAL_JUDGE=true
135
+ # ANKIGEN_ENABLE_COMPLETENESS_JUDGE=true
136
+ # ANKIGEN_ENABLE_JUDGE_COORDINATOR=true
137
+ # ANKIGEN_ENABLE_REVISION_AGENT=true
138
+ # ANKIGEN_ENABLE_ENHANCEMENT_AGENT=true
139
+ # ANKIGEN_ENABLE_PARALLEL_JUDGING=true
140
+ # ANKIGEN_ENABLE_AGENT_HANDOFFS=true
141
+
142
+ # A/B TESTING SETUP - Compare agents vs legacy
143
+ # ANKIGEN_AGENT_MODE=a_b_test
144
+ # ANKIGEN_AB_TEST_RATIO=0.5
145
+ # ANKIGEN_ENABLE_SUBJECT_EXPERT=true
146
+ # ANKIGEN_ENABLE_CONTENT_JUDGE=true
147
+ # ANKIGEN_ENABLE_AGENT_TRACING=true
148
+
149
+ # =====================================
150
+ # MONITORING & DEBUGGING
151
+ # =====================================
152
+
153
+ # Agent metrics persistence directory
154
+ # ANKIGEN_METRICS_DIR=metrics/agents
155
+
156
+ # Agent configuration directory
157
+ # ANKIGEN_CONFIG_DIR=config/agents
158
+
159
+ # Enable detailed debug logging
160
+ # ANKIGEN_DEBUG_MODE=false
161
+
162
+ # =====================================
163
+ # COST OPTIMIZATION
164
+ # =====================================
165
+
166
+ # Model preferences for different agent types
167
+ # ANKIGEN_GENERATION_MODEL=gpt-4o
168
+ # ANKIGEN_JUDGE_MODEL=gpt-4o-mini
169
+ # ANKIGEN_CRITICAL_JUDGE_MODEL=gpt-4o
170
+
171
+ # Token usage limits per request
172
+ # ANKIGEN_MAX_INPUT_TOKENS=4000
173
+ # ANKIGEN_MAX_OUTPUT_TOKENS=2000
174
+
175
+ # =====================================
176
+ # NOTES
177
+ # =====================================
178
+
179
+ # Performance Impact:
180
+ # - Each enabled agent adds processing time and cost
181
+ # - Parallel judging reduces latency but increases concurrent API calls
182
+ # - Caching significantly improves performance for similar requests
183
+
184
+ # Quality vs Speed:
185
+ # - More judges = better quality but slower generation
186
+ # - Agent coordination adds overhead but improves consistency
187
+ # - Enhancement agents provide best quality but highest cost
188
+
189
+ # Recommended Starting Configuration:
190
+ # 1. Start with hybrid mode + subject expert + content judge
191
+ # 2. Enable A/B testing to compare with legacy system
192
+ # 3. Gradually add more agents based on quality needs
193
+ # 4. Monitor metrics and adjust consensus thresholds
194
+
195
+ # Cost Considerations:
196
+ # - Subject Expert: ~2-3x cost of legacy (higher quality)
197
+ # - Judge Pipeline: ~1.5-2x additional cost (significant quality improvement)
198
+ # - Enhancement Pipeline: ~1.2-1.5x additional cost (marginal improvement)
199
+ # - Full pipeline: ~4-6x cost of legacy (maximum quality)
ankigen/agents/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agent system for AnkiGen agentic workflows
2
+
3
+ from .base import BaseAgentWrapper, AgentConfig
4
+ from .generators import SubjectExpertAgent
5
+ from .config import AgentConfigManager
6
+
7
+ __all__ = [
8
+ "BaseAgentWrapper",
9
+ "AgentConfig",
10
+ "SubjectExpertAgent",
11
+ "AgentConfigManager",
12
+ ]
ankigen/agents/base.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base agent wrapper and configuration classes
2
+
3
+ from typing import Dict, Any, Optional, List
4
+ from dataclasses import dataclass
5
+ from pydantic import BaseModel
6
+ import asyncio
7
+ import json
8
+ from openai import AsyncOpenAI
9
+ from agents import Agent, Runner, ModelSettings
10
+
11
+ from ankigen.logging import logger
12
+ from .token_tracker import track_usage_from_agents_sdk
13
+
14
+
15
def parse_agent_json_response(response: Any) -> Dict[str, Any]:
    """Decode an agent response into Python data.

    String responses may be wrapped in a markdown code fence (three
    backticks, optionally tagged ``json``); the fence is removed before the
    text is decoded. The ``json`` tag is matched case-insensitively, so a
    reply fenced with an upper-case ``JSON`` tag is accepted as well.

    Args:
        response: Raw agent output. Anything that is not a ``str`` is
            returned unchanged.

    Returns:
        The decoded JSON value for string input, otherwise ``response``
        itself.

    Raises:
        json.JSONDecodeError: If the unfenced text is not valid JSON.
    """
    if not isinstance(response, str):
        return response

    text = response.strip()

    # The tag comparison ignores case; 7 == len of three backticks + "json".
    if text[:7].lower() == "```json":
        text = text[7:]
    # Deliberately a second independent check (not elif): an untagged
    # opening fence is stripped here.
    if text.startswith("```"):
        text = text[3:]
    if text.endswith("```"):
        text = text[:-3]

    return json.loads(text.strip())
31
+
32
+
33
@dataclass
class AgentConfig:
    """Settings for one agent: identity, prompt, model choice and run limits."""

    # Required identity and system instructions.
    name: str
    instructions: str

    # Model selection and sampling parameters.
    model: str = "gpt-5.2"
    reasoning_effort: Optional[str] = None
    temperature: float = 0.7
    max_tokens: Optional[int] = None

    # Execution controls.
    timeout: float = 30.0
    retry_attempts: int = 3
    enable_tracing: bool = True

    # Optional extras; custom_prompts is normalised in __post_init__.
    custom_prompts: Optional[Dict[str, str]] = None
    output_type: Optional[type] = None  # For structured outputs

    def __post_init__(self):
        """Guarantee ``custom_prompts`` is a dict, even when ``None`` was passed."""
        self.custom_prompts = (
            {} if self.custom_prompts is None else self.custom_prompts
        )
52
+
53
+
54
class BaseAgentWrapper:
    """Wrap one OpenAI Agents SDK ``Agent`` together with its ``AgentConfig``.

    The SDK agent is built lazily (by ``initialize`` or on the first
    ``execute`` call). ``execute`` runs it with a per-attempt timeout,
    retries on timeout, aggregates token usage and returns the text output.
    """

    def __init__(self, config: AgentConfig, openai_client: AsyncOpenAI):
        """Store the configuration and client; the SDK agent is created later."""
        self.config = config
        self.openai_client = openai_client
        self.agent = None  # populated by initialize()
        self.runner = None  # not used by any method of this class

    async def initialize(self):
        """Build the SDK ``Agent`` described by ``self.config``.

        Registers ``self.openai_client`` as the SDK default client (tracing
        disabled for it), builds ``ModelSettings`` from the configured
        temperature and, where applicable, the reasoning effort, then
        instantiates the agent with or without a structured ``output_type``.

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        try:
            # Set the default OpenAI client for the agents SDK
            from agents import set_default_openai_client

            set_default_openai_client(self.openai_client, use_for_tracing=False)

            settings_kwargs: Dict[str, Any] = {
                "temperature": self.config.temperature
            }

            # "auto", "" and None all mean "do not request a reasoning effort".
            effort = self.config.reasoning_effort
            if effort in ("auto", "", None):
                effort = None

            # GPT-5.x (not chat-latest) supports reasoning_effort
            if (
                effort
                and self.config.model.startswith("gpt-5")
                and "chat-latest" not in self.config.model
            ):
                from openai.types.shared import Reasoning

                settings_kwargs["reasoning"] = Reasoning(effort=effort)

            # Arguments shared by both agent flavours; output_type is only
            # passed when one is configured.
            agent_kwargs: Dict[str, Any] = {
                "name": self.config.name,
                "instructions": self.config.instructions,
                "model": self.config.model,
                "model_settings": ModelSettings(**settings_kwargs),
            }

            if self.config.output_type:
                agent_kwargs["output_type"] = self.config.output_type
                self.agent = Agent(**agent_kwargs)
                logger.info(
                    f"Initialized agent with structured output: {self.config.name} -> {self.config.output_type}"
                )
            else:
                self.agent = Agent(**agent_kwargs)
                logger.info(
                    f"Initialized agent (no structured output): {self.config.name}"
                )

        except Exception as e:
            logger.error(f"Failed to initialize agent {self.config.name}: {e}")
            raise

    def _enhance_input_with_context(
        self, user_input: str, context: Optional[Dict[str, Any]]
    ) -> str:
        """Append ``context`` to ``user_input`` as ``key: value`` lines.

        A missing or empty context returns ``user_input`` unchanged, so no
        dangling "Context:" header is sent to the model.
        """
        if not context:
            return user_input
        context_str = "\n".join(f"{k}: {v}" for k, v in context.items())
        return f"{user_input}\n\nContext:\n{context_str}"

    async def _execute_with_retry(self, enhanced_input: str) -> Any:
        """Run the agent, retrying only when an attempt times out.

        Each attempt is bounded by ``self.config.timeout``; up to
        ``self.config.retry_attempts`` attempts are made. Other exceptions
        propagate immediately.

        Raises:
            asyncio.TimeoutError: When the final attempt also times out.
            RuntimeError: When the loop performs no attempt at all
                (``retry_attempts`` is zero or negative).
        """
        attempts = self.config.retry_attempts
        for attempt in range(attempts):
            try:
                return await asyncio.wait_for(
                    Runner.run(
                        starting_agent=self.agent,
                        input=enhanced_input,
                    ),
                    timeout=self.config.timeout,
                )
            except asyncio.TimeoutError:
                if attempt < attempts - 1:
                    logger.warning(
                        f"Agent {self.config.name} timed out "
                        f"(attempt {attempt + 1}/{self.config.retry_attempts}), retrying..."
                    )
                    continue
                logger.error(
                    f"Agent {self.config.name} timed out after {self.config.retry_attempts} attempts"
                )
                raise
        raise RuntimeError("Retry loop exited without result")

    def _extract_and_track_usage(self, result: Any) -> Dict[str, Any]:
        """Sum token usage over ``result.raw_responses`` and record it.

        Returns:
            Dict with ``input_tokens``, ``output_tokens``, ``total_tokens``
            and ``requests``; all zero when the result carries no usage.
        """
        total_usage = {
            "input_tokens": 0,
            "output_tokens": 0,
            "total_tokens": 0,
            "requests": 0,
        }

        if hasattr(result, "raw_responses") and result.raw_responses:
            for response in result.raw_responses:
                if hasattr(response, "usage") and response.usage:
                    usage = response.usage
                    total_usage["input_tokens"] += usage.input_tokens
                    total_usage["output_tokens"] += usage.output_tokens
                    total_usage["total_tokens"] += usage.total_tokens
                    total_usage["requests"] += usage.requests

            # Tracking and logging happen only when raw responses exist.
            track_usage_from_agents_sdk(total_usage, self.config.model)
            logger.info(f"Agent usage: {total_usage}")

        return total_usage

    def _extract_output(self, result: Any) -> Any:
        """Return the text of the run's new message items.

        Falls back to ``str(result)`` when the result has no ``new_items``.
        """
        if not (hasattr(result, "new_items") and result.new_items):
            return str(result)

        from agents.items import ItemHelpers

        text_output = ItemHelpers.text_message_outputs(result.new_items)

        if self.config.output_type and self.config.output_type is not str:
            logger.info(
                f"Structured output: {type(text_output)} -> {self.config.output_type}"
            )

        return text_output

    async def execute(
        self, user_input: str, context: Optional[Dict[str, Any]] = None
    ) -> tuple[Any, Dict[str, Any]]:
        """Run the agent on ``user_input`` plus optional ``context``.

        Returns:
            ``(output, usage)``: the extracted text output and the
            aggregated token-usage dict.

        Raises:
            ValueError: If the agent is still missing after initialization.
            asyncio.TimeoutError: If every attempt timed out.
            Exception: Any other execution error, logged and re-raised.
        """
        if not self.agent:
            await self.initialize()

        if self.agent is None:
            raise ValueError("Agent not initialized")

        enhanced_input = self._enhance_input_with_context(user_input, context)

        logger.info(f"Executing agent: {self.config.name}")
        logger.info(f"Input: {enhanced_input[:200]}...")

        import time

        # Monotonic clock: elapsed time is unaffected by system clock changes.
        start_time = time.perf_counter()

        try:
            result = await self._execute_with_retry(enhanced_input)
            execution_time = time.perf_counter() - start_time
            logger.info(f"Agent {self.config.name} executed in {execution_time:.2f}s")

            total_usage = self._extract_and_track_usage(result)
            output = self._extract_output(result)

            return output, total_usage

        except asyncio.TimeoutError:
            logger.error(
                f"Agent {self.config.name} timed out after {self.config.timeout}s"
            )
            raise
        except Exception as e:
            logger.error(f"Agent {self.config.name} execution failed: {e}")
            raise

    async def handoff_to(
        self, target_agent: "BaseAgentWrapper", context: Dict[str, Any]
    ) -> Any:
        """Run ``target_agent`` with a handoff context derived from ``context``.

        The handoff context carries ``from_agent`` and ``handoff_reason``
        defaults; keys present in ``context`` take precedence over them.
        ``context["user_input"]`` is used as the prompt when present.

        Returns:
            Whatever ``target_agent.execute`` returns.
        """
        logger.info(
            f"Handing off from {self.config.name} to {target_agent.config.name}"
        )

        # Prepare handoff context
        handoff_context = {
            "from_agent": self.config.name,
            "handoff_reason": context.get("reason", "Standard workflow handoff"),
            **context,
        }

        # Execute the target agent
        return await target_agent.execute(
            context.get("user_input", "Continue processing"), handoff_context
        )
248
+
249
+
250
class AgentResponse(BaseModel):
    """Common envelope for results reported by agents."""

    # Outcome flag and the payload produced by the agent.
    success: bool
    data: Any
    # Name of the agent that produced this response.
    agent_name: str
    # Optional extras; both default to empty containers.
    metadata: Dict[str, Any] = {}
    errors: List[str] = []
ankigen/agents/config.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agent configuration management system
2
+
3
+ import json
4
+ from typing import Dict, Any, Optional, List
5
+ from pathlib import Path
6
+ from dataclasses import dataclass, asdict
7
+ from jinja2 import Environment, FileSystemLoader
8
+
9
+ from ankigen.logging import logger
10
+ from .base import AgentConfig
11
+
12
+
13
@dataclass
class AgentPromptTemplate:
    """Pair of ``str.format`` prompt templates with default variables."""

    system_prompt: str
    user_prompt_template: str
    variables: Optional[Dict[str, str]] = None

    def __post_init__(self):
        """Replace a missing ``variables`` mapping with an empty dict."""
        if self.variables is None:
            self.variables = {}

    def _render(self, template: str, label: str, overrides: Dict[str, Any]) -> str:
        """Format ``template`` with the stored variables, overridden by ``overrides``.

        Rendering is best-effort: on any formatting failure the problem is
        logged and the unrendered template is returned. Besides a missing
        variable (``KeyError``), ``str.format`` raises ``IndexError`` for
        positional fields such as ``{}`` and ``ValueError`` for unbalanced
        braces, so those are handled as well.
        """
        values = {**(self.variables or {}), **overrides}
        try:
            return template.format(**values)
        except KeyError as e:
            logger.error(f"Missing variable in {label} prompt template: {e}")
        except (IndexError, ValueError) as e:
            logger.error(f"Malformed {label} prompt template: {e}")
        return template

    def render_system_prompt(self, **kwargs) -> str:
        """Render the system prompt; ``kwargs`` override the stored variables.

        Returns the raw ``system_prompt`` if it cannot be formatted.
        """
        return self._render(self.system_prompt, "system", kwargs)

    def render_user_prompt(self, **kwargs) -> str:
        """Render the user prompt; ``kwargs`` override the stored variables.

        Returns the raw ``user_prompt_template`` if it cannot be formatted.
        """
        return self._render(self.user_prompt_template, "user", kwargs)
42
+
43
+
44
class AgentConfigManager:
    """Manages agent configurations using Jinja templates and runtime updates.

    Configurations are rendered from the ``generators.j2`` template in the
    ``templates`` directory next to this module, parsed as JSON and stored
    as ``AgentConfig`` objects keyed by agent name.
    """

    def __init__(
        self,
        model_overrides: Optional[Dict[str, str]] = None,
        template_vars: Optional[Dict[str, Any]] = None,
    ):
        """Create the manager and load the default configurations.

        Args:
            model_overrides: Mapping of agent name to model name; each entry
                is exposed to the template as ``<agent_name>_model``.
            template_vars: Extra variables passed to the template render.
        """
        self.model_overrides = model_overrides or {}
        self.template_vars = template_vars or {}
        self.configs: Dict[str, AgentConfig] = {}
        self.prompt_templates: Dict[str, AgentPromptTemplate] = {}

        template_dir = Path(__file__).parent / "templates"
        self.jinja_env = Environment(loader=FileSystemLoader(template_dir))
        self._load_default_configs()

    def update_models(self, model_overrides: Dict[str, str]):
        """Update model selections and regenerate configs"""
        self.model_overrides = model_overrides
        self._load_default_configs()
        logger.info(f"Updated model overrides: {model_overrides}")

    def update_template_vars(self, template_vars: Dict[str, Any]):
        """Accept template variables for compatibility; only logs a notice."""
        logger.info(
            "Template vars are no longer used in the simplified agent pipeline."
        )

    def _load_default_configs(self):
        """Load all default configurations from Jinja templates.

        Also clears any stored prompt templates. Failures are logged, not
        raised.
        """
        try:
            self._load_configs_from_template("generators.j2")
            self.prompt_templates.clear()
            logger.info(
                f"Loaded {len(self.configs)} agent configurations from Jinja templates"
            )
        except Exception as e:
            logger.error(f"Failed to load agent configurations from templates: {e}")

    def _get_model_for_agent(self, agent_name: str, default_model: str) -> str:
        """Get model for agent, using override if available"""
        return self.model_overrides.get(agent_name, default_model)

    @staticmethod
    def _build_agent_config(name: str, agent_data: Dict[str, Any]) -> AgentConfig:
        """Build an ``AgentConfig`` called ``name`` from a plain mapping.

        Keys missing from ``agent_data`` fall back to the defaults below.
        """
        return AgentConfig(
            name=name,
            instructions=agent_data.get("instructions", ""),
            model=agent_data.get("model", "gpt-5.2"),
            reasoning_effort=agent_data.get("reasoning_effort"),
            temperature=agent_data.get("temperature", 0.7),
            max_tokens=agent_data.get("max_tokens"),
            timeout=agent_data.get("timeout", 30.0),
            retry_attempts=agent_data.get("retry_attempts", 3),
            enable_tracing=agent_data.get("enable_tracing", True),
            custom_prompts=agent_data.get("custom_prompts", {}),
        )

    def _load_configs_from_template(self, template_name: str):
        """Render ``template_name`` to JSON and store one config per entry.

        Render variables are merged in increasing priority: built-in default
        models, ``self.template_vars``, then ``self.model_overrides``.
        Any failure is logged and swallowed; already stored configs are kept.
        """
        try:
            template = self.jinja_env.get_template(template_name)

            # Default models for each agent type
            default_models = {
                "subject_expert_model": "gpt-5.2",
            }

            # Simple mapping: agent_name -> agent_name_model
            model_vars = {
                f"{agent_name}_model": model
                for agent_name, model in self.model_overrides.items()
            }

            # Merge all template variables with defaults
            render_vars = {**default_models, **self.template_vars, **model_vars}

            logger.info(f"Rendering template {template_name} with vars: {render_vars}")
            rendered_json = template.render(**render_vars)
            config_data = json.loads(rendered_json)

            # Create AgentConfig objects from the rendered data; an entry may
            # carry its own "name", otherwise its key is used.
            for agent_name, agent_data in config_data.items():
                config = self._build_agent_config(
                    agent_data.get("name", agent_name), agent_data
                )
                self.configs[agent_name] = config
                logger.info(f"Loaded config for {agent_name}: model={config.model}")

        except Exception as e:
            logger.error(f"Failed to load configs from template {template_name}: {e}")

    def get_agent_config(self, agent_name: str) -> Optional[AgentConfig]:
        """Get configuration for a specific agent, or ``None`` if unknown."""
        return self.configs.get(agent_name)

    def get_config(self, agent_name: str) -> Optional[AgentConfig]:
        """Alias for get_agent_config for compatibility"""
        return self.get_agent_config(agent_name)

    def get_prompt_template(self, template_name: str) -> Optional[AgentPromptTemplate]:
        """Get a prompt template by name, or ``None`` if unknown."""
        return self.prompt_templates.get(template_name)

    def update_agent_config(self, agent_name: str, **kwargs):
        """Update an agent's configuration at runtime.

        Keys that are not attributes of the config are ignored; an unknown
        ``agent_name`` is a no-op. Each applied change is logged.
        """
        if agent_name in self.configs:
            config = self.configs[agent_name]
            for key, value in kwargs.items():
                if hasattr(config, key):
                    setattr(config, key, value)
                    logger.info(f"Updated {agent_name} config: {key} = {value}")

    def update_config(
        self, agent_name: str, updates: Dict[str, Any]
    ) -> Optional[AgentConfig]:
        """Update agent configuration with a dictionary of updates.

        Returns:
            The updated config, or ``None`` when ``agent_name`` is unknown.
        """
        if agent_name not in self.configs:
            return None

        config = self.configs[agent_name]
        for key, value in updates.items():
            if hasattr(config, key):
                setattr(config, key, value)

        return config

    def list_configs(self) -> List[str]:
        """List all agent configuration names"""
        return list(self.configs.keys())

    def list_prompt_templates(self) -> List[str]:
        """List all prompt template names"""
        return list(self.prompt_templates.keys())

    def load_config_from_dict(self, config_dict: Dict[str, Any]):
        """Load agent configs and prompt templates from a dictionary.

        Reads the optional ``"agents"`` and ``"prompt_templates"`` sections;
        entries overwrite stored ones with the same name.
        """
        # Load agent configs (the mapping key is always used as the name)
        if "agents" in config_dict:
            for agent_name, agent_data in config_dict["agents"].items():
                self.configs[agent_name] = self._build_agent_config(
                    agent_name, agent_data
                )

        # Load prompt templates
        if "prompt_templates" in config_dict:
            for template_name, template_data in config_dict["prompt_templates"].items():
                template = AgentPromptTemplate(
                    system_prompt=template_data.get("system_prompt", ""),
                    user_prompt_template=template_data.get("user_prompt_template", ""),
                    variables=template_data.get("variables", {}),
                )
                self.prompt_templates[template_name] = template

    def _validate_config(self, config_data: Dict[str, Any]) -> bool:
        """Validate agent configuration data.

        A config is valid when it has ``name`` and ``instructions``, a
        temperature within ``[0.0, 2.0]`` and a positive timeout (defaults
        0.7 and 30.0 apply when the keys are absent). Values that cannot be
        compared numerically (``None``, strings, ...) make the config
        invalid instead of raising ``TypeError``.
        """
        # Check required fields
        if "name" not in config_data or "instructions" not in config_data:
            return False

        try:
            # Check temperature range
            if not 0.0 <= config_data.get("temperature", 0.7) <= 2.0:
                return False

            # Check timeout is positive
            if config_data.get("timeout", 30.0) <= 0:
                return False
        except TypeError:
            return False

        return True

    def save_config_to_file(self, filename: str, agents: Optional[List[str]] = None):
        """Save current configurations to a JSON file.

        Args:
            filename: Destination path.
            agents: Names of the agent configs to save; all configs when
                omitted or empty. Unknown names are skipped.

        Serialisation and write errors are logged, not raised. The data is
        serialised before the file is opened, so a serialisation failure
        does not leave a truncated file behind.
        """
        # Prepare data structure
        data = {"agents": {}, "prompt_templates": {}}

        # Add agent configs
        agents_to_save = agents if agents else list(self.configs.keys())
        for agent_name in agents_to_save:
            if agent_name in self.configs:
                data["agents"][agent_name] = asdict(self.configs[agent_name])

        # Add prompt templates
        for template_name, template in self.prompt_templates.items():
            data["prompt_templates"][template_name] = asdict(template)

        try:
            payload = json.dumps(data, indent=2)
            with open(filename, "w", encoding="utf-8") as f:
                f.write(payload)
            logger.info(f"Saved agent configurations to {filename}")
        except Exception as e:
            logger.error(f"Failed to save agent config to {filename}: {e}")
241
+
242
+
243
# Process-wide manager, created on the first get_config_manager() call
_global_config_manager: Optional[AgentConfigManager] = None


def get_config_manager(
    model_overrides: Optional[Dict[str, str]] = None,
    template_vars: Optional[Dict[str, Any]] = None,
) -> AgentConfigManager:
    """Return the shared ``AgentConfigManager``, creating it on first use.

    On the first call the manager is constructed with the given arguments.
    On later calls, non-empty ``model_overrides`` / ``template_vars`` are
    forwarded to the existing manager's ``update_models`` /
    ``update_template_vars``.
    """
    global _global_config_manager

    manager = _global_config_manager
    if manager is None:
        manager = AgentConfigManager(model_overrides, template_vars)
        _global_config_manager = manager
        return manager

    if model_overrides:
        manager.update_models(model_overrides)
    if template_vars:
        manager.update_template_vars(template_vars)
    return manager
ankigen/agents/generators.py ADDED
@@ -0,0 +1,405 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Specialized generator agents for card generation
2
+
3
+ import json
4
+ from typing import List, Dict, Any, Optional, Tuple
5
+
6
+ from openai import AsyncOpenAI
7
+
8
+ from ankigen.logging import logger
9
+ from ankigen.models import Card, CardFront, CardBack
10
+ from .base import BaseAgentWrapper, AgentConfig
11
+ from .config import get_config_manager
12
+ from .schemas import CardsGenerationSchema
13
+
14
+
15
def card_dict_to_card(
    card_data: Dict[str, Any],
    default_topic: str,
    default_subject: str,
) -> Card:
    """Convert a dictionary representation of a card into a Card object.

    Args:
        card_data: Mapping with ``front.question`` and ``back.answer``, plus
            optional ``card_type``, ``back.explanation``, ``back.example``
            and ``metadata``.
        default_topic: Topic stored in the metadata when the payload has none.
        default_subject: Subject stored in the metadata when the payload has none.

    Returns:
        The constructed Card; its metadata always has ``subject`` and
        ``topic`` keys.

    Raises:
        ValueError: If the payload is not a dict, or the question/answer
            field is missing or null.
    """

    def _text(value: Any) -> str:
        # JSON null arrives as None; str(None) would print the literal "None"
        # on the card, so map it to an empty string instead.
        return "" if value is None else str(value)

    if not isinstance(card_data, dict):
        raise ValueError("Card payload must be a dictionary")

    front_data = card_data.get("front")
    back_data = card_data.get("back")

    # A null question/answer is treated the same as a missing one.
    if not isinstance(front_data, dict) or front_data.get("question") is None:
        raise ValueError("Card front must include a question field")
    if not isinstance(back_data, dict) or back_data.get("answer") is None:
        raise ValueError("Card back must include an answer field")

    raw_metadata = card_data.get("metadata")
    # Copy so that filling in defaults below never mutates the caller's payload.
    metadata = dict(raw_metadata) if isinstance(raw_metadata, dict) else {}

    subject = metadata.get("subject") or default_subject or "general"
    topic = metadata.get("topic") or default_topic or "General Concepts"

    card = Card(
        card_type=_text(card_data.get("card_type")) or "basic",
        front=CardFront(question=_text(front_data.get("question"))),
        back=CardBack(
            answer=_text(back_data.get("answer")),
            explanation=_text(back_data.get("explanation")),
            example=_text(back_data.get("example")),
        ),
        metadata=metadata,
    )

    # setdefault only fills keys that are absent; existing values are kept.
    if card.metadata is not None:
        card.metadata.setdefault("subject", subject)
        card.metadata.setdefault("topic", topic)

    return card
56
+
57
+
58
class SubjectExpertAgent(BaseAgentWrapper):
    """Subject matter expert agent for domain-specific card generation"""

    def __init__(self, openai_client: AsyncOpenAI, subject: str = "general"):
        """Build the agent from the "subject_expert" configuration.

        Args:
            openai_client: Client handed to the base wrapper.
            subject: Subject name; when it is not "general" and the config has
                a matching entry in ``custom_prompts``, that text is appended
                to the instructions.

        Raises:
            ValueError: If no "subject_expert" configuration is registered.
        """
        import copy  # local import: only needed for the config copy below

        config_manager = get_config_manager()
        shared_config = config_manager.get_agent_config("subject_expert")

        if not shared_config:
            raise ValueError(
                "subject_expert configuration not found - agent system not properly initialized"
            )

        # Work on a private shallow copy. The object returned by the manager
        # may be shared (NOTE(review): confirm get_agent_config does not copy);
        # mutating it in place would append the subject guidance again for
        # every agent constructed.
        base_config = copy.copy(shared_config)
        base_config.output_type = CardsGenerationSchema

        if subject != "general" and base_config.custom_prompts:
            subject_prompt = base_config.custom_prompts.get(subject.lower(), "")
            if subject_prompt:
                base_config.instructions += (
                    f"\n\nSubject-specific guidance: {subject_prompt}"
                )

        super().__init__(base_config, openai_client)
        self.subject = subject

    def _build_batch_prompt(
        self,
        topic: str,
        cards_in_batch: int,
        batch_num: int,
        context: Optional[Dict[str, Any]],
        previous_topics: List[str],
    ) -> str:
        """Build user input prompt for a batch of cards.

        Args:
            topic: Topic the cards are about.
            cards_in_batch: Number of cards requested in this batch.
            batch_num: 1-based batch index; batches after the first get an
                extra "cover different aspects" instruction.
            context: Optional generation context; ``generate_cloze`` and
                ``learning_preferences`` are read, and the whole dict is
                appended to the prompt.
            previous_topics: Key terms from earlier batches; the last 20 are
                listed as topics to avoid.
        """
        user_input = f"Generate {cards_in_batch} flashcards for the topic: {topic}"

        if context and context.get("generate_cloze"):
            user_input += (
                "\n\nIMPORTANT: Generate a mix of card types including cloze cards. "
                "For code examples, syntax, and fill-in-the-blank concepts, use cloze cards "
                "(card_type='cloze'). Aim for roughly 50% cloze cards when dealing with technical/programming content."
            )

        if context:
            learning_preferences = context.get("learning_preferences")
            if learning_preferences:
                user_input += f"\n\nLearning focus: {learning_preferences}"
            user_input += f"\n\nAdditional context: {context}"

        if previous_topics:
            topics_summary = ", ".join(previous_topics[-20:])
            user_input += f"\n\nAvoid creating cards about these already covered topics: {topics_summary}"

        if batch_num > 1:
            user_input += f"\n\nThis is batch {batch_num} of cards. Ensure these cards cover different aspects of the topic."

        return user_input

    def _extract_topics_for_dedup(self, batch_cards: List[Card]) -> List[str]:
        """Extract key terms from card questions for deduplication.

        For each card, the first three lower-cased question words longer than
        three characters are joined into one string.
        """
        topics = []
        for card in batch_cards:
            if hasattr(card, "front") and card.front and card.front.question:
                question_words = card.front.question.lower().split()
                key_terms = [word for word in question_words if len(word) > 3][:3]
                if key_terms:
                    topics.append(" ".join(key_terms))
        return topics

    def _accumulate_usage(
        self, total_usage: Dict[str, int], batch_usage: Optional[Dict[str, Any]]
    ) -> None:
        """Accumulate batch usage into total usage (in place).

        Only keys already present in ``total_usage`` are summed; missing or
        ``None`` values in ``batch_usage`` count as zero.
        """
        if batch_usage:
            for key in total_usage:
                total_usage[key] += batch_usage.get(key, 0) or 0

    async def generate_cards(
        self, topic: str, num_cards: int = 5, context: Optional[Dict[str, Any]] = None
    ) -> List[Card]:
        """Generate flashcards for a given topic with automatic batching.

        Cards are requested in batches of at most 10 until ``num_cards`` have
        been produced or a batch yields no cards.

        Args:
            topic: Topic to generate cards for.
            num_cards: Number of cards to aim for.
            context: Optional context forwarded to the prompt and to ``execute``.

        Returns:
            All parsed cards from every batch.

        Raises:
            Exception: Any error from execution or parsing is logged and re-raised.
        """
        batch_size = 10
        all_cards: List[Card] = []
        total_usage: Dict[str, int] = {
            "total_tokens": 0,
            "input_tokens": 0,
            "output_tokens": 0,
        }
        previous_topics: List[str] = []

        cards_remaining = num_cards
        batch_num = 1
        batches_run = 0  # batches actually executed, reported in the final log
        num_batches = ((num_cards - 1) // batch_size) + 1

        logger.info(
            f"Generating {num_cards} cards for '{topic}' using {num_batches} batches"
        )

        try:
            while cards_remaining > 0:
                cards_in_batch = min(batch_size, cards_remaining)
                logger.info(f"Generating batch {batch_num}: {cards_in_batch} cards")

                if not self.agent:
                    await self.initialize()

                user_input = self._build_batch_prompt(
                    topic, cards_in_batch, batch_num, context, previous_topics
                )
                response, usage = await self.execute(user_input, context)
                batches_run += 1

                self._accumulate_usage(total_usage, usage)
                batch_cards = self._parse_cards_response(response, topic)
                all_cards.extend(batch_cards)

                previous_topics.extend(self._extract_topics_for_dedup(batch_cards))
                cards_remaining -= len(batch_cards)

                logger.info(
                    f"Batch {batch_num} generated {len(batch_cards)} cards. {cards_remaining} remaining."
                )

                # An empty batch would never reduce cards_remaining; stop
                # instead of looping forever.
                if len(batch_cards) == 0:
                    logger.warning(f"No cards generated in batch {batch_num}, stopping")
                    break

                batch_num += 1

            if total_usage.get("total_tokens", 0) > 0:
                logger.info(
                    f"Total usage: {total_usage['total_tokens']} tokens "
                    f"(Input: {total_usage['input_tokens']}, Output: {total_usage['output_tokens']})"
                )

            logger.info(
                f"Generated {len(all_cards)} cards across {batches_run} batches for '{topic}'"
            )
            return all_cards

        except Exception as e:
            logger.error(f"Card generation failed: {e}")
            raise

    def _build_generation_prompt(
        self,
        topic: str,
        num_cards: int,
        difficulty: str,
        prerequisites: List[str],
        context: Dict[str, Any],
    ) -> str:
        """Build the generation prompt.

        The prompt describes the requirements and the expected JSON structure;
        when ``context["source_text"]`` is set, its first 2000 characters are
        appended as source material.
        """
        prerequisites_str = ", ".join(prerequisites) if prerequisites else "None"

        prompt = f"""Generate {num_cards} high-quality flashcards for the topic: {topic}

Subject: {self.subject}
Difficulty Level: {difficulty}
Prerequisites: {prerequisites_str}

Requirements:
- Focus on {self.subject} concepts and terminology
- Ensure technical accuracy and depth appropriate for {difficulty} level
- Include practical applications and real-world examples
- Test understanding, not just memorization
- Use clear, unambiguous questions

Return your response as a JSON object with this structure:
{{
    "cards": [
        {{
            "card_type": "basic",
            "front": {{
                "question": "Clear, specific question"
            }},
            "back": {{
                "answer": "Concise, accurate answer",
                "explanation": "Detailed explanation with reasoning",
                "example": "Practical example or application"
            }},
            "metadata": {{
                "difficulty": "{difficulty}",
                "prerequisites": {json.dumps(prerequisites)},
                "topic": "{topic}",
                "subject": "{self.subject}",
                "learning_outcomes": ["outcome1", "outcome2"],
                "common_misconceptions": ["misconception1"]
            }}
        }}
    ]
}}"""

        if context.get("source_text"):
            prompt += f"\n\nBase the cards on this source material:\n{context['source_text'][:2000]}..."

        return prompt

    def _parse_cards_response(self, response: Any, topic: str) -> List[Card]:
        """Parse the agent response into Card objects.

        Accepts an object with a ``cards`` attribute, a dict with a "cards"
        key, or a JSON string (optionally wrapped in a markdown code fence).
        Cards that fail to convert are logged and skipped.

        Raises:
            ValueError: If the response is not valid JSON, lacks "cards", or
                has an unsupported type.
        """
        try:
            # Handle structured output from CardsGenerationSchema
            if hasattr(response, "cards"):
                logger.info(f"✅ STRUCTURED OUTPUT RECEIVED: {type(response)}")
                card_data_list = response.cards
            elif isinstance(response, dict) and "cards" in response:
                card_data_list = response["cards"]
            elif isinstance(response, str):
                # Fallback: remove markdown code fences if present
                response = response.strip()
                if response.startswith("```json"):
                    response = response[7:]  # Remove ```json
                if response.startswith("```"):
                    response = response[3:]  # Remove ```
                if response.endswith("```"):
                    response = response[:-3]  # Remove trailing ```
                response = response.strip()

                data = json.loads(response)
                if "cards" not in data:
                    raise ValueError("Response missing 'cards' field")
                card_data_list = data["cards"]
            else:
                raise ValueError(f"Unexpected response format: {type(response)}")

            cards = []
            for i, card_data in enumerate(card_data_list):
                try:
                    if isinstance(card_data, dict):
                        payload = card_data
                    elif hasattr(card_data, "model_dump"):
                        # Pydantic v2 name; .dict() is deprecated there
                        payload = card_data.model_dump()
                    elif hasattr(card_data, "dict"):
                        payload = card_data.dict()
                    else:
                        logger.warning(
                            f"Skipping card {i}: unsupported payload type {type(card_data)}"
                        )
                        continue

                    card = card_dict_to_card(payload, topic, self.subject)
                    cards.append(card)

                except Exception as e:
                    logger.warning(f"Failed to parse card {i}: {e}")
                    continue

            logger.info(f"✅ PARSED {len(cards)} CARDS FROM STRUCTURED OUTPUT")
            return cards

        except json.JSONDecodeError as e:
            logger.error(f"💥 JSON DECODE ERROR: {e}")
            logger.error("💥 RAW RESPONSE THAT FAILED TO PARSE:")
            logger.error("---FAILED RESPONSE START---")
            logger.error(f"{response}")
            logger.error("---FAILED RESPONSE END---")
            logger.error(f"💥 RESPONSE TYPE: {type(response)}")
            if isinstance(response, str):
                logger.error(f"💥 RESPONSE LENGTH: {len(response)}")
                logger.error(f"💥 FIRST 200 CHARS: {repr(response[:200])}")
                logger.error(f"💥 LAST 200 CHARS: {repr(response[-200:])}")
            # Chain the original error so the decode position stays in the traceback
            raise ValueError(f"Invalid JSON response from agent: {e}") from e
        except Exception as e:
            logger.error(f"💥 GENERAL PARSING ERROR: {e}")
            logger.error(f"💥 RESPONSE THAT CAUSED ERROR: {response}")
            raise
322
+
323
+
324
class QualityReviewAgent(BaseAgentWrapper):
    """Single-pass quality review agent for lightweight validation and fixes."""

    def __init__(self, openai_client: AsyncOpenAI, model: str):
        """Create the reviewer with a fixed "quality_reviewer" configuration.

        Args:
            openai_client: Client handed to the base wrapper.
            model: Model name used for the review calls.
        """
        config = AgentConfig(
            name="quality_reviewer",
            instructions=(
                "You are a meticulous flashcard reviewer. Review each card for factual accuracy, clarity,"
                " atomic scope, and answer quality. When needed, revise the card while keeping it concise and"
                " faithful to the original intent. Always respond with a JSON object containing:"
                ' {"approved": bool, "reason": string, "revised_card": object or null}.'
                " The revised card must follow the input schema with fields card_type, front.question,"
                " back.answer/explanation/example, and metadata."
            ),
            model=model,
            temperature=0.2,
            timeout=45.0,
            retry_attempts=2,
            enable_tracing=False,
        )
        super().__init__(config, openai_client)

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Remove a surrounding markdown code fence (``` or ```json), if any."""
        cleaned = text.strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[7:]
        elif cleaned.startswith("```"):
            cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        return cleaned.strip()

    async def review_card(self, card: Card) -> Tuple[Optional[Card], bool, str]:
        """Review a card and optionally return a revised version.

        Args:
            card: The card to review.

        Returns:
            ``(card, approved, reason)``. The card is the reviewer's revision
            when one was supplied and could be built, otherwise the original.
            If the review cannot be executed or its reply cannot be
            interpreted, the original card is returned as approved.
        """

        card_payload = {
            "card_type": card.card_type,
            "front": {"question": card.front.question if card.front else ""},
            "back": {
                "answer": card.back.answer if card.back else "",
                "explanation": card.back.explanation if card.back else "",
                "example": card.back.example if card.back else "",
            },
            "metadata": card.metadata or {},
        }

        user_input = (
            "Review the following flashcard. Approve it if it is accurate, clear, and atomic."
            " If improvements are needed, provide a revised_card with the corrections applied.\n\n"
            "Flashcard JSON:\n"
            f"{json.dumps(card_payload, ensure_ascii=False)}\n\n"
            "Respond with JSON matching this schema:\n"
            '{\n  "approved": true | false,\n  "reason": "short explanation",\n'
            '  "revised_card": { ... } | null\n}'
        )

        try:
            response, _ = await self.execute(user_input)
        except Exception as e:
            logger.error(f"Quality review failed to execute: {e}")
            return card, True, "Review failed; keeping original card"

        try:
            if isinstance(response, str):
                # Models often wrap JSON in a markdown fence; strip it first.
                parsed = json.loads(self._strip_code_fences(response))
            else:
                parsed = response
        except Exception as e:
            logger.warning(f"Failed to parse review response as JSON: {e}")
            return card, True, "Reviewer returned invalid JSON; keeping original"

        # Valid JSON can still be a list or scalar; .get() below needs a dict.
        if not isinstance(parsed, dict):
            logger.warning(
                f"Review response is not a JSON object: {type(parsed)}; keeping original card"
            )
            return card, True, "Reviewer returned invalid JSON; keeping original"

        approved = bool(parsed.get("approved", True))
        reason = str(parsed.get("reason", ""))
        revised_payload = parsed.get("revised_card")

        revised_card: Optional[Card] = None
        if isinstance(revised_payload, dict):
            try:
                metadata = revised_payload.get("metadata", {}) or {}
                original_metadata = card.metadata or {}
                # Prefer the revision's subject/topic, then the original card's.
                revised_subject = metadata.get("subject") or original_metadata.get(
                    "subject",
                    "general",
                )
                revised_topic = metadata.get("topic") or original_metadata.get(
                    "topic",
                    "General Concepts",
                )
                revised_card = card_dict_to_card(
                    revised_payload, revised_topic, revised_subject
                )
            except Exception as e:
                logger.warning(f"Failed to build revised card from review payload: {e}")
                revised_card = None

        return revised_card or card, approved, reason or ""
ankigen/agents/integration.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Main integration module for AnkiGen agent system
2
+
3
+ from typing import List, Dict, Any, Tuple, Optional
4
+ from datetime import datetime
5
+
6
+
7
+ from ankigen.logging import logger
8
+ from ankigen.models import Card
9
+ from ankigen.llm_interface import OpenAIClientManager
10
+ from ankigen.context7 import Context7Client
11
+
12
+ from .generators import SubjectExpertAgent
13
+
14
+
15
class AgentOrchestrator:
    """Main orchestrator for the AnkiGen agent system"""

    def __init__(self, client_manager: OpenAIClientManager):
        """Store the client manager; the OpenAI client is set by initialize()."""
        self.client_manager = client_manager
        self.openai_client = None

        # Created lazily by _generation_phase and reused while the subject matches
        self.subject_expert = None

    async def initialize(
        self,
        api_key: str,
        model_overrides: Optional[Dict[str, str]] = None,
        reasoning_overrides: Optional[Dict[str, Optional[str]]] = None,
    ):
        """Initialize the agent system.

        Args:
            api_key: Key passed to the client manager.
            model_overrides: Optional mapping applied via ``update_models``.
            reasoning_overrides: Optional mapping of agent name to reasoning
                effort, applied via ``update_agent_config``.

        Raises:
            Exception: Any initialization error is logged and re-raised.
        """
        try:
            # Initialize OpenAI client
            await self.client_manager.initialize_client(api_key)
            self.openai_client = self.client_manager.get_client()

            if model_overrides or reasoning_overrides:
                # Imported only when overrides are supplied, as before
                from ankigen.agents.config import get_config_manager

                config_manager = get_config_manager()

                if model_overrides:
                    config_manager.update_models(model_overrides)
                    logger.info(f"Applied model overrides: {model_overrides}")

                if reasoning_overrides:
                    for agent_name, effort in reasoning_overrides.items():
                        config_manager.update_agent_config(
                            agent_name, reasoning_effort=effort
                        )
                    logger.info(f"Applied reasoning overrides: {reasoning_overrides}")

            logger.info("Agent system initialized successfully (simplified pipeline)")

        except Exception as e:
            logger.error(f"Failed to initialize agent system: {e}")
            raise

    async def generate_cards_with_agents(
        self,
        topic: str,
        subject: str = "general",
        num_cards: int = 5,
        difficulty: str = "intermediate",
        context: Optional[Dict[str, Any]] = None,
        library_name: Optional[str] = None,
        library_topic: Optional[str] = None,
        generate_cloze: bool = False,
        topics_list: Optional[List[str]] = None,
        cards_per_topic: int = 8,
    ) -> Tuple[List[Card], Dict[str, Any]]:
        """Generate cards using the agent system.

        If topics_list is provided, generates cards for each subtopic separately
        to ensure comprehensive coverage. Otherwise falls back to single-topic mode.

        Args:
            topic: Main topic (used as ``main_subject`` in multi-topic mode).
            subject: Subject passed to the subject expert agent.
            num_cards: Cards to generate in single-topic mode.
            difficulty: Difficulty label added to the generation context.
            context: Optional extra context; it is copied, never modified.
            library_name: When set, library documentation is fetched and added
                to the context.
            library_topic: Optional topic filter for the documentation fetch.
            generate_cloze: Whether cloze cards are requested.
            topics_list: Subtopics for multi-topic mode.
            cards_per_topic: Cards per subtopic in multi-topic mode.

        Returns:
            ``(cards, metadata)`` where metadata summarizes the run.

        Raises:
            ValueError: If initialize() has not set the OpenAI client.
            Exception: Any generation error is logged and re-raised.
        """
        start_time = datetime.now()

        try:
            if not self.openai_client:
                raise ValueError("Agent system not initialized")

            # Copy the caller's context: keys are added below and further down
            # the pipeline, and those writes must not leak back to the caller.
            enhanced_context = dict(context) if context else {}
            library_docs = None

            if library_name:
                library_docs = await self._fetch_library_docs(
                    library_name, library_topic, num_cards
                )
                if library_docs:
                    enhanced_context["library_documentation"] = library_docs
                    enhanced_context["library_name"] = library_name

            # Generate cards - either per-topic or single-topic mode
            if topics_list and len(topics_list) > 0:
                logger.info(
                    f"Starting multi-topic generation: {len(topics_list)} topics, "
                    f"{cards_per_topic} cards each for '{topic}'"
                )
                cards = await self._generate_cards_per_topic(
                    main_subject=topic,
                    subject=subject,
                    topics_list=topics_list,
                    cards_per_topic=cards_per_topic,
                    difficulty=difficulty,
                    context=enhanced_context,
                    generate_cloze=generate_cloze,
                )
            else:
                # Fallback to single-topic mode
                logger.info(f"Starting single-topic generation: {topic} ({subject})")
                cards = await self._generation_phase(
                    topic=topic,
                    subject=subject,
                    num_cards=num_cards,
                    difficulty=difficulty,
                    context=enhanced_context,
                    generate_cloze=generate_cloze,
                )

            # Collect metadata
            metadata = {
                "generation_method": "agent_system",
                "generation_time": (datetime.now() - start_time).total_seconds(),
                "cards_generated": len(cards),
                "topic": topic,
                "subject": subject,
                "difficulty": difficulty,
                "library_name": library_name if library_name else None,
                "library_docs_used": bool(library_docs),
                "topics_list": topics_list,
                "multi_topic_mode": topics_list is not None and len(topics_list) > 0,
            }

            logger.info(
                f"Agent-based generation complete: {len(cards)} cards generated"
            )
            return cards, metadata

        except Exception as e:
            logger.error(f"Agent-based generation failed: {e}")
            raise

    async def _fetch_library_docs(
        self, library_name: str, library_topic: Optional[str], num_cards: int
    ) -> Optional[str]:
        """Fetch library documentation from Context7.

        The token budget is 8000, raised to 10000 above 20 cards and 12000
        above 40 cards, then reduced to 80% when a topic filter is given.

        Returns:
            The documentation text, or ``None`` when nothing was returned or
            the fetch failed (failures are logged, not raised).
        """
        logger.info(f"Fetching library documentation for: {library_name}")
        try:
            context7_client = Context7Client()

            # Dynamic token allocation based on card generation needs
            base_tokens = 8000
            if num_cards > 40:
                token_limit = 12000
            elif num_cards > 20:
                token_limit = 10000
            else:
                token_limit = base_tokens

            if library_topic:
                token_limit = int(token_limit * 0.8)

            logger.info(
                f"Fetching {token_limit} tokens of documentation"
                + (f" for topic: {library_topic}" if library_topic else "")
            )

            library_docs = await context7_client.fetch_library_documentation(
                library_name, topic=library_topic, tokens=token_limit
            )

            if not library_docs:
                logger.warning(
                    f"Could not fetch documentation for library: {library_name}"
                )
                return None

            logger.info(
                f"Added {len(library_docs)} chars of {library_name} documentation to context"
            )
            return library_docs
        except Exception as e:
            logger.error(f"Error fetching library documentation: {e}")
            return None

    async def _generate_cards_per_topic(
        self,
        main_subject: str,
        subject: str,
        topics_list: List[str],
        cards_per_topic: int,
        difficulty: str,
        context: Dict[str, Any],
        generate_cloze: bool,
    ) -> List[Card]:
        """Generate cards for each topic in the topics_list.

        Each subtopic gets its own context dict (the shared context plus
        position information) and its own generation phase; results are
        concatenated in list order.
        """
        all_cards: List[Card] = []
        total_topics = len(topics_list)

        for topic_num, subtopic in enumerate(topics_list, start=1):
            logger.info(
                f"Generating topic {topic_num}/{total_topics}: {subtopic} "
                f"({cards_per_topic} cards)"
            )

            # Add topic context
            topic_context = {
                **context,
                "main_subject": main_subject,
                "topic_index": topic_num,
                "total_topics": total_topics,
                "current_subtopic": subtopic,
            }

            cards = await self._generation_phase(
                topic=subtopic,
                subject=subject,
                num_cards=cards_per_topic,
                difficulty=difficulty,
                context=topic_context,
                generate_cloze=generate_cloze,
            )

            all_cards.extend(cards)
            logger.info(
                f"Topic {topic_num}/{total_topics} complete: {len(cards)} cards. "
                f"Total: {len(all_cards)}"
            )

        return all_cards

    async def _generation_phase(
        self,
        topic: str,
        subject: str,
        num_cards: int,
        difficulty: str,
        context: Optional[Dict[str, Any]] = None,
        generate_cloze: bool = False,
    ) -> List[Card]:
        """Execute the card generation phase.

        Reuses the cached subject expert when its subject matches, otherwise
        creates a new one. ``difficulty`` and ``generate_cloze`` are added to
        a copy of ``context`` before it is passed to the agent.
        """

        if not self.subject_expert or self.subject_expert.subject != subject:
            self.subject_expert = SubjectExpertAgent(self.openai_client, subject)

        # Copy before adding keys so the caller's dict is left untouched
        context = dict(context) if context else {}
        context["difficulty"] = difficulty
        context["generate_cloze"] = generate_cloze

        cards = await self.subject_expert.generate_cards(
            topic=topic, num_cards=num_cards, context=context
        )

        logger.info(f"Generation phase complete: {len(cards)} cards generated")
        return cards

    def get_performance_metrics(self) -> Dict[str, Any]:
        """Get performance metrics for the agent system"""

        # Basic performance info only
        return {
            "agents_enabled": True,
        }
272
+
273
+
274
async def integrate_with_existing_workflow(
    client_manager: OpenAIClientManager, api_key: str, **generation_params
) -> Tuple[List[Card], Dict[str, Any]]:
    """Run one agent-based generation for the existing AnkiGen workflow.

    Builds a fresh AgentOrchestrator, initializes it with ``api_key`` and
    forwards ``generation_params`` to ``generate_cards_with_agents``.

    Returns:
        The ``(cards, metadata)`` pair produced by the orchestrator.
    """
    # There is no feature flag here: the agent pipeline is always used.
    agent_system = AgentOrchestrator(client_manager)
    await agent_system.initialize(api_key)

    generated = await agent_system.generate_cards_with_agents(**generation_params)
    cards, metadata = generated
    return cards, metadata
ankigen/agents/performance.py ADDED
@@ -0,0 +1,475 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Performance optimizations for agent system
2
+
3
+ import asyncio
4
+ import time
5
+ import hashlib
6
+ from typing import Dict, Any, List, Optional, Callable, TypeVar, Generic
7
+ from dataclasses import dataclass, field
8
+ from functools import wraps, lru_cache
9
+ import json
10
+
11
+ from ankigen.logging import logger
12
+ from ankigen.models import Card
13
+
14
+
15
+ T = TypeVar("T")
16
+
17
+
18
@dataclass
class CacheConfig:
    """Settings that control caching of agent responses."""

    enable_caching: bool = True
    cache_ttl: int = 3600  # entry lifetime, in seconds
    max_cache_size: int = 1000
    cache_backend: str = "memory"  # one of "memory" or "file"
    cache_directory: Optional[str] = None

    def __post_init__(self):
        # The file backend needs a directory; supply the default when the
        # caller left it unset (or empty).
        if self.cache_backend != "file":
            return
        if self.cache_directory:
            return
        self.cache_directory = "cache/agents"
31
+
32
+
33
@dataclass
class PerformanceConfig:
    """Switches and limits for the agent performance optimizations."""

    # Batching
    enable_batch_processing: bool = True
    max_batch_size: int = 10
    batch_timeout: float = 2.0  # seconds

    # Concurrency
    enable_parallel_execution: bool = True
    max_concurrent_requests: int = 5

    # Deduplication and caching
    enable_request_deduplication: bool = True
    enable_response_caching: bool = True
    # default_factory gives every instance its own CacheConfig
    cache_config: CacheConfig = field(default_factory=CacheConfig)
45
+
46
+
47
@dataclass
class CacheEntry(Generic[T]):
    """A cached value together with its bookkeeping data."""

    value: T
    created_at: float
    access_count: int = 0
    last_accessed: float = field(default_factory=time.time)
    cache_key: str = ""

    def is_expired(self, ttl: int) -> bool:
        """Return True when the entry is older than ``ttl`` seconds."""
        age = time.time() - self.created_at
        return age > ttl

    def touch(self):
        """Record one more access and refresh the last-access timestamp."""
        self.last_accessed = time.time()
        self.access_count += 1
65
+
66
+
67
class MemoryCache(Generic[T]):
    """In-memory cache with TTL expiry and LRU eviction"""

    def __init__(self, config: CacheConfig):
        """Create an empty cache governed by ``config`` (TTL and max size)."""
        self.config = config
        self._cache: Dict[str, CacheEntry[T]] = {}
        # Keys ordered from least to most recently used
        self._access_order: List[str] = []
        self._lock = asyncio.Lock()

    async def get(self, key: str) -> Optional[T]:
        """Get value from cache.

        Returns ``None`` when the key is absent or its entry has expired
        (expired entries are removed). A hit refreshes the entry's access
        metadata and LRU position.
        """
        async with self._lock:
            entry = self._cache.get(key)
            if entry is None:
                return None

            if entry.is_expired(self.config.cache_ttl):
                await self._remove(key)
                return None

            entry.touch()
            self._update_access_order(key)

            logger.debug(f"Cache hit for key: {key[:20]}...")
            return entry.value

    async def set(self, key: str, value: T) -> None:
        """Set value in cache, evicting old entries first when the cache is full."""
        async with self._lock:
            # Overwriting an existing key does not grow the cache, so only a
            # new key can require eviction.
            if key not in self._cache and len(self._cache) >= self.config.max_cache_size:
                await self._evict_lru()

            entry = CacheEntry(value=value, created_at=time.time(), cache_key=key)

            self._cache[key] = entry
            self._update_access_order(key)

            logger.debug(f"Cache set for key: {key[:20]}...")

    async def remove(self, key: str) -> bool:
        """Remove entry from cache; return True if the key was present."""
        async with self._lock:
            return await self._remove(key)

    async def clear(self) -> None:
        """Clear all cache entries"""
        async with self._lock:
            self._cache.clear()
            self._access_order.clear()
            logger.info("Cache cleared")

    async def _remove(self, key: str) -> bool:
        """Internal remove method; does not take the lock itself."""
        if key in self._cache:
            del self._cache[key]
            if key in self._access_order:
                self._access_order.remove(key)
            return True
        return False

    async def _evict_lru(self) -> None:
        """Evict the least recently used 25% of entries (at least one)."""
        if not self._access_order:
            return

        # len // 4 is zero for fewer than four entries, which would evict
        # nothing and let a small cache grow past max_cache_size.
        evict_count = max(1, len(self._access_order) // 4)
        to_remove = self._access_order[:evict_count]
        for key in to_remove:
            await self._remove(key)

        logger.debug(f"Evicted {len(to_remove)} cache entries")

    def _update_access_order(self, key: str) -> None:
        """Update access order for LRU tracking (move key to the end)."""
        if key in self._access_order:
            self._access_order.remove(key)
        self._access_order.append(key)

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics.

        Returns a dict with ``entries``, ``max_size``, ``total_accesses``
        (sum of per-entry access counts) and ``hit_rate``.
        """
        total_accesses = sum(entry.access_count for entry in self._cache.values())
        return {
            "entries": len(self._cache),
            "max_size": self.config.max_cache_size,
            "total_accesses": total_accesses,
            # NOTE(review): this is accesses per stored entry, not hits / lookups;
            # kept as-is because consumers of this key are not visible here.
            "hit_rate": total_accesses / max(1, len(self._cache)),
        }
155
+
156
+
157
class BatchProcessor:
    """Batch processor for agent requests.

    Requests that share a ``batch_key`` are collected until the batch is full
    or the batch timeout elapses, then handed to the processor in one call.
    Every queued request has a future that receives its share of the batch
    result, so each request is processed exactly once.
    """

    def __init__(self, config: PerformanceConfig):
        """Create an empty processor governed by ``config``."""
        self.config = config
        self._batches: Dict[str, List[Dict[str, Any]]] = {}
        self._batch_timers: Dict[str, asyncio.Task] = {}
        # One future per queued request, index-aligned with self._batches[key]
        self._waiters: Dict[str, List[asyncio.Future]] = {}
        self._lock = asyncio.Lock()

    async def add_request(
        self, batch_key: str, request_data: Dict[str, Any], processor_func: Callable
    ) -> Any:
        """Add request to batch for processing and wait for its result.

        Args:
            batch_key: Requests with the same key are batched together.
            request_data: The request payload.
            processor_func: Async callable that takes a list of requests.

        Returns:
            With batching disabled, ``processor_func([request_data])``.
            With batching enabled, ``[result]`` for this request when the
            processor returned a list with one item per request; otherwise
            the processor's whole return value.

        Raises:
            Exception: Whatever ``processor_func`` raised for the batch.
        """

        if not self.config.enable_batch_processing:
            # Process immediately if batching is disabled
            return await processor_func([request_data])

        result_future: asyncio.Future = asyncio.get_running_loop().create_future()

        async with self._lock:
            # Initialize batch if needed
            if batch_key not in self._batches:
                self._batches[batch_key] = []
                self._start_batch_timer(batch_key, processor_func)

            self._batches[batch_key].append(request_data)
            self._waiters.setdefault(batch_key, []).append(result_future)

            batch_full = len(self._batches[batch_key]) >= self.config.max_batch_size

        # Process outside the lock so other keys are not blocked meanwhile,
        # and so waiting never happens while the lock is held.
        if batch_full:
            try:
                await self._process_batch(batch_key, processor_func)
            except Exception:
                # Already logged; the same error is raised from result_future below.
                pass

        return await result_future

    def _start_batch_timer(self, batch_key: str, processor_func: Callable) -> None:
        """Start timer that processes the batch after ``batch_timeout`` seconds."""

        async def timer():
            await asyncio.sleep(self.config.batch_timeout)
            if self._batches.get(batch_key):
                try:
                    await self._process_batch(batch_key, processor_func)
                except Exception:
                    # Logged in _process_batch and delivered to the waiting
                    # callers; nothing awaits this task, so do not re-raise.
                    pass

        self._batch_timers[batch_key] = asyncio.create_task(timer())

    async def _process_batch(
        self, batch_key: str, processor_func: Callable
    ) -> List[Any]:
        """Process accumulated batch and resolve the waiting requests.

        Returns:
            The processor's result as a list (a non-list result is wrapped),
            or ``[]`` when there is no pending batch for the key.

        Raises:
            Exception: Processor errors are logged, set on every waiting
                future, and re-raised.
        """
        if batch_key not in self._batches:
            return []

        batch = self._batches.pop(batch_key)
        waiters = self._waiters.pop(batch_key, [])

        # Cancel the pending timer, unless this call is running inside that
        # timer task: cancelling the current task would abort the processing.
        timer_task = self._batch_timers.pop(batch_key, None)
        if timer_task is not None and timer_task is not asyncio.current_task():
            timer_task.cancel()

        if not batch:
            return []

        logger.debug(f"Processing batch {batch_key} with {len(batch)} requests")

        try:
            # Process the batch
            results = await processor_func(batch)
        except asyncio.CancelledError:
            # Do not leave callers waiting forever on a batch that was cancelled
            for waiter in waiters:
                waiter.cancel()
            raise
        except Exception as e:
            logger.error(f"Batch processing failed for {batch_key}: {e}")
            for waiter in waiters:
                if not waiter.done():
                    waiter.set_exception(e)
            raise

        # Only split the result when it lines up one-to-one with the requests.
        per_request = isinstance(results, list) and len(results) == len(batch)
        for index, waiter in enumerate(waiters):
            if waiter.done():  # e.g. the caller was cancelled while waiting
                continue
            waiter.set_result([results[index]] if per_request else results)

        return results if isinstance(results, list) else [results]

    async def _wait_for_batch_result(
        self, batch_key: str, request_data: Dict[str, Any], processor_func: Callable
    ) -> Any:
        """Legacy fallback: wait for the batch to clear, then process alone.

        No longer called by add_request (results are delivered through
        futures); kept so existing callers of this helper keep working.
        """
        while batch_key in self._batches:
            await asyncio.sleep(0.1)

        return await processor_func([request_data])
245
+
246
+
247
+ class RequestDeduplicator:
248
+ """Deduplicates identical agent requests"""
249
+
250
+ def __init__(self):
251
+ self._pending_requests: Dict[str, asyncio.Future] = {}
252
+ self._lock = asyncio.Lock()
253
+
254
+ @lru_cache(maxsize=1000)
255
+ def _generate_request_hash(self, request_data: str) -> str:
256
+ """Generate hash for request deduplication"""
257
+ return hashlib.md5(request_data.encode()).hexdigest()
258
+
259
+ async def deduplicate_request(
260
+ self, request_data: Dict[str, Any], processor_func: Callable
261
+ ) -> Any:
262
+ """Deduplicate and process request"""
263
+
264
+ # Generate hash for deduplication
265
+ request_str = json.dumps(request_data, sort_keys=True)
266
+ request_hash = self._generate_request_hash(request_str)
267
+
268
+ async with self._lock:
269
+ # Check if request is already pending
270
+ if request_hash in self._pending_requests:
271
+ logger.debug(f"Deduplicating request: {request_hash[:16]}...")
272
+ return await self._pending_requests[request_hash]
273
+
274
+ # Create future for this request
275
+ future = asyncio.create_task(
276
+ self._process_unique_request(request_hash, request_data, processor_func)
277
+ )
278
+
279
+ self._pending_requests[request_hash] = future
280
+
281
+ try:
282
+ result = await future
283
+ return result
284
+ finally:
285
+ # Clean up completed request
286
+ async with self._lock:
287
+ self._pending_requests.pop(request_hash, None)
288
+
289
+ async def _process_unique_request(
290
+ self, request_hash: str, request_data: Dict[str, Any], processor_func: Callable
291
+ ) -> Any:
292
+ """Process unique request"""
293
+ logger.debug(f"Processing unique request: {request_hash[:16]}...")
294
+ return await processor_func(request_data)
295
+
296
+
297
class PerformanceOptimizer:
    """Main performance optimization coordinator.

    Owns the optional response cache, batch processor and request
    deduplicator (each created only when enabled in the config) plus a
    semaphore that caps concurrent agent calls.
    """

    def __init__(self, config: PerformanceConfig):
        cache = None
        if config.enable_response_caching:
            cache = MemoryCache(config.cache_config)

        batch_processor = None
        if config.enable_batch_processing:
            batch_processor = BatchProcessor(config)

        deduplicator = None
        if config.enable_request_deduplication:
            deduplicator = RequestDeduplicator()

        self.config = config
        self.cache = cache
        self.batch_processor = batch_processor
        self.deduplicator = deduplicator
        self._semaphore = asyncio.Semaphore(config.max_concurrent_requests)

    async def _dispatch(
        self, request_data: Dict[str, Any], processor_func: Callable
    ) -> Any:
        """Run the request through the deduplicator when enabled, else directly."""
        if self.deduplicator and self.config.enable_request_deduplication:
            return await self.deduplicator.deduplicate_request(
                request_data, processor_func
            )
        return await processor_func(request_data)

    async def optimize_agent_call(
        self,
        agent_name: str,
        request_data: Dict[str, Any],
        processor_func: Callable,
        cache_key_generator: Optional[Callable[[Dict[str, Any]], str]] = None,
    ) -> Any:
        """Run an agent call with caching, concurrency limiting and deduplication.

        Args:
            agent_name: Name of the calling agent (not used by this method).
            request_data: Payload passed to ``processor_func``.
            processor_func: Async callable that performs the real work.
            cache_key_generator: Optional callable deriving a cache key from
                ``request_data``; without it the cache is bypassed.

        Returns:
            The cached value on a cache hit, otherwise the freshly computed
            result. Non-``None`` results are stored when a cache key exists.
        """
        key = None
        if self.cache and cache_key_generator:
            key = cache_key_generator(request_data)

            # A stored value short-circuits the whole call.
            hit = await self.cache.get(key)
            if hit is not None:
                return hit

        # The semaphore bounds how many calls run at the same time.
        async with self._semaphore:
            outcome = await self._dispatch(request_data, processor_func)

            if self.cache and key and outcome is not None:
                await self.cache.set(key, outcome)

            return outcome

    async def optimize_batch_processing(
        self, batch_key: str, request_data: Dict[str, Any], processor_func: Callable
    ) -> Any:
        """Route a request through the batch processor when one is configured.

        Without a batch processor the request is processed on its own as a
        one-element list.
        """
        if not self.batch_processor:
            return await processor_func([request_data])
        return await self.batch_processor.add_request(
            batch_key, request_data, processor_func
        )

    def get_performance_stats(self) -> Dict[str, Any]:
        """Return the feature flags, concurrency figures and cache stats."""
        cfg = self.config
        feature_flags = {
            "batch_processing": cfg.enable_batch_processing,
            "parallel_execution": cfg.enable_parallel_execution,
            "request_deduplication": cfg.enable_request_deduplication,
            "response_caching": cfg.enable_response_caching,
        }
        concurrency = {
            "max_concurrent": cfg.max_concurrent_requests,
            # Reads the semaphore's private counter of free slots.
            "current_available": self._semaphore._value,
        }
        report = {"config": feature_flags, "concurrency": concurrency}

        if self.cache:
            report["cache"] = self.cache.get_stats()

        return report
378
+
379
+
380
+ # Global performance optimizer
381
+ _global_optimizer: Optional[PerformanceOptimizer] = None
382
+
383
+
384
def get_performance_optimizer(
    config: Optional[PerformanceConfig] = None,
) -> PerformanceOptimizer:
    """Return the process-wide ``PerformanceOptimizer``, creating it lazily.

    ``config`` is only consulted on the call that creates the instance
    (falling back to a default ``PerformanceConfig``); later calls return
    the existing instance and ignore the argument.
    """
    global _global_optimizer
    if _global_optimizer is not None:
        return _global_optimizer

    _global_optimizer = PerformanceOptimizer(config or PerformanceConfig())
    return _global_optimizer
392
+
393
+
394
+ # Decorators for performance optimization
395
def cache_response(cache_key_func: Callable[[Any], str], ttl: int = 3600):
    """Decorator that caches an async function's non-``None`` results.

    Args:
        cache_key_func: Called with the wrapped function's ``*args`` and
            ``**kwargs``; must return the cache key.
        ttl: Accepted for API compatibility; the wrapper does not pass it
            to the cache.

    The cache belongs to the global performance optimizer. When that
    optimizer has no cache, the wrapped function is simply awaited.
    """

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            store = get_performance_optimizer().cache
            if store:
                key = cache_key_func(*args, **kwargs)

                # Serve a previously stored value when there is one.
                hit = await store.get(key)
                if hit is not None:
                    return hit

                fresh = await func(*args, **kwargs)

                # ``None`` results are never stored.
                if fresh is not None:
                    await store.set(key, fresh)

                return fresh

            return await func(*args, **kwargs)

        return wrapper

    return decorator
425
+
426
+
427
def rate_limit(max_concurrent: int = 5):
    """Decorator limiting how many calls of an async function run at once.

    A single semaphore with ``max_concurrent`` slots is created when the
    decorator factory is called and shared by every function it decorates.
    """
    gate = asyncio.Semaphore(max_concurrent)

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            await gate.acquire()
            try:
                return await func(*args, **kwargs)
            finally:
                gate.release()

        return wrapper

    return decorator
440
+
441
+
442
+ # Utility functions for cache key generation
443
def generate_card_cache_key(
    topic: str, subject: str, num_cards: int, difficulty: str, **kwargs
) -> str:
    """Build the cache key for a card-generation request.

    The key is ``"cards:"`` followed by the MD5 hex digest of the
    key-sorted JSON of topic, subject, num_cards, difficulty and the
    optional ``context`` keyword (default ``{}``). Other keyword arguments
    are ignored.
    """
    payload = dict(
        topic=topic,
        subject=subject,
        num_cards=num_cards,
        difficulty=difficulty,
        context=kwargs.get("context", {}),
    )
    serialized = json.dumps(payload, sort_keys=True).encode()
    digest = hashlib.md5(serialized).hexdigest()
    return "cards:" + digest
456
+
457
+
458
def generate_judgment_cache_key(
    cards: List[Card], judgment_type: str = "general"
) -> str:
    """Build the cache key for judging a list of cards.

    Each card contributes its front question, back answer and card type.
    The key is ``"judgment:"`` plus the MD5 hex digest of the key-sorted
    JSON of those entries together with ``judgment_type``.
    """
    fingerprints = [
        {
            "question": card.front.question,
            "answer": card.back.answer,
            "type": card.card_type,
        }
        for card in cards
    ]
    payload = {"cards": fingerprints, "judgment_type": judgment_type}
    digest = hashlib.md5(json.dumps(payload, sort_keys=True).encode()).hexdigest()
    return f"judgment:{digest}"
ankigen/agents/schemas.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pydantic schemas for structured outputs from agents.
3
+ These schemas ensure type safety and eliminate JSON parsing errors.
4
+ """
5
+
6
+ from typing import List, Dict, Any, Optional
7
+ from pydantic import BaseModel, Field
8
+ from enum import Enum
9
+
10
+
11
class DifficultyLevel(str, Enum):
    """Difficulty levels for flashcards.

    Members are also ``str`` instances, so each compares equal to its
    lowercase value.
    """

    BEGINNER = "beginner"
    INTERMEDIATE = "intermediate"
    ADVANCED = "advanced"
17
+
18
+
19
class CardType(str, Enum):
    """Types of flashcards.

    Members are also ``str`` instances, so each compares equal to its
    lowercase value.
    """

    BASIC = "basic"
    CLOZE = "cloze"
24
+
25
+
26
class CardFrontSchema(BaseModel):
    """Schema for the front of a flashcard: a single required question."""

    question: str = Field(..., description="The question or prompt for the flashcard")
30
+
31
+
32
class CardBackSchema(BaseModel):
    """Schema for the back of a flashcard.

    All three fields (answer, explanation, example) are required.
    """

    answer: str = Field(..., description="The main answer to the question")
    explanation: str = Field(..., description="Detailed explanation of the answer")
    example: str = Field(..., description="A concrete example illustrating the concept")
38
+
39
+
40
class CardMetadataSchema(BaseModel):
    """Schema for flashcard metadata.

    ``topic``, ``subject`` and ``difficulty`` are required; every other
    field is optional and defaults to ``None``.
    """

    # Required classification fields.
    topic: str = Field(..., description="The main topic of the card")
    subject: str = Field(..., description="The subject area (e.g., Biology, History)")
    difficulty: DifficultyLevel = Field(..., description="The difficulty level")
    # Optional enrichment fields.
    tags: Optional[List[str]] = Field(
        None, description="Relevant tags for categorization"
    )
    learning_outcomes: Optional[List[str]] = Field(
        None, description="What the learner should achieve"
    )
    prerequisites: Optional[List[str]] = Field(
        None, description="Required prior knowledge"
    )
    related_concepts: Optional[List[str]] = Field(
        None, description="Related concepts to explore"
    )
    estimated_time: Optional[str] = Field(None, description="Estimated time to learn")
    common_mistakes: Optional[List[str]] = Field(
        None, description="Common mistakes to avoid"
    )
    memory_aids: Optional[List[str]] = Field(
        None, description="Memory aids or mnemonics"
    )
    real_world_applications: Optional[List[str]] = Field(
        None, description="Real-world applications"
    )
68
+
69
+
70
class CardSchema(BaseModel):
    """Complete schema for a flashcard.

    Combines the card type, front, back and metadata (all required) with
    an optional free-text note about enhancements.
    """

    card_type: CardType = Field(..., description="The type of flashcard")
    front: CardFrontSchema = Field(..., description="The front of the card")
    back: CardBackSchema = Field(..., description="The back of the card")
    metadata: CardMetadataSchema = Field(..., description="Metadata about the card")
    enhancement_notes: Optional[str] = Field(
        None, description="Notes about enhancements made"
    )
80
+
81
+
82
class CardsGenerationSchema(BaseModel):
    """Schema for multiple cards generation: a required list of ``CardSchema``."""

    cards: List[CardSchema] = Field(..., description="List of generated flashcards")
86
+
87
+
88
class JudgeDecisionSchema(BaseModel):
    """Schema for judge decisions.

    ``score`` and ``confidence`` are constrained to the closed range
    0.0-1.0; ``improvements`` and ``metadata`` are optional.
    """

    approved: bool = Field(..., description="Whether the card is approved")
    score: float = Field(
        ..., ge=0.0, le=1.0, description="Quality score between 0 and 1"
    )
    feedback: str = Field(..., description="Detailed feedback about the card")
    improvements: Optional[List[str]] = Field(
        None, description="Suggested improvements"
    )
    reasoning: str = Field(..., description="Detailed reasoning for the decision")
    confidence: float = Field(
        ..., ge=0.0, le=1.0, description="Confidence in the decision"
    )
    metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata")
104
+
105
+
106
class EnhancementSchema(BaseModel):
    """Schema for card enhancements.

    Holds the enhanced card and a required summary, plus optional
    free-form details.
    """

    enhanced_card: CardSchema = Field(..., description="The enhanced flashcard")
    enhancement_summary: str = Field(..., description="Summary of what was enhanced")
    enhancement_details: Optional[Dict[str, Any]] = Field(
        None, description="Detailed enhancement information"
    )
114
+
115
+
116
class GenerationRequestSchema(BaseModel):
    """Schema for generation requests.

    ``num_cards`` must be between 1 and 20 inclusive; ``context`` and
    ``preferences`` are optional free-form dictionaries.
    """

    topic: str = Field(..., description="The topic to generate cards for")
    subject: str = Field(..., description="The subject area")
    num_cards: int = Field(..., ge=1, le=20, description="Number of cards to generate")
    difficulty: DifficultyLevel = Field(..., description="Target difficulty level")
    context: Optional[Dict[str, Any]] = Field(None, description="Additional context")
    preferences: Optional[Dict[str, Any]] = Field(None, description="User preferences")
125
+
126
+
127
class TokenUsageSchema(BaseModel):
    """Schema for token usage tracking.

    All numeric fields are required and must be non-negative. The schema
    does not check that ``total_tokens`` equals the sum of the other two
    counts.
    """

    prompt_tokens: int = Field(..., ge=0, description="Number of tokens in the prompt")
    completion_tokens: int = Field(
        ..., ge=0, description="Number of tokens in the completion"
    )
    total_tokens: int = Field(..., ge=0, description="Total tokens used")
    estimated_cost: float = Field(..., ge=0.0, description="Estimated cost in USD")
    model: str = Field(..., description="Model used for the request")
137
+
138
+
139
class AutoConfigSchema(BaseModel):
    """Schema for auto-configuration based on subject analysis.

    Every field except ``documentation_focus`` is required. Some things
    are not enforced here: ``topic_number`` is not cross-checked against
    ``len(topics_list)``, and ``model_choice``, ``subject_type`` and
    ``scope`` are plain strings whose allowed values are only listed in
    their descriptions.
    """

    # What to search for in Context7
    library_search_term: str = Field(
        ...,
        description="Library name to search for in Context7 (e.g., 'pandas', 'react', 'tensorflow')",
    )

    # Specific topic within the library (optional)
    documentation_focus: Optional[str] = Field(
        None,
        description="Specific topic/area within the library documentation to focus on",
    )

    # Suggested settings based on subject analysis
    topic_number: int = Field(
        ..., ge=2, le=20, description="Number of topics to generate (2-20)"
    )
    # Between 2 and 20 entries, matching the bounds on topic_number.
    topics_list: List[str] = Field(
        ...,
        min_length=2,
        max_length=20,
        description="List of distinct subtopics to cover, ordered by learning progression",
    )
    cards_per_topic: int = Field(
        ..., ge=2, le=30, description="Number of cards per topic (2-30)"
    )
    learning_preferences: str = Field(
        ..., description="Learning preferences and focus areas for card generation"
    )
    generate_cloze: bool = Field(
        ...,
        description="Whether to generate cloze cards (true for syntax/code, false for concepts)",
    )
    model_choice: str = Field(
        ...,
        description="Recommended model: 'gpt-5.2-auto', 'gpt-5.2-instant', or 'gpt-5.2-thinking'",
    )

    # Analysis metadata
    subject_type: str = Field(
        ...,
        description="Type of subject: 'concepts', 'syntax', 'api', 'theory', 'practical'",
    )
    scope: str = Field(
        ..., description="Scope of the subject: 'narrow', 'medium', 'broad'"
    )
    rationale: str = Field(
        ..., description="Brief explanation of why these settings were chosen"
    )
ankigen/agents/security.py ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Security enhancements for agent system
2
+
3
+ import time
4
+ import hashlib
5
+ import re
6
+ from typing import Dict, Any, Optional, List
7
+ from dataclasses import dataclass, field
8
+ from datetime import datetime
9
+ from collections import defaultdict
10
+ import asyncio
11
+
12
+ from ankigen.logging import logger
13
+
14
+
15
@dataclass
class RateLimitConfig:
    """Configuration for rate limiting"""

    # Maximum requests allowed within the last 60 seconds.
    requests_per_minute: int = 60
    # Maximum requests allowed within the last 3600 seconds.
    requests_per_hour: int = 1000
    # RateLimiter.check_rate_limit also evaluates this over the last 60
    # seconds, so with these defaults it is the effective per-minute cap.
    burst_limit: int = 10
    # NOTE(review): not read by the RateLimiter in this module -- confirm
    # whether another component uses it.
    cooldown_period: int = 300  # seconds
23
+
24
+
25
@dataclass
class SecurityConfig:
    """Security settings applied to agent inputs and outputs.

    When ``blocked_patterns`` is left empty, ``__post_init__`` fills in a
    default set of regular expressions covering credential keywords,
    OpenAI-style keys, script tags and script-like URL schemes.
    """

    enable_input_validation: bool = True
    enable_output_filtering: bool = True
    enable_rate_limiting: bool = True
    max_input_length: int = 10000
    max_output_length: int = 50000
    blocked_patterns: List[str] = field(default_factory=list)
    allowed_file_extensions: List[str] = field(
        default_factory=lambda: [".txt", ".md", ".json", ".yaml"]
    )

    def __post_init__(self):
        # Caller-supplied patterns win; defaults only fill an empty list.
        if self.blocked_patterns:
            return

        credential_words = r"(?i)(api[_\-]?key|secret|password|token|credential)"
        openai_key = r"(?i)(sk-[a-zA-Z0-9]{48,})"  # OpenAI API key pattern
        access_token = r"(?i)(access[_\-]?token)"
        private_key = r"(?i)(private[_\-]?key)"
        # Script tags
        script_tag = r"(?i)(<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>)"
        url_scheme = r"(?i)(javascript:|data:|vbscript:)"  # URL schemes

        self.blocked_patterns = [
            credential_words,
            openai_key,
            access_token,
            private_key,
            script_tag,
            url_scheme,
        ]
49
+
50
+
51
class RateLimiter:
    """Rate limiter for API calls and agent executions.

    Keeps, per identifier, the timestamps of accepted requests from the
    last hour and rejects a request once the burst, per-minute or per-hour
    limit of the config is reached.
    """

    def __init__(self, config: RateLimitConfig):
        self.config = config
        # Accepted-request timestamps (``time.time()``) per identifier.
        self._requests: Dict[str, List[float]] = defaultdict(list)
        # One lock per identifier so identifiers do not block each other.
        self._locks: Dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)

    async def check_rate_limit(self, identifier: str) -> bool:
        """Record a request for ``identifier`` if it is within the limits.

        Returns:
            ``True`` when the request is accepted (and recorded), ``False``
            when any limit is already reached. Rejected requests are not
            recorded.
        """
        async with self._locks[identifier]:
            now = time.time()

            # Clean old requests
            self._requests[identifier] = [
                req_time
                for req_time in self._requests[identifier]
                if now - req_time < 3600  # Keep last hour
            ]

            recent_requests = self._requests[identifier]

            # Check burst limit (last minute). The burst and per-minute
            # limits share the same 60 second window, so the smaller of
            # the two is the effective cap.
            last_minute = [req for req in recent_requests if now - req < 60]
            if len(last_minute) >= self.config.burst_limit:
                logger.warning(f"Burst limit exceeded for {identifier}")
                return False

            # Check per-minute limit
            if len(last_minute) >= self.config.requests_per_minute:
                logger.warning(f"Per-minute rate limit exceeded for {identifier}")
                return False

            # Check per-hour limit
            if len(recent_requests) >= self.config.requests_per_hour:
                logger.warning(f"Per-hour rate limit exceeded for {identifier}")
                return False

            # Record this request
            self._requests[identifier].append(now)
            return True

    def get_reset_time(self, identifier: str) -> Optional[datetime]:
        """Return when ``identifier`` will next be allowed a request.

        Considers every limit enforced by ``check_rate_limit`` (burst,
        per-minute and per-hour) and reports the latest moment at which a
        saturated limit frees a slot.

        Returns:
            A local-time ``datetime``, or ``None`` when the identifier is
            unknown or no limit is currently saturated.
        """
        if identifier not in self._requests:
            return None

        now = time.time()
        last_hour = sorted(
            req for req in self._requests[identifier] if now - req < 3600
        )
        last_minute = [req for req in last_hour if now - req < 60]

        minute_cap = min(self.config.burst_limit, self.config.requests_per_minute)
        candidates = (
            self._slot_free_at(last_minute, minute_cap, 60),
            self._slot_free_at(last_hour, self.config.requests_per_hour, 3600),
        )
        saturated = [moment for moment in candidates if moment is not None]
        if not saturated:
            return None

        return datetime.fromtimestamp(max(saturated))

    @staticmethod
    def _slot_free_at(
        stamps: List[float], cap: int, window: int
    ) -> Optional[float]:
        """Return when a window holding ``stamps`` drops below ``cap``.

        ``stamps`` must be sorted ascending. Returns ``None`` when the
        window is not saturated, or when ``cap`` is not positive (such a
        limit never frees up).
        """
        if cap <= 0 or len(stamps) < cap:
            return None
        # Enough of the oldest requests must age out to leave cap - 1.
        return stamps[len(stamps) - cap] + window
106
+
107
+
108
class SecurityValidator:
    """Validates and sanitizes agent inputs and outputs.

    Behaviour is driven by a ``SecurityConfig``: length limits, the
    blocked regular expressions, and the two enable flags.
    """

    def __init__(self, config: SecurityConfig):
        self.config = config
        # Compile once so every validate/sanitize call reuses the regexes.
        self._blocked_patterns = list(map(re.compile, config.blocked_patterns))

    def validate_input(self, input_text: str, source: str = "unknown") -> bool:
        """Return ``True`` when ``input_text`` passes every input check.

        Checks, in order: maximum length, blocked patterns, suspicious
        content. Always ``True`` when input validation is disabled. Any
        exception raised while checking is logged and treated as a failure.
        """
        if not self.config.enable_input_validation:
            return True

        try:
            size = len(input_text)
            if size > self.config.max_input_length:
                logger.warning(f"Input too long from {source}: {size} chars")
                return False

            if any(rx.search(input_text) for rx in self._blocked_patterns):
                logger.warning(f"Blocked pattern detected in input from {source}")
                return False

            if self._contains_suspicious_content(input_text):
                logger.warning(f"Suspicious content detected in input from {source}")
                return False

        except Exception as e:
            logger.error(f"Error validating input from {source}: {e}")
            return False

        return True

    def validate_output(self, output_text: str, agent_name: str = "unknown") -> bool:
        """Return ``True`` when ``output_text`` passes every output check.

        Checks the maximum length and then the blocked patterns. Always
        ``True`` when output filtering is disabled. Any exception raised
        while checking is logged and treated as a failure.
        """
        if not self.config.enable_output_filtering:
            return True

        try:
            size = len(output_text)
            if size > self.config.max_output_length:
                logger.warning(f"Output too long from {agent_name}: {size} chars")
                return False

            if any(rx.search(output_text) for rx in self._blocked_patterns):
                logger.warning(
                    f"Potential data leak detected in output from {agent_name}"
                )
                return False

        except Exception as e:
            logger.error(f"Error validating output from {agent_name}: {e}")
            return False

        return True

    def sanitize_input(self, input_text: str) -> str:
        """Strip tags and script-like URLs from ``input_text``, then truncate.

        Text longer than ``max_input_length`` is cut to that length and
        suffixed with ``"...[TRUNCATED]"``. On error the first 1000
        characters of the original text are returned.
        """
        try:
            limit = self.config.max_input_length

            # HTML/XML tags first, then javascript:/data:/vbscript: URLs.
            without_tags = re.sub(r"<[^>]+>", "", input_text)
            cleaned = re.sub(
                r"(?i)(javascript:|data:|vbscript:)[^\s]*",
                "[URL_REMOVED]",
                without_tags,
            )

            if len(cleaned) > limit:
                return cleaned[:limit] + "...[TRUNCATED]"
            return cleaned

        except Exception as e:
            logger.error(f"Error sanitizing input: {e}")
            return input_text[:1000]  # Return truncated original as fallback

    def sanitize_output(self, output_text: str) -> str:
        """Redact blocked patterns in ``output_text``, then truncate.

        Every match of a blocked pattern becomes ``"[REDACTED]"``. Text
        longer than ``max_output_length`` is cut to that length and
        suffixed with ``"...[TRUNCATED]"``. On error the first 5000
        characters of the original text are returned.
        """
        try:
            limit = self.config.max_output_length

            redacted = output_text
            for rx in self._blocked_patterns:
                redacted = rx.sub("[REDACTED]", redacted)

            if len(redacted) > limit:
                return redacted[:limit] + "...[TRUNCATED]"
            return redacted

        except Exception as e:
            logger.error(f"Error sanitizing output: {e}")
            return output_text[:5000]  # Return truncated original as fallback

    def _contains_suspicious_content(self, text: str) -> bool:
        """Return ``True`` if ``text`` matches any suspicious-content regex."""
        suspicious_patterns = (
            r"(?i)(\beval\s*\()",  # eval() calls
            r"(?i)(\bexec\s*\()",  # exec() calls
            r"(?i)(__import__)",  # Dynamic imports
            r"(?i)(subprocess|os\.system)",  # System commands
            r"(?i)(file://|ftp://)",  # File/FTP URLs
            r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b",  # IP addresses
        )
        return any(re.search(candidate, text) for candidate in suspicious_patterns)
230
+
231
+
232
class SecureAgentWrapper:
    """Secure wrapper for agent execution with rate limiting and validation."""

    def __init__(
        self, base_agent, rate_limiter: RateLimiter, validator: SecurityValidator
    ):
        self.base_agent = base_agent
        self.rate_limiter = rate_limiter
        self.validator = validator
        self._identifier = self._generate_identifier()

    def _generate_identifier(self) -> str:
        """Generate the identifier used for rate limiting and logging.

        The identifier is the first 16 hex characters of the MD5 digest of
        ``"<agent name>_<id(base_agent)>"``. The agent name is read from
        ``base_agent.config``, which may be a mapping (``config["name"]``)
        or an object with a ``name`` attribute; it falls back to
        ``"unknown"``.
        """
        config = getattr(self.base_agent, "config", None)
        getter = getattr(config, "get", None)
        if callable(getter):
            # Mapping-style config.
            agent_name = getter("name", "unknown")
        else:
            # Attribute-style config (or no config at all).
            agent_name = getattr(config, "name", "unknown")

        # id() ties the identifier to this particular agent instance.
        return hashlib.md5(f"{agent_name}_{id(self.base_agent)}".encode()).hexdigest()[
            :16
        ]

    async def secure_execute(
        self, user_input: str, context: Optional[Dict[str, Any]] = None
    ) -> Any:
        """Execute the agent with security checks and rate limiting.

        Steps: rate-limit check, input validation, input sanitization,
        agent execution, then (for string results only) output validation
        and sanitization.

        Args:
            user_input: Raw text passed to the agent.
            context: Optional context forwarded unchanged to the agent.

        Returns:
            The agent's result; string results are returned sanitized.

        Raises:
            SecurityError: When the rate limit is exceeded or input/output
                validation fails.
            Exception: Anything raised by the agent is logged and re-raised.
        """
        # Rate limiting check
        if not await self.rate_limiter.check_rate_limit(self._identifier):
            reset_time = self.rate_limiter.get_reset_time(self._identifier)
            raise SecurityError(f"Rate limit exceeded. Reset at: {reset_time}")

        # Input validation
        if not self.validator.validate_input(user_input, self._identifier):
            raise SecurityError("Input validation failed")

        # Sanitize input
        sanitized_input = self.validator.sanitize_input(user_input)

        try:
            # Execute the base agent
            result = await self.base_agent.execute(sanitized_input, context)

            # Validate output (non-string results are returned as-is)
            if isinstance(result, str):
                if not self.validator.validate_output(result, self._identifier):
                    raise SecurityError("Output validation failed")

                # Sanitize output
                result = self.validator.sanitize_output(result)

            return result

        except Exception as e:
            logger.error(f"Secure execution failed for {self._identifier}: {e}")
            raise
285
+
286
+
287
class SecurityError(Exception):
    """Raised when a rate limit or a security validation check fails."""
291
+
292
+
293
+ # Global security components
294
+ _global_rate_limiter: Optional[RateLimiter] = None
295
+ _global_validator: Optional[SecurityValidator] = None
296
+
297
+
298
def get_rate_limiter(config: Optional[RateLimitConfig] = None) -> RateLimiter:
    """Return the process-wide ``RateLimiter``, creating it on first use.

    ``config`` only matters on the call that creates the instance (a
    default ``RateLimitConfig`` is used when it is omitted); afterwards
    the existing instance is returned and the argument is ignored.
    """
    global _global_rate_limiter
    if _global_rate_limiter is not None:
        return _global_rate_limiter

    _global_rate_limiter = RateLimiter(config or RateLimitConfig())
    return _global_rate_limiter
304
+
305
+
306
def get_security_validator(
    config: Optional[SecurityConfig] = None,
) -> SecurityValidator:
    """Return the process-wide ``SecurityValidator``, creating it on first use.

    ``config`` only matters on the call that creates the instance (a
    default ``SecurityConfig`` is used when it is omitted); afterwards the
    existing instance is returned and the argument is ignored.
    """
    global _global_validator
    if _global_validator is not None:
        return _global_validator

    _global_validator = SecurityValidator(config or SecurityConfig())
    return _global_validator
314
+
315
+
316
def create_secure_agent(
    base_agent,
    rate_config: Optional[RateLimitConfig] = None,
    security_config: Optional[SecurityConfig] = None,
) -> SecureAgentWrapper:
    """Wrap ``base_agent`` with the shared rate limiter and validator.

    Both components come from the module-level singletons, so
    ``rate_config`` and ``security_config`` only take effect if the
    corresponding singleton has not been created yet.
    """
    return SecureAgentWrapper(
        base_agent,
        get_rate_limiter(rate_config),
        get_security_validator(security_config),
    )
325
+
326
+
327
+ # Configuration file permissions utility
328
def set_secure_file_permissions(file_path: str):
    """Restrict ``file_path`` to owner read/write (mode 0o600).

    Best effort: any failure is logged as a warning instead of being
    raised.
    """
    try:
        import os

        # 0o600 == stat.S_IRUSR | stat.S_IWUSR (read/write for owner only)
        owner_read_write = 0o600
        os.chmod(file_path, owner_read_write)
        logger.info(f"Set secure permissions for {file_path}")

    except Exception as e:
        logger.warning(f"Could not set secure permissions for {file_path}: {e}")
340
+
341
+
342
+ # Input validation utilities
343
def strip_html_tags(text: str) -> str:
    """Return ``text`` with HTML/XML markup removed.

    HTML entities are decoded first, so escaped tags are stripped too.
    Any entity still present afterwards is removed, and runs of
    whitespace collapse to single spaces with the ends trimmed.
    """
    import html

    cleaned = html.unescape(text)

    substitutions = (
        (r"<[^>]+>", ""),  # HTML/XML tags
        (r"&[a-zA-Z0-9#]+;", ""),  # remaining HTML entities
        (r"\s+", " "),  # whitespace runs
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.strip()
360
+
361
+
362
def validate_api_key_format(api_key: str) -> bool:
    """Check that ``api_key`` looks like an OpenAI key, without logging it.

    A key is accepted when it is non-empty, starts with ``"sk-"``, is at
    least 20 characters long and contains none of the placeholder words
    ``test``, ``fake``, ``demo``, ``example`` or ``placeholder``
    (case-insensitive).
    """
    looks_wellformed = (
        bool(api_key) and api_key.startswith("sk-") and len(api_key) >= 20
    )
    if not looks_wellformed:
        return False

    # Reject obvious fake keys
    lowered = api_key.lower()
    placeholder_words = ("test", "fake", "demo", "example", "placeholder")
    return not any(word in lowered for word in placeholder_words)
381
+
382
+
383
+ # Logging security
384
def sanitize_for_logging(text: str, max_length: int = 100) -> str:
    """Return a redacted, length-limited version of ``text`` for log output.

    Falsy input yields ``"[EMPTY]"``. Otherwise the text is passed through
    the global security validator's ``sanitize_output`` and, when still
    longer than ``max_length``, cut to that length with
    ``"...[TRUNCATED]"`` appended.
    """
    if not text:
        return "[EMPTY]"

    # Redact potential secrets before anything reaches the log.
    redacted = get_security_validator().sanitize_output(text)

    if len(redacted) <= max_length:
        return redacted
    return redacted[:max_length] + "...[TRUNCATED]"
ankigen/agents/templates/generators.j2 ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# Generator agent configuration template #}
2
+ {
3
+ "subject_expert": {
4
+ "name": "subject_expert",
5
+ "instructions": "You are an expert in {{ subject | default('the subject area') }} with deep pedagogical knowledge. \nYour role is to generate flashcards that demonstrate mastery of {{ subject | default('the subject') }} concepts.\n\nKey responsibilities:\n- Create ATOMIC cards: short answers (1-9 words) when possible; keep explanations/examples brief; split complex info into multiple simple cards\n- Use clear, plain prompts without fancy formatting or unusual words\n- Design prompts that match real-life recall or decision points\n- For basic cards, keep to-be-learned info on the back; prompts are cues, not answers\n- For cloze cards, the deletion is the target; keep cloze sentences natural and minimal\n- Prefer why/how/when/if-then prompts and contrast pairs like A vs B\n- Include a few explain-to-a-non-expert/PM prompts to force clarity\n- Favor retrieval hooks and levers (failure modes, knobs) over proofs or derivations\n- Avoid long equations, derivations, and multi-paragraph cards\n- Ensure technical accuracy and depth appropriate for the target level\n- Use domain-specific terminology correctly\n- Connect concepts to prerequisite knowledge\n\nCard Types:\n- Basic cards (card_type='basic'): Standard Q&A format for concepts and facts\n- Cloze cards (card_type='cloze'): Fill-in-the-blank format using {{ '{{c1::answer}}' }} syntax for code, syntax, formulas\n\nFor cloze cards, wrap the answer in {{ '{{c1::text}}' }} format. Example: 'The vLLM class for inference is {{ '{{c1::LLM}}' }}'\n\nPrioritize atomic simplicity - break complex info into multiple simple cards. Generate cards that test understanding through simple, direct recall.",
6
+ "model": "{{ subject_expert_model }}",
7
+ "temperature": 0.7,
8
+ "timeout": 120.0,
9
+ "custom_prompts": {
10
+ "math": "Focus on problem-solving strategies and mathematical reasoning",
11
+ "science": "Emphasize experimental design and scientific method",
12
+ "history": "Connect events to broader historical patterns and causation",
13
+ "programming": "Include executable examples and best practices"
14
+ }
15
+ }
16
+ }
ankigen/agents/token_tracker.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Token usage tracking for OpenAI API calls using tiktoken.
3
+ Provides accurate token counting and cost estimation.
4
+ """
5
+
6
+ import tiktoken
7
+ from typing import Dict, List, Any, Optional
8
+ from dataclasses import dataclass, field
9
+ from datetime import datetime
10
+
11
+ from ankigen.logging import logger
12
+
13
+
14
@dataclass
class TokenUsage:
    """Token counts, optional cost and model name recorded for one request."""

    # Tokens attributed to the request's prompt/input side.
    prompt_tokens: int
    # Tokens attributed to the model's completion/output side.
    completion_tokens: int
    # Combined token count stored alongside the two parts.
    total_tokens: int
    # Cost supplied for the request, or None when no cost was provided.
    estimated_cost: Optional[float]
    # Identifier of the model the request was made against.
    model: str
    # Moment the record was created; defaults to datetime.now() at construction.
    timestamp: datetime = field(default_factory=datetime.now)
24
+
25
+
26
class TokenTracker:
    """Accumulate token usage (and any reported cost) across multiple requests."""

    def __init__(self):
        self.usage_history: List[TokenUsage] = []
        self.total_cost = 0.0
        self.total_tokens = 0

    @staticmethod
    def _get_encoding(model: str):
        """Return the tiktoken encoding for ``model``.

        Falls back to ``o200k_base`` (the GPT-4o family encoding) when tiktoken
        has no mapping for the model name.
        """
        try:
            return tiktoken.encoding_for_model(model)
        except KeyError:
            return tiktoken.get_encoding("o200k_base")

    def count_tokens_for_messages(
        self, messages: List[Dict[str, str]], model: str
    ) -> int:
        """
        Count total tokens for a list of chat messages using tiktoken.

        Counting scheme:
        - Each message adds 3 tokens of structural overhead
        - A ``name`` key adds one additional token
        - A fixed 3 tokens are added once for the whole message list
        - Every non-None value is stringified and encoded; None values (for
          example an absent ``content``) contribute no tokens

        Args:
            messages: List of message dicts (each with 'role', 'content', optional 'name')
            model: OpenAI model identifier (e.g., 'gpt-5.2', 'gpt-4o')

        Returns:
            Estimated number of tokens required to send these messages.
        """
        encoding = self._get_encoding(model)

        tokens_per_message = 3
        tokens_per_name = 1

        num_tokens = 0
        for message in messages:
            num_tokens += tokens_per_message
            for key, value in message.items():
                # str(None) would be encoded as the literal text "None".
                if value is not None:
                    num_tokens += len(encoding.encode(str(value)))
                if key == "name":
                    num_tokens += tokens_per_name

        num_tokens += 3
        return num_tokens

    def count_tokens_for_text(self, text: str, model: str) -> int:
        """Return the number of tokens ``text`` encodes to for ``model``."""
        return len(self._get_encoding(model).encode(text))

    def track_usage_from_response(
        self, response_data, model: str
    ) -> Optional[TokenUsage]:
        """Record usage taken from a response object's ``usage`` attribute.

        Reads ``prompt_tokens``/``completion_tokens`` and falls back to
        ``input_tokens``/``output_tokens`` when the former are absent. A cost is
        taken from ``usage.total_cost`` or ``usage.cost`` if either is present.

        Returns:
            The recorded TokenUsage, or None when the response carries no usage
            or the usage could not be read (the failure is logged).
        """
        try:
            usage = getattr(response_data, "usage", None)
            if usage is None:
                return None

            prompt_tokens = getattr(usage, "prompt_tokens", None)
            if prompt_tokens is None:
                prompt_tokens = usage.input_tokens
            completion_tokens = getattr(usage, "completion_tokens", None)
            if completion_tokens is None:
                completion_tokens = usage.output_tokens

            actual_cost = getattr(usage, "total_cost", None)
            if actual_cost is None:
                actual_cost = getattr(usage, "cost", None)

            return self.track_usage(
                prompt_tokens, completion_tokens, model, actual_cost
            )
        except Exception as e:
            logger.error(f"Failed to track usage from response: {e}")
            return None

    def track_usage(
        self,
        prompt_tokens: int,
        completion_tokens: int,
        model: str,
        actual_cost: Optional[float] = None,
    ) -> TokenUsage:
        """Append one usage record and update the running totals.

        Args:
            prompt_tokens: Tokens used by the prompt.
            completion_tokens: Tokens used by the completion.
            model: Model identifier stored on the record.
            actual_cost: Cost reported for the request, if any. No cost is
                estimated locally; None means "unknown".

        Returns:
            The TokenUsage record that was appended to ``usage_history``.
        """
        total_tokens = prompt_tokens + completion_tokens

        usage = TokenUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            estimated_cost=actual_cost,
            model=model,
        )

        self.usage_history.append(usage)
        self.total_tokens += total_tokens

        message = f"💰 Token usage - Model: {model}, Prompt: {prompt_tokens}, Completion: {completion_tokens}"
        # Compare against None so that a reported cost of 0.0 is still shown.
        if actual_cost is not None:
            self.total_cost += actual_cost
            message += f", Cost: ${actual_cost:.4f}"
        logger.info(message)

        return usage

    def get_session_summary(self) -> Dict[str, Any]:
        """Return totals for the session plus a per-model breakdown.

        Returns:
            Dict with ``total_requests``, ``total_tokens``, ``total_cost`` and
            ``by_model`` (model name -> ``requests``/``tokens``/``cost``).
        """
        if not self.usage_history:
            return {
                "total_requests": 0,
                "total_tokens": 0,
                "total_cost": 0.0,
                "by_model": {},
            }

        by_model: Dict[str, Dict[str, Any]] = {}
        for usage in self.usage_history:
            stats = by_model.setdefault(
                usage.model, {"requests": 0, "tokens": 0, "cost": 0.0}
            )
            stats["requests"] += 1
            stats["tokens"] += usage.total_tokens
            if usage.estimated_cost is not None:
                stats["cost"] += usage.estimated_cost

        return {
            "total_requests": len(self.usage_history),
            "total_tokens": self.total_tokens,
            "total_cost": self.total_cost,
            "by_model": by_model,
        }

    def get_session_usage(self) -> Dict[str, Any]:
        """Alias of :meth:`get_session_summary`."""
        return self.get_session_summary()

    def reset_session(self):
        """Clear the history and zero the running totals."""
        self.usage_history.clear()
        self.total_cost = 0.0
        self.total_tokens = 0
        logger.info("🔄 Token usage tracking reset")

    def track_usage_from_agents_sdk(
        self, usage_dict: Dict[str, Any], model: str
    ) -> Optional[TokenUsage]:
        """Track usage from OpenAI Agents SDK usage format

        Reads ``input_tokens`` and ``output_tokens`` from ``usage_dict``.
        Nothing is recorded when the dict is empty or when ``input_tokens``,
        ``output_tokens`` and ``total_tokens`` are all missing or zero.

        Returns:
            The recorded TokenUsage, or None when nothing was recorded or the
            dict could not be processed (the failure is logged).
        """
        try:
            if not usage_dict:
                return None

            prompt_tokens = usage_dict.get("input_tokens") or 0
            completion_tokens = usage_dict.get("output_tokens") or 0
            reported_total = usage_dict.get("total_tokens") or 0

            # Skip only when there is genuinely nothing to record; a missing
            # total alone must not discard real input/output counts.
            if not (prompt_tokens or completion_tokens or reported_total):
                return None

            return self.track_usage(prompt_tokens, completion_tokens, model)
        except Exception as e:
            logger.error(f"Failed to track usage from agents SDK: {e}")
            return None
187
+
188
+
189
# Global token tracker instance
# Created once when this module is imported; get_token_tracker() returns it.
_global_tracker = TokenTracker()


def get_token_tracker() -> TokenTracker:
    """Return the module-level ``TokenTracker`` instance."""
    return _global_tracker
195
+
196
+
197
def track_agent_usage(
    prompt_text: str,
    completion_text: str,
    model: str,
    actual_cost: Optional[float] = None,
) -> TokenUsage:
    """Count tokens in raw prompt/completion text and record them globally.

    Args:
        prompt_text: Text sent to the model.
        completion_text: Text returned by the model.
        model: Model identifier used to select the tokenizer and stored on the record.
        actual_cost: Cost to attach to the record, if known.

    Returns:
        The TokenUsage record created by the shared tracker.
    """
    shared = get_token_tracker()
    n_prompt, n_completion = (
        shared.count_tokens_for_text(text, model)
        for text in (prompt_text, completion_text)
    )
    return shared.track_usage(n_prompt, n_completion, model, actual_cost)
209
+
210
+
211
def track_usage_from_openai_response(response_data, model: str) -> Optional[TokenUsage]:
    """Record usage from a response object on the shared tracker.

    Delegates to ``TokenTracker.track_usage_from_response`` and returns its result.
    """
    return get_token_tracker().track_usage_from_response(response_data, model)
214
+
215
+
216
def track_usage_from_agents_sdk(
    usage_dict: Dict[str, Any], model: str
) -> Optional[TokenUsage]:
    """Record an OpenAI Agents SDK usage dict on the shared tracker.

    Delegates to ``TokenTracker.track_usage_from_agents_sdk`` and returns its result.
    """
    return get_token_tracker().track_usage_from_agents_sdk(usage_dict, model)
ankigen/auto_config.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Auto-configuration service for intelligent settings population"""
2
+
3
+ from typing import Dict, Any
4
+ from openai import AsyncOpenAI
5
+
6
+ from ankigen.logging import logger
7
+ from ankigen.context7 import Context7Client
8
+ from ankigen.agents.schemas import AutoConfigSchema
9
+ from ankigen.llm_interface import structured_agent_call
10
+
11
+
12
class AutoConfigService:
    """Service for analyzing subjects and auto-configuring flashcard generation settings"""

    def __init__(self):
        self.context7_client = Context7Client()

    @staticmethod
    def _fallback_config(
        subject: str, target_topic_count: int | None = None
    ) -> AutoConfigSchema:
        """Build the default configuration used when the LLM analysis fails.

        Six generic subtopics are derived from ``subject``. When a positive
        ``target_topic_count`` is given, the list is truncated or padded so the
        requested number of topics is still honoured.
        """
        aspects = (
            "fundamentals",
            "core concepts",
            "practical applications",
            "common patterns",
            "best practices",
            "advanced topics",
        )
        topics = [f"{subject} - {aspect}" for aspect in aspects]

        if target_topic_count is not None and target_topic_count > 0:
            missing = target_topic_count - len(topics)
            topics.extend(
                f"{subject} - additional topic {i}" for i in range(1, missing + 1)
            )
            topics = topics[:target_topic_count]

        return AutoConfigSchema(
            library_search_term="",
            documentation_focus=None,
            topic_number=len(topics),
            topics_list=topics,
            cards_per_topic=8,
            learning_preferences="Focus on fundamental concepts and core principles with practical examples",
            generate_cloze=False,
            model_choice="gpt-5.2-auto",
            subject_type="concepts",
            scope="medium",
            rationale="Using default settings due to analysis error",
        )

    async def analyze_subject(
        self,
        subject: str,
        openai_client: AsyncOpenAI,
        target_topic_count: int | None = None,
    ) -> AutoConfigSchema:
        """Analyze a subject string and return configuration settings.

        Args:
            subject: The subject to analyze
            openai_client: OpenAI client for LLM calls
            target_topic_count: If provided, forces exactly this many topics in decomposition

        Returns:
            The schema produced by the LLM call, or a default configuration
            (see ``_fallback_config``) when that call raises.
        """

        # Build topic count instruction if override provided
        topic_count_instruction = ""
        if target_topic_count is not None:
            topic_count_instruction = f"""
IMPORTANT OVERRIDE: The user has requested exactly {target_topic_count} topics.
You MUST set topic_number to {target_topic_count} and provide exactly {target_topic_count} items in topics_list.
Choose the {target_topic_count} most important/foundational subtopics for this subject.
"""

        system_prompt = f"""You are an educational content analyzer specializing in spaced repetition learning. Analyze the given subject and determine flashcard generation settings that focus on ESSENTIAL concepts.
{topic_count_instruction}

CRITICAL PRINCIPLE: Quality over quantity. Focus on fundamental concepts that unlock understanding, not trivial facts.

Consider:
1. Extract any library/framework names for Context7 search (e.g., "pandas", "react", "tensorflow")
2. IMPORTANT: Extract the specific documentation focus from the subject
- "Basic Pandas Dataframe" → documentation_focus: "dataframe basics, creation, indexing"
- "React hooks tutorial" → documentation_focus: "hooks, useState, useEffect"
- "Docker networking" → documentation_focus: "networking, network drivers, container communication"
3. Identify the scope: narrow (specific feature), medium (several related topics), broad (full overview)
4. Determine content type: concepts (theory/understanding), syntax (code/commands), api (library usage), practical (hands-on skills)
5. TOPIC DECOMPOSITION: Break down the subject into distinct subtopics that together provide comprehensive coverage
6. Recommend cloze cards for syntax/code, basic cards for concepts
7. Choose model based on complexity: gpt-5.2-thinking for complex topics, gpt-5.2-instant for basic/simple, gpt-5.2-auto for mixed scope
- Valid model_choice values: "gpt-5.2-auto", "gpt-5.2-instant", "gpt-5.2-thinking"

TOPIC DECOMPOSITION (topics_list):
You MUST provide a topics_list - a list of distinct subtopics that together cover the subject comprehensively.
- Each topic should be specific and non-overlapping
- Order topics from foundational to advanced (learning progression)
- The number of topics should match topic_number

Examples:
- "React Hooks" → topics_list: ["useState fundamentals", "useEffect and lifecycle", "useRef and useContext", "custom hooks patterns", "performance with useMemo/useCallback", "testing hooks"]
- "Docker basics" → topics_list: ["containers vs VMs", "images and Dockerfile", "container lifecycle", "volumes and persistence", "networking fundamentals", "docker-compose basics"]
- "Machine Learning" → topics_list: ["supervised vs unsupervised", "regression models", "classification models", "model evaluation metrics", "overfitting and regularization", "feature engineering", "cross-validation"]

IMPORTANT - Focus on HIGH-VALUE topics:
- GOOD topics: Core concepts, fundamental principles, mental models, design patterns, key abstractions
- AVOID topics: Trivial commands (like "docker ps"), basic syntax that's easily googled, minor API details

Guidelines for settings (MINIMUM 30 cards total):
- Narrow/specific scope: 4-5 essential topics with 8-10 cards each (32-50 cards)
- Medium scope: 5-7 core topics with 7-9 cards each (35-63 cards)
- Broad scope: 6-8 fundamental topics with 6-8 cards each (36-64 cards)
- "Basic"/"Introduction" keywords: Start with fundamentals, 40-50 cards total
- "Complex" keywords: Deep dive into critical concepts, 45-60 cards

Learning preference suggestions:
- For basics: "Focus on fundamental concepts and mental models that form the foundation"
- For practical: "Emphasize core patterns and principles with real-world applications"
- For theory: "Build deep conceptual understanding with progressive complexity"

Return a JSON object matching the AutoConfigSchema."""

        user_prompt = f"""Analyze this subject for flashcard generation: "{subject}"

Extract:
1. The library name if mentioned
2. The specific documentation focus (what aspects of the library to focus on)
3. Suggested settings for effective learning

Provide a brief rationale for your choices."""

        try:
            config = await structured_agent_call(
                openai_client=openai_client,
                model="gpt-5.2",
                instructions=system_prompt,
                user_input=user_prompt,
                output_type=AutoConfigSchema,
                temperature=0.3,  # Lower temperature for more consistent analysis
            )

            logger.info(
                f"Subject analysis complete: library='{config.library_search_term}', "
                f"topics={config.topic_number}, cards/topic={config.cards_per_topic}"
            )
            return config

        except Exception as e:
            logger.error(f"Failed to analyze subject: {e}")
            # Return sensible defaults on error (still aim for good card count),
            # keeping the caller's requested topic count when one was given.
            return self._fallback_config(subject, target_topic_count)

    async def auto_configure(
        self,
        subject: str,
        openai_client: AsyncOpenAI,
        target_topic_count: int | None = None,
    ) -> Dict[str, Any]:
        """
        Complete auto-configuration pipeline:
        1. Analyze subject with AI
        2. Search Context7 for library if detected
        3. Return complete configuration for UI

        Args:
            subject: The subject to analyze
            openai_client: OpenAI client for LLM calls
            target_topic_count: If provided, forces exactly this many topics

        Returns:
            A dict of UI settings plus an ``analysis_metadata`` sub-dict, or an
            empty dict when ``subject`` is empty or whitespace only.
        """

        if not subject or not subject.strip():
            logger.warning("Empty subject provided to auto_configure")
            return {}

        logger.info(f"Starting auto-configuration for subject: '{subject}'")

        # Step 1: Analyze the subject
        config = await self.analyze_subject(
            subject, openai_client, target_topic_count=target_topic_count
        )

        # Step 2: Search Context7 for library if one was detected
        library_id = None
        if config.library_search_term:
            logger.info(
                f"Searching Context7 for library: '{config.library_search_term}'"
            )
            try:
                library_id = await self.context7_client.resolve_library_id(
                    config.library_search_term
                )
                if library_id:
                    logger.info(f"Resolved library to Context7 ID: {library_id}")
                else:
                    logger.warning(
                        f"Could not find library '{config.library_search_term}' in Context7"
                    )
            except Exception as e:
                # A failed lookup is not fatal: the config is returned without a library.
                logger.error(f"Context7 search failed: {e}")

        # Step 3: Build complete configuration dict for UI
        ui_config = {
            # The library name is only surfaced when Context7 resolved it.
            "library_name": config.library_search_term if library_id else "",
            "library_topic": config.documentation_focus or "",
            "topic_number": config.topic_number,
            "topics_list": config.topics_list,
            "cards_per_topic": config.cards_per_topic,
            "preference_prompt": config.learning_preferences,
            "generate_cloze_checkbox": config.generate_cloze,
            "model_choice": config.model_choice,
            # Metadata for display
            "analysis_metadata": {
                "subject_type": config.subject_type,
                "scope": config.scope,
                "rationale": config.rationale,
                "library_found": library_id is not None,
                "context7_id": library_id,
            },
        }

        logger.info(
            f"Auto-configuration complete: library={'found' if library_id else 'not found'}, "
            f"topics={config.topic_number}, model={config.model_choice}"
        )

        return ui_config
ankigen/card_generator.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Module for core card generation logic
2
+
3
+ import gradio as gr
4
+ import pandas as pd
5
+ from typing import List, Dict, Any
6
+
7
+ # Imports from our core modules
8
+ from ankigen.utils import (
9
+ get_logger,
10
+ ResponseCache,
11
+ strip_html_tags,
12
+ )
13
+ from ankigen.llm_interface import OpenAIClientManager
14
+ from ankigen.models import (
15
+ Card,
16
+ ) # Import necessary Pydantic models
17
+
18
+ # Import agent system - required
19
+ from ankigen.agents.integration import AgentOrchestrator
20
+ from agents import set_tracing_disabled
21
+
22
# Module-level logger obtained from the project's get_logger() helper.
logger = get_logger()

# Disable tracing to prevent metrics persistence issues
set_tracing_disabled(True)

# Checked by orchestrate_card_generation before it does any work; assigned
# unconditionally here.
AGENTS_AVAILABLE = True
logger.info("Agent system loaded successfully")
29
+
30
# --- Constants --- (Moved from app.py)
# Model options: "value" is the selection string, "label" and "description"
# are human-readable text for it.
AVAILABLE_MODELS = [
    dict(
        value="gpt-5.2-auto",
        label="GPT-5.2 Auto",
        description="Adaptive reasoning",
    ),
    dict(
        value="gpt-5.2-instant",
        label="GPT-5.2 Instant",
        description="Fast, minimal reasoning",
    ),
    dict(
        value="gpt-5.2-thinking",
        label="GPT-5.2 Thinking",
        description="Higher reasoning effort",
    ),
]

# Generation modes, described with the same value/label/description keys.
GENERATION_MODES = [
    dict(
        value="subject",
        label="Single Subject",
        description="Generate cards for a specific topic",
    ),
]
56
+
57
+ # --- Core Functions --- (Moved and adapted from app.py)
58
+
59
+
60
+ # Legacy functions removed - all card generation now handled by agent system
61
+
62
+
63
+ def _parse_model_selection(model_selection: str) -> tuple[str, str | None]:
64
+ """Parse model selection into model name and reasoning effort."""
65
+ if not model_selection:
66
+ return "gpt-5.2", None
67
+
68
+ normalized = model_selection.strip().lower()
69
+ if normalized == "gpt-5.2-auto":
70
+ return "gpt-5.2", None
71
+ if normalized == "gpt-5.2-instant":
72
+ return "gpt-5.2", "none"
73
+ if normalized == "gpt-5.2-thinking":
74
+ return "gpt-5.2", "high"
75
+
76
+ if "gpt-5.2" in normalized:
77
+ if "instant" in normalized:
78
+ return "gpt-5.2", "none"
79
+ if "thinking" in normalized:
80
+ return "gpt-5.2", "high"
81
+ if "auto" in normalized:
82
+ return "gpt-5.2", None
83
+ return "gpt-5.2", None
84
+
85
+ # Fallback for direct model names
86
+ return model_selection, None
87
+
88
+
89
def _map_generation_mode_to_subject(generation_mode: str, subject: str) -> str:
    """Translate the UI generation mode into the subject handed to the agents.

    "subject" mode passes the user's subject through (or "general" when it is
    empty); "path" and "text" map to fixed labels; anything else is "general".
    """
    if generation_mode == "subject":
        return subject or "general"

    fixed_subjects = (
        ("path", "curriculum_design"),
        ("text", "content_analysis"),
    )
    for mode, mapped in fixed_subjects:
        if generation_mode == mode:
            return mapped
    return "general"
98
+
99
+
100
def _build_generation_context(generation_mode: str, source_text: str) -> Dict[str, Any]:
    """Return the context dict for card generation.

    The dict carries ``source_text`` only in "text" mode with non-empty text;
    otherwise it is empty.
    """
    if generation_mode == "text" and source_text:
        return {"source_text": source_text}
    return {}
106
+
107
+
108
def _get_token_usage_html(token_tracker) -> str:
    """Render the tracker's total token count as a small HTML snippet.

    ``get_session_summary`` is preferred, with ``get_session_usage`` as the
    alternative. Any failure (including a tracker with neither method) is
    logged and produces the "No usage data" snippet instead.
    """
    try:
        for method_name in ("get_session_summary", "get_session_usage"):
            if hasattr(token_tracker, method_name):
                token_usage = getattr(token_tracker, method_name)()
                break
        else:
            raise AttributeError("TokenTracker has no session summary method")

        total = token_usage["total_tokens"]
        return f"<div style='margin-top: 8px;'><b>Token Usage:</b> {total} tokens</div>"
    except Exception as e:
        logger.error(f"Token usage collection failed: {e}")
        return "<div style='margin-top: 8px;'><b>Token Usage:</b> No usage data</div>"
122
+
123
+
124
def _format_cards_to_dataframe(
    agent_cards: List[Card], subject: str
) -> tuple[pd.DataFrame, str]:
    """Turn agent cards into a DataFrame plus a "cards generated" HTML message.

    The topic column is ``subject`` (or "General" when it is empty) and card
    indices start at 1.
    """
    topic_label = subject or "General"
    rows = format_cards_for_dataframe(
        agent_cards, topic_name=topic_label, start_index=1
    )
    output_df = pd.DataFrame(rows, columns=get_dataframe_columns())
    card_count = len(output_df)
    total_cards_message = f"<div><b>Cards Generated:</b> <span id='total-cards-count'>{card_count}</span></div>"
    return output_df, total_cards_message
136
+
137
+
138
async def orchestrate_card_generation(
    client_manager: OpenAIClientManager,
    cache: ResponseCache,
    api_key_input: str,
    subject: str,
    generation_mode: str,
    source_text: str,
    url_input: str,
    model_name: str,
    topic_number: int,
    cards_per_topic: int,
    preference_prompt: str,
    generate_cloze: bool,
    use_llm_judge: bool = False,
    library_name: str | None = None,
    library_topic: str | None = None,
    topics_list: List[str] | None = None,
):
    """Orchestrates the card generation process based on UI inputs.

    Args:
        client_manager: Client manager handed to the AgentOrchestrator.
        cache: Not used in this function body.
        api_key_input: API key passed to ``orchestrator.initialize``.
        subject: Subject/topic to generate cards for.
        generation_mode: UI mode ("subject", "path" or "text").
        source_text: Source text, added to the context in "text" mode.
        url_input: Not used in this function body.
        model_name: Model selection string, parsed by ``_parse_model_selection``.
        topic_number: Number of topics; multiplied by ``cards_per_topic`` to
            obtain the total number of cards requested.
        cards_per_topic: Cards requested per topic.
        preference_prompt: Optional learning preferences added to the context.
        generate_cloze: Forwarded to the orchestrator.
        use_llm_judge: Not used in this function body.
        library_name: Optional library name forwarded to the orchestrator.
        library_topic: Optional library topic forwarded to the orchestrator.
        topics_list: Optional explicit topic list forwarded to the orchestrator.

    Returns:
        A 3-tuple ``(dataframe, status_message, token_usage_html)``. On any
        failure the DataFrame is empty (standard columns only), the message
        describes the error and the token usage string is empty.
    """
    logger.info(f"Starting card generation orchestration in {generation_mode} mode")
    logger.debug(
        f"Parameters: mode={generation_mode}, topics={topic_number}, "
        f"cards_per_topic={cards_per_topic}, cloze={generate_cloze}"
    )

    # NOTE(review): the gr.Error(...) calls in this function only construct the
    # exception object; Gradio documents gr.Error as something to `raise`.
    # They are left as-is because callers rely on the returned tuple.
    if not AGENTS_AVAILABLE:
        logger.error("Agent system is required but not available")
        gr.Error("Agent system is required but not available")
        return pd.DataFrame(columns=get_dataframe_columns()), "Agent system error", ""

    try:
        from ankigen.agents.token_tracker import get_token_tracker

        token_tracker = get_token_tracker()
        orchestrator = AgentOrchestrator(client_manager)

        model_name_resolved, reasoning_effort = _parse_model_selection(model_name)
        logger.info(f"Using {model_name_resolved} for SubjectExpertAgent")
        await orchestrator.initialize(
            api_key_input,
            {"subject_expert": model_name_resolved},
            {"subject_expert": reasoning_effort},
        )

        agent_subject = _map_generation_mode_to_subject(generation_mode, subject)
        context = _build_generation_context(generation_mode, source_text)
        if preference_prompt:
            context["learning_preferences"] = preference_prompt
        total_cards_needed = topic_number * cards_per_topic

        agent_cards, agent_metadata = await orchestrator.generate_cards_with_agents(
            topic=subject if subject else "Mixed Topics",
            subject=agent_subject,
            num_cards=total_cards_needed,
            difficulty="intermediate",
            context=context,
            library_name=library_name,
            library_topic=library_topic,
            generate_cloze=generate_cloze,
            topics_list=topics_list,
            cards_per_topic=cards_per_topic,
        )

        token_usage_html = _get_token_usage_html(token_tracker)

        if agent_cards:
            output_df, total_cards_message = _format_cards_to_dataframe(
                agent_cards, subject
            )
            logger.info(f"Agent system generated {len(output_df)} cards successfully")
            return output_df, total_cards_message, token_usage_html

        logger.error("Agent system returned no cards")
        gr.Error("Agent system returned no cards")
        return (
            pd.DataFrame(columns=get_dataframe_columns()),
            "Agent system returned no cards.",
            "",
        )

    except Exception as e:
        # Top-level boundary: report the failure through the return value
        # rather than propagating it.
        logger.error(f"Agent system failed: {e}")
        gr.Error(f"Agent system error: {str(e)}")
        return (
            pd.DataFrame(columns=get_dataframe_columns()),
            f"Agent system error: {str(e)}",
            "",
        )
226
+
227
+
228
+ # Legacy helper functions removed - all processing now handled by agent system
229
+
230
+
231
+ # --- Formatting and Utility Functions --- (Moved and adapted)
232
def _metadata_value_to_text(value) -> str:
    """Render a metadata value as text: lists are comma-joined, others str()'d."""
    if isinstance(value, list):
        # str() each item so a non-string entry cannot break the join.
        return ", ".join(str(item) for item in value)
    return str(value)


def format_cards_for_dataframe(
    cards: list[Card], topic_name: str, topic_index: int = 0, start_index: int = 1
) -> list:
    """Formats a list of Card objects into a list of dictionaries for DataFrame display.
    Ensures all data is plain text.

    Args:
        cards: Cards to format.
        topic_name: Value for the "Topic" column (HTML tags are stripped).
        topic_index: When greater than 0, indices are rendered as
            "<topic_index>.<n>"; otherwise just "<n>".
        start_index: Number given to the first card.

    Returns:
        One dict per card, keyed by the columns from ``get_dataframe_columns``.
    """
    formatted_cards = []
    for actual_index, card_obj in enumerate(cards, start=start_index):
        # Metadata processing
        metadata = card_obj.metadata or {}
        prerequisites = metadata.get("prerequisites", [])
        learning_outcomes = metadata.get("learning_outcomes", [])
        difficulty = metadata.get("difficulty", "N/A")

        if topic_index > 0:
            index_label = f"{topic_index}.{actual_index}"
        else:
            index_label = str(actual_index)

        formatted_cards.append(
            {
                "Index": index_label,
                "Topic": strip_html_tags(topic_name),  # Ensure topic is also plain
                "Card_Type": strip_html_tags(card_obj.card_type or "basic"),
                # Card text fields are passed through unchanged; the original
                # comments say they are stripped during Card object creation.
                "Question": card_obj.front.question or "",
                "Answer": card_obj.back.answer or "",
                "Explanation": card_obj.back.explanation or "",
                "Example": card_obj.back.example or "",
                "Prerequisites": strip_html_tags(
                    _metadata_value_to_text(prerequisites)
                ),
                "Learning_Outcomes": strip_html_tags(
                    _metadata_value_to_text(learning_outcomes)
                ),
                "Difficulty": strip_html_tags(str(difficulty)),
                # A missing or None source_url becomes an empty string.
                "Source_URL": strip_html_tags(str(metadata.get("source_url") or "")),
            }
        )
    return formatted_cards
286
+
287
+
288
def get_dataframe_columns() -> list[str]:
    """Return the column names, in display order, of the Anki card DataFrame.

    A new list is returned on every call.
    """
    column_order = (
        "Index",
        "Topic",
        "Card_Type",
        "Question",
        "Answer",
        "Explanation",
        "Example",
        "Prerequisites",
        "Learning_Outcomes",
        "Difficulty",
        "Source_URL",
    )
    return list(column_order)
303
+
304
+
305
def generate_token_usage_html(token_usage=None):
    """Generate HTML for token usage display

    A non-empty dict yields its ``total_tokens`` value (0 if the key is
    missing); any other input yields the "No usage data" snippet.
    """
    if not (token_usage and isinstance(token_usage, dict)):
        return "<div style='margin-top: 8px;'><b>Token Usage:</b> No usage data</div>"

    total_tokens = token_usage.get("total_tokens", 0)
    return f"<div style='margin-top: 8px;'><b>Token Usage:</b> {total_tokens} tokens</div>"
ankigen/cli.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """CLI interface for AnkiGen - Generate Anki flashcards from the command line"""
2
+
3
+ import asyncio
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ import click
10
+ import pandas as pd
11
+ from rich.console import Console
12
+ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
13
+ from rich.table import Table
14
+ from rich.panel import Panel
15
+
16
+ from ankigen.agents.token_tracker import get_token_tracker
17
+ from ankigen.auto_config import AutoConfigService
18
+ from ankigen.card_generator import orchestrate_card_generation
19
+ from ankigen.exporters import export_dataframe_to_apkg, export_dataframe_to_csv
20
+ from ankigen.llm_interface import OpenAIClientManager
21
+ from ankigen.utils import ResponseCache, get_logger
22
+
23
+ console = Console()
24
+ logger = get_logger()
25
+
26
+
27
def get_api_key() -> str:
    """Return the OpenAI API key, asking the user when the environment has none.

    The ``OPENAI_API_KEY`` environment variable wins; only when it is unset or
    empty is the user prompted (with hidden input) on the terminal.
    """
    env_key = os.getenv("OPENAI_API_KEY")
    if env_key:
        return env_key

    console.print("[yellow]OpenAI API key not found in environment[/yellow]")
    return click.prompt("Enter your OpenAI API key", hide_input=True)
36
+
37
+
38
async def auto_configure_from_prompt(
    prompt: str,
    api_key: str,
    override_topics: Optional[int] = None,
    override_cards: Optional[int] = None,
    override_model: Optional[str] = None,
) -> dict:
    """Auto-configure generation settings from a prompt using AI analysis.

    Initializes an OpenAI client, asks ``AutoConfigService`` for a
    configuration, applies the explicit overrides and prints the resulting
    settings as a table.

    Args:
        prompt: Subject text to analyse.
        api_key: OpenAI API key used to initialize the client.
        override_topics: Forwarded to the service as ``target_topic_count``.
        override_cards: When not None, replaces ``config["cards_per_topic"]``.
        override_model: When not None, replaces ``config["model_choice"]``.

    Returns:
        The configuration dict returned by the service, with overrides applied.
    """
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        progress.add_task("Analyzing subject...", total=None)

        # Initialize client
        client_manager = OpenAIClientManager()
        await client_manager.initialize_client(api_key)
        openai_client = client_manager.get_client()

        # Get auto-config (pass topic count override so LLM decomposes correctly)
        auto_config_service = AutoConfigService()
        config = await auto_config_service.auto_configure(
            prompt, openai_client, target_topic_count=override_topics
        )

    # Apply remaining overrides (topics already handled in auto_configure)
    if override_cards is not None:
        config["cards_per_topic"] = override_cards
    if override_model is not None:
        config["model_choice"] = override_model

    # Display configuration
    table = Table(
        title="Auto-Configuration", show_header=True, header_style="bold cyan"
    )
    table.add_column("Setting", style="dim")
    table.add_column("Value", style="green")

    table.add_row("Topics", str(config.get("topic_number", "N/A")))
    table.add_row("Cards per Topic", str(config.get("cards_per_topic", "N/A")))
    table.add_row(
        "Total Cards",
        str(config.get("topic_number", 0) * config.get("cards_per_topic", 0)),
    )
    table.add_row("Model", config.get("model_choice", "N/A"))

    if config.get("library_name"):
        table.add_row("Library", config.get("library_name"))
    if config.get("library_topic"):
        table.add_row("Library Topic", config.get("library_topic"))

    # Display discovered topics: everything when short, otherwise the first
    # three plus a count of the remainder.
    topics = config.get("topics_list")
    if topics:
        if len(topics) <= 4:
            topics_str = ", ".join(topics)
        else:
            topics_str = ", ".join(topics[:3]) + f", ... (+{len(topics) - 3} more)"
        table.add_row("Subtopics", topics_str)

    preference = config.get("preference_prompt")
    if preference:
        # Only add the ellipsis when text was actually cut off.
        max_len = 50
        focus = preference if len(preference) <= max_len else preference[:max_len] + "..."
        table.add_row("Learning Focus", focus)

    console.print(table)

    return config
109
+
110
+
111
async def generate_cards_from_config(
    prompt: str,
    config: dict,
    api_key: str,
) -> tuple:
    """Generate cards for ``prompt`` using a previously built configuration.

    Args:
        prompt: Subject passed to the generator as ``subject``.
        config: Settings dict; missing entries fall back to the defaults used
            below (3 topics, 5 cards per topic, model ``"gpt-5.2-auto"``).
        api_key: OpenAI API key forwarded to the orchestrator.

    Returns:
        The ``(output_df, total_cards_html, token_usage_html)`` triple returned
        by ``orchestrate_card_generation``.
    """
    client_manager = OpenAIClientManager()
    response_cache = ResponseCache()

    # Resolve the counts once so the progress label and the request agree, and
    # a partially filled config uses the defaults instead of raising KeyError.
    topic_number = config.get("topic_number", 3)
    cards_per_topic = config.get("cards_per_topic", 5)

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        console=console,
    ) as progress:
        task = progress.add_task(
            f"Generating {topic_number * cards_per_topic} cards...",
            total=100,
        )

        # Generate cards
        (
            output_df,
            total_cards_html,
            token_usage_html,
        ) = await orchestrate_card_generation(
            client_manager=client_manager,
            cache=response_cache,
            api_key_input=api_key,
            subject=prompt,
            generation_mode="subject",
            source_text="",
            url_input="",
            model_name=config.get("model_choice", "gpt-5.2-auto"),
            topic_number=topic_number,
            cards_per_topic=cards_per_topic,
            preference_prompt=config.get("preference_prompt", ""),
            generate_cloze=config.get("generate_cloze_checkbox", False),
            # Empty strings are normalized to None.
            library_name=config.get("library_name") or None,
            library_topic=config.get("library_topic") or None,
            topics_list=config.get("topics_list"),
        )

        # The orchestrator reports no incremental progress here, so the bar
        # simply jumps to complete once it returns.
        progress.update(task, completed=100)

    return output_df, total_cards_html, token_usage_html
163
+
164
+
165
def export_cards(
    df: pd.DataFrame,
    output_path: str,
    deck_name: str,
    export_format: str = "apkg",
) -> str:
    """Export cards to an ``.apkg`` or ``.csv`` file.

    The output path is adjusted so that it always ends with the extension of
    the chosen format: a trailing extension of the *other* format is swapped,
    anything else gets the extension appended.

    Args:
        df: Cards to export.
        output_path: Requested output file path.
        deck_name: Deck name passed to the APKG exporter (unused for CSV).
        export_format: ``"apkg"`` selects the APKG exporter; any other value
            is treated as CSV.

    Returns:
        The path reported by the underlying exporter.
    """
    to_apkg = export_format == "apkg"
    wanted_ext, other_ext = (".apkg", ".csv") if to_apkg else (".csv", ".apkg")

    if not output_path.endswith(wanted_ext):
        if output_path.endswith(other_ext):
            # Swap only the trailing extension; a substring replace would also
            # rewrite matches inside directory names or earlier in the file name.
            output_path = output_path[: -len(other_ext)] + wanted_ext
        else:
            output_path = f"{output_path}{wanted_ext}"

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        progress.add_task(f"Exporting to {export_format.upper()}...", total=None)

        if to_apkg:
            exported_path = export_dataframe_to_apkg(df, output_path, deck_name)
        else:  # csv
            exported_path = export_dataframe_to_csv(df, output_path)

    return exported_path
202
+
203
+
204
@click.command()
@click.option(
    "-p",
    "--prompt",
    required=True,
    help="Subject or topic for flashcard generation (e.g., 'Basic SQL', 'React Hooks')",
)
@click.option(
    "--topics",
    type=int,
    help="Number of topics (auto-detected if not specified)",
)
@click.option(
    "--cards-per-topic",
    type=int,
    help="Number of cards per topic (auto-detected if not specified)",
)
@click.option(
    "--model",
    type=click.Choice(
        ["gpt-5.2-auto", "gpt-5.2-instant", "gpt-5.2-thinking"],
        case_sensitive=False,
    ),
    help="Model to use for generation (auto-selected if not specified)",
)
@click.option(
    "-o",
    "--output",
    default="deck.apkg",
    help="Output file path (default: deck.apkg)",
)
@click.option(
    "--format",
    "export_format",
    type=click.Choice(["apkg", "csv"], case_sensitive=False),
    default="apkg",
    help="Export format (default: apkg)",
)
@click.option(
    "--api-key",
    envvar="OPENAI_API_KEY",
    help="OpenAI API key (or set OPENAI_API_KEY env var)",
)
@click.option(
    "--no-confirm",
    is_flag=True,
    help="Skip confirmation prompt",
)
def main(
    prompt: str,
    topics: Optional[int],
    cards_per_topic: Optional[int],
    model: Optional[str],
    output: str,
    export_format: str,
    api_key: Optional[str],
    no_confirm: bool,
):
    """
    AnkiGen CLI - Generate Anki flashcards from the command line

    Examples:

        # Quick generation with auto-config
        ankigen -p "Basic SQL"

        # With custom settings
        ankigen -p "React Hooks" --topics 5 --cards-per-topic 8 --output hooks.apkg

        # Export to CSV
        ankigen -p "Docker basics" --format csv -o docker.csv
    """

    # Print header
    console.print(
        Panel.fit(
            "[bold cyan]AnkiGen CLI[/bold cyan]\n[dim]Generate Anki flashcards with AI[/dim]",
            border_style="cyan",
        )
    )
    console.print()

    # Get API key
    if not api_key:
        api_key = get_api_key()

    # Run async workflow
    async def workflow():
        """Configure, confirm, generate, export and summarize, in that order."""
        try:
            # Step 1: Auto-configure
            console.print(f"[bold]Subject:[/bold] {prompt}\n")
            config = await auto_configure_from_prompt(
                prompt=prompt,
                api_key=api_key,
                override_topics=topics,
                override_cards=cards_per_topic,
                override_model=model,
            )

            # Step 2: Confirm (unless --no-confirm)
            if not no_confirm:
                console.print()
                if not click.confirm("Proceed with card generation?", default=True):
                    console.print("[yellow]Cancelled[/yellow]")
                    return

            console.print()

            # Step 3: Generate cards (the HTML fragments are not used by the CLI)
            df, _total_html, _token_html = await generate_cards_from_config(
                prompt=prompt,
                config=config,
                api_key=api_key,
            )

            if df.empty:
                console.print("[red]✗[/red] No cards generated")
                sys.exit(1)

            # Step 4: Export
            console.print()
            deck_name = f"AnkiGen - {prompt}"
            exported_path = export_cards(
                df=df,
                output_path=output,
                deck_name=deck_name,
                export_format=export_format,
            )

            # Step 5: Success summary
            console.print()
            file_size = Path(exported_path).stat().st_size / 1024  # KB

            summary = Table.grid(padding=(0, 2))
            summary.add_row("[green]✓[/green] Success!", "")
            summary.add_row("Cards Generated:", f"[bold]{len(df)}[/bold]")
            summary.add_row("Output File:", f"[bold]{exported_path}[/bold]")
            summary.add_row("File Size:", f"{file_size:.1f} KB")

            # Get token usage from tracker
            tracker = get_token_tracker()
            session = tracker.get_session_summary()
            if session["total_tokens"] > 0:
                # Calculate totals across all models
                total_input = sum(u.prompt_tokens for u in tracker.usage_history)
                total_output = sum(u.completion_tokens for u in tracker.usage_history)
                summary.add_row(
                    "Tokens:",
                    f"{total_input:,} in / {total_output:,} out ({session['total_tokens']:,} total)",
                )

            console.print(
                Panel(summary, border_style="green", title="Generation Complete")
            )

        except KeyboardInterrupt:
            console.print("\n[yellow]Cancelled by user[/yellow]")
            sys.exit(130)
        except Exception as e:
            # SystemExit is not an Exception, so the sys.exit() calls above
            # pass through this handler untouched.
            logger.error(f"CLI error: {e}", exc_info=True)
            console.print(f"[red]✗ Error:[/red] {str(e)}")
            sys.exit(1)

    # Run the async workflow.  On Python 3.11+ asyncio.run() turns Ctrl-C into a
    # cancellation of the main task and then raises KeyboardInterrupt itself,
    # so the handler inside workflow() is not reached; catch it here as well.
    try:
        asyncio.run(workflow())
    except KeyboardInterrupt:
        console.print("\n[yellow]Cancelled by user[/yellow]")
        sys.exit(130)


if __name__ == "__main__":
    main()
ankigen/context7.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Context7 integration for library documentation"""
2
+
3
+ import asyncio
4
+ import subprocess
5
+ import json
6
+ from typing import Optional, Dict, Any
7
+ from tenacity import (
8
+ retry,
9
+ stop_after_attempt,
10
+ wait_exponential,
11
+ retry_if_exception_type,
12
+ )
13
+ from ankigen.logging import logger
14
+ from ankigen.exceptions import (
15
+ ValidationError,
16
+ )
17
+
18
+ MAX_STRING_LENGTH = 200 # Prevent excessively long inputs
19
+ SUBPROCESS_TIMEOUT = 60.0 # 60 second timeout for Context7 calls
20
+
21
+
22
class Context7Client:
    """Context7 MCP client for fetching library documentation.

    Every tool call spawns a fresh ``npx @upstash/context7-mcp`` subprocess,
    writes an ``initialize`` request followed by the ``tools/call`` request to
    its stdin and reads the newline-delimited JSON-RPC replies from stdout.
    """

    def __init__(self):
        pass  # No state needed - each call creates fresh subprocess

    @staticmethod
    async def _kill_process(process) -> None:
        """Kill ``process`` if it is still running and wait up to 5s for it to exit."""
        if process.returncode is None:
            process.kill()
            await asyncio.wait_for(process.wait(), timeout=5.0)

    @staticmethod
    def _extract_tool_result(raw_output: str, request_id: int) -> Dict[str, Any]:
        """Turn the subprocess's stdout into the ``{"success": ...}`` result dict.

        The reply is selected by its JSON-RPC ``id`` rather than by line
        position, so extra output lines (notifications, non-JSON noise) do not
        shift the parse.
        """
        invalid = {"error": "Invalid response format", "success": False}

        tool_response = None
        for raw_line in raw_output.splitlines():
            candidate = raw_line.strip()
            if not candidate:
                continue
            try:
                message = json.loads(candidate)
            except json.JSONDecodeError:
                continue  # Not a JSON-RPC message; ignore it
            if isinstance(message, dict) and message.get("id") == request_id:
                tool_response = message
                break

        if tool_response is None:
            return invalid

        if "result" in tool_response:
            result = tool_response["result"]
            # Extract content from the result
            if "content" in result and result["content"]:
                content_item = result["content"][0]
                if "text" in content_item or content_item.get("type") == "text":
                    return {"text": content_item.get("text", ""), "success": True}
            return {"error": "No content in response", "success": False}

        if "error" in tool_response:
            return {"error": tool_response["error"], "success": False}

        return invalid

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type((TimeoutError, ConnectionError)),
        reraise=True,
    )
    async def _call_context7_tool_once(
        self, tool_name: str, args: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Run one Context7 tool call; retried on ``TimeoutError``/``ConnectionError``.

        Exceptions are deliberately allowed to propagate from here so the
        retry decorator can see them; ``call_context7_tool`` converts whatever
        is left after the retries into an error dict.
        """
        # Build the JSONRPC request
        request = {
            "jsonrpc": "2.0",
            "id": 1,
            "method": "tools/call",
            "params": {"name": tool_name, "arguments": args},
        }

        # Initialization must be sent first
        init_request = {
            "jsonrpc": "2.0",
            "id": 0,
            "method": "initialize",
            "params": {
                "protocolVersion": "2025-06-18",
                "capabilities": {},
                "clientInfo": {"name": "ankigen", "version": "1.0.0"},
            },
        }

        # Call the Context7 server
        process = await asyncio.create_subprocess_exec(
            "npx",
            "@upstash/context7-mcp",
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        # Send both requests, newline-terminated, with timeout protection
        input_data = "\n".join([json.dumps(init_request), json.dumps(request), ""])
        try:
            stdout, _stderr = await asyncio.wait_for(
                process.communicate(input=input_data.encode()),
                timeout=SUBPROCESS_TIMEOUT,
            )
        except asyncio.TimeoutError:
            # Proper process cleanup on timeout
            try:
                await self._kill_process(process)
            except Exception as cleanup_error:
                logger.error(f"Error during process cleanup: {cleanup_error}")
            raise TimeoutError(
                f"Context7 subprocess timed out after {SUBPROCESS_TIMEOUT}s"
            )
        except Exception:
            # Clean up process on any other error
            try:
                await self._kill_process(process)
            except Exception:
                pass  # Best effort cleanup
            raise

        return self._extract_tool_result(stdout.decode(), request["id"])

    async def call_context7_tool(
        self, tool_name: str, args: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Call a Context7 tool via direct JSONRPC with retry logic.

        Args:
            tool_name: Name of the Context7 tool to invoke.
            args: Arguments object sent with the tool call.

        Returns:
            ``{"text": ..., "success": True}`` on success, otherwise
            ``{"error": ..., "success": False}``.  Failures never raise: after
            the retries are exhausted the last error is logged and returned.
        """
        try:
            return await self._call_context7_tool_once(tool_name, args)
        except Exception as e:
            logger.error(f"Error calling Context7 tool {tool_name}: {e}")
            return {"error": str(e), "success": False}

    def _parse_library_response(self, text: str) -> list[Dict[str, Any]]:
        """Parse Context7 response text into list of library dicts.

        Args:
            text: Raw text response from Context7

        Returns:
            List of library dicts with keys: title, id, snippets, trust
            (entries without an ``id`` are dropped; ``snippets``/``trust`` are
            only present when they parsed as numbers)
        """
        title_prefix = "- Title:"
        id_prefix = "- Context7-compatible library ID:"
        snippets_prefix = "- Code Snippets:"
        trust_prefix = "- Trust Score:"

        libraries: list[Dict[str, Any]] = []
        current_lib: Dict[str, Any] = {}

        for raw_line in text.split("\n"):
            line = raw_line.strip()

            if line.startswith(title_prefix):
                # A new title starts a new entry; keep the previous one if usable.
                if current_lib.get("id"):
                    libraries.append(current_lib)
                current_lib = {"title": line[len(title_prefix):].strip().lower()}

            elif line.startswith(id_prefix):
                current_lib["id"] = line[len(id_prefix):].strip()

            elif line.startswith(snippets_prefix):
                try:
                    current_lib["snippets"] = int(line[len(snippets_prefix):].strip())
                except ValueError:
                    pass  # Non-numeric count: leave the field unset

            elif line.startswith(trust_prefix):
                try:
                    current_lib["trust"] = float(line[len(trust_prefix):].strip())
                except ValueError:
                    pass  # Non-numeric score: leave the field unset

        if current_lib.get("id"):
            libraries.append(current_lib)

        return libraries

    def _score_library(self, lib: Dict[str, Any], search_term: str) -> float:
        """Score a library based on how well it matches the search term.

        Args:
            lib: Library dict with title, id, snippets, trust
            search_term: Lowercase search term

        Returns:
            Score (higher is better match)
        """
        score = 0.0
        lib_title = lib.get("title", "")
        lib_id = lib["id"].lower()

        # Exact title match gets highest priority
        if lib_title == search_term:
            score += 10000
        elif lib_id == f"/{search_term}-dev/{search_term}":
            score += 5000
        elif f"/{search_term}/" in lib_id or lib_id.endswith(f"/{search_term}"):
            score += 2000
        elif search_term in lib_title:
            # Exact equality was handled above, so only prefix/substring remain.
            score += 200 if lib_title.startswith(search_term) else 50

        # Bonus for code snippets (indicates main library)
        score += lib.get("snippets", 0) / 10

        # Bonus for trust score (high trust = official/authoritative)
        score += lib.get("trust", 0) * 100

        return score

    def _select_best_library(
        self, libraries: list[Dict[str, Any]], search_term: str
    ) -> Optional[Dict[str, Any]]:
        """Select the best matching library from a list.

        Args:
            libraries: List of library dicts
            search_term: Lowercase search term

        Returns:
            Best matching library dict, or None if no match
        """
        best_lib = None
        best_score = -1.0

        for lib in libraries:
            score = self._score_library(lib, search_term)

            # Only log candidates that actually mention the search term.
            if search_term in lib.get("title", "") or search_term in lib["id"].lower():
                logger.debug(
                    f"Scoring {lib['id']}: title='{lib.get('title', '')}', "
                    f"snippets={lib.get('snippets', 0)}, trust={lib.get('trust', 0)}, score={score:.2f}"
                )

            # Strict comparison: on ties the earliest candidate wins.
            if score > best_score:
                best_score = score
                best_lib = lib

        if best_lib:
            logger.info(
                f"Selected library: {best_lib['id']} (title: {best_lib.get('title', 'unknown')}, "
                f"snippets: {best_lib.get('snippets', 0)}, trust: {best_lib.get('trust', 0)}, "
                f"score: {best_score:.2f})"
            )

        return best_lib

    async def resolve_library_id(self, library_name: str) -> Optional[str]:
        """Resolve a library name to a Context7-compatible ID.

        Returns:
            The ID of the best-scoring candidate, or None when the lookup
            fails or yields no usable candidate.
        """
        logger.info(f"Resolving library ID for: {library_name}")

        result = await self.call_context7_tool(
            "resolve-library-id", {"libraryName": library_name}
        )

        if not (result and result.get("success") and result.get("text")):
            logger.warning(f"Could not resolve library ID for '{library_name}'")
            return None

        libraries = self._parse_library_response(result["text"])
        if not libraries:
            logger.warning(f"Could not resolve library ID for '{library_name}'")
            return None

        best_lib = self._select_best_library(libraries, library_name.lower())
        if best_lib:
            logger.info(f"Resolved '{library_name}' to ID: {best_lib['id']}")
            return best_lib["id"]

        logger.warning(f"Could not resolve library ID for '{library_name}'")
        return None

    async def get_library_docs(
        self, library_id: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Get documentation for a library.

        Args:
            library_id: Context7 library ID; must start with ``/`` and be at
                most ``MAX_STRING_LENGTH`` characters long.
            topic: Optional topic, sent only when truthy.
            tokens: Value sent as the request's ``tokens`` argument.

        Returns:
            The documentation text, or None when nothing could be fetched.

        Raises:
            ValidationError: If ``library_id`` fails the format check.
        """
        # Security: Validate library_id (should start with /)
        if (
            not library_id
            or not library_id.startswith("/")
            or len(library_id) > MAX_STRING_LENGTH
        ):
            logger.error(f"Invalid library ID format (security): '{library_id}'")
            raise ValidationError("Invalid library ID format")

        logger.info(
            f"Fetching docs for: {library_id}" + (f" (topic: {topic})" if topic else "")
        )

        args: Dict[str, Any] = {
            "context7CompatibleLibraryID": library_id,
            "tokens": tokens,
        }
        if topic:
            args["topic"] = topic

        result = await self.call_context7_tool("get-library-docs", args)

        if result and result.get("success") and result.get("text"):
            docs = result["text"]
            logger.info(f"Retrieved {len(docs)} characters of documentation")
            return docs

        logger.warning(f"Could not fetch docs for '{library_id}'")
        return None

    async def fetch_library_documentation(
        self, library_name: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Convenience method to resolve and fetch docs in one call"""
        library_id = await self.resolve_library_id(library_name)
        if not library_id:
            return None

        return await self.get_library_docs(library_id, topic, tokens)
314
+
315
+
316
async def test_context7() -> None:
    """Manual smoke test: resolve the 'react' library and fetch its hooks docs.

    Progress and results are reported with ``print``; nothing is returned.
    """
    client = Context7Client()

    print("Testing Context7 integration...")

    # Step 1: resolve the library name to an ID
    library_id = await client.resolve_library_id("react")
    if not library_id:
        print("✗ Failed to resolve library ID")
        return

    print(f"✓ Resolved 'react' to ID: {library_id}")

    # Step 2: fetch documentation for the resolved ID
    docs = await client.get_library_docs(library_id, topic="hooks", tokens=2000)
    if not docs:
        print("✗ Failed to fetch documentation")
        return

    print(f"✓ Fetched {len(docs)} characters of documentation")
    print(f"Preview: {docs[:300]}...")


if __name__ == "__main__":
    asyncio.run(test_context7())
ankigen/exceptions.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Custom exceptions for AnkiGen application.
2
+
3
+ This module provides a hierarchy of custom exceptions to standardize
4
+ error handling across the codebase.
5
+ """
6
+
7
+
8
class AnkigenError(Exception):
    """Root of the AnkiGen exception hierarchy; catch this to catch them all."""


class ValidationError(AnkigenError):
    """Input failed validation."""


class SecurityError(AnkigenError):
    """A security check failed (SSRF, command injection, etc.)."""


class APIError(AnkigenError):
    """Common base for errors coming from external API calls."""


class OpenAIAPIError(APIError):
    """An OpenAI API call failed."""


class Context7APIError(APIError):
    """A Context7 API call failed."""


class ExportError(AnkigenError):
    """Common base for export-related errors."""


class CardGenerationError(AnkigenError):
    """Card generation failed."""


class ConfigurationError(AnkigenError):
    """Configuration is invalid or missing."""
60
+
61
+
62
+ def handle_exception(
63
+ exc: Exception,
64
+ logger,
65
+ message: str,
66
+ reraise: bool = True,
67
+ reraise_as: type[Exception] | None = None,
68
+ ) -> None:
69
+ """Standardized exception handler.
70
+
71
+ Args:
72
+ exc: The exception to handle
73
+ logger: Logger instance to use
74
+ message: Error message to log
75
+ reraise: Whether to re-raise the exception
76
+ reraise_as: Optional exception type to wrap and re-raise as
77
+
78
+ Raises:
79
+ The original exception or wrapped exception if reraise is True
80
+ """
81
+ logger.error(f"{message}: {exc}", exc_info=True)
82
+
83
+ if reraise:
84
+ if reraise_as:
85
+ raise reraise_as(f"{message}: {exc}") from exc
86
+ raise
ankigen/exporters.py ADDED
@@ -0,0 +1,943 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Module for CSV and APKG export functions
2
+
3
+ import gradio as gr
4
+ import pandas as pd
5
+ import genanki
6
+ import random
7
+ import html
8
+ from typing import List, Dict, Any, Optional
9
+ import csv
10
+ from datetime import datetime
11
+ import os
12
+
13
+ from ankigen.utils import get_logger, strip_html_tags
14
+
15
+ logger = get_logger()
16
+
17
+
18
+ # --- Helper function for formatting fields ---
19
+ def _format_field_as_string(value: Any) -> str:
20
+ if isinstance(value, list) or isinstance(value, tuple):
21
+ return ", ".join(str(item).strip() for item in value if str(item).strip())
22
+ if pd.isna(value) or value is None:
23
+ return ""
24
+ return str(value).strip()
25
+
26
+
27
+ def _generate_timestamped_filename(
28
+ base_name: str, extension: str, include_timestamp: bool = True
29
+ ) -> str:
30
+ """Generate a filename with optional timestamp.
31
+
32
+ Args:
33
+ base_name: The base name for the file (without extension)
34
+ extension: File extension (e.g., 'csv', 'apkg')
35
+ include_timestamp: Whether to include timestamp in filename
36
+
37
+ Returns:
38
+ Generated filename with extension
39
+ """
40
+ if include_timestamp:
41
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
42
+ return f"{base_name}_{timestamp}.{extension}"
43
+ return f"{base_name}.{extension}"
44
+
45
+
46
+ def _ensure_output_directory(filepath: str) -> None:
47
+ """Ensure the output directory exists for the given filepath.
48
+
49
+ Args:
50
+ filepath: Full path to the file
51
+
52
+ Creates the directory if it doesn't exist.
53
+ """
54
+ output_dir = os.path.dirname(filepath)
55
+ if output_dir and not os.path.exists(output_dir):
56
+ os.makedirs(output_dir)
57
+ logger.info(f"Created output directory: {output_dir}")
58
+
59
+
60
+ def _validate_non_empty_data(data: Any, data_type: str) -> None:
61
+ """Validate that data is not empty.
62
+
63
+ Args:
64
+ data: The data to validate (list, DataFrame, etc.)
65
+ data_type: Description of data type for error messages
66
+
67
+ Raises:
68
+ ValueError: If data is empty or None
69
+ """
70
+ if data is None:
71
+ raise ValueError(f"No {data_type} provided to export.")
72
+ if isinstance(data, list) and not data:
73
+ raise ValueError(f"No {data_type} provided to export.")
74
+ if isinstance(data, pd.DataFrame) and data.empty:
75
+ raise ValueError(f"No {data_type} available to export.")
76
+
77
+
78
+ # --- Constants for APKG Generation (Subtask 10) ---
79
+ ANKI_BASIC_MODEL_NAME = "AnkiGen Basic"
80
+ ANKI_CLOZE_MODEL_NAME = "AnkiGen Cloze"
81
+
82
# Anki identifies a note type by its model ID, so these must be stable across
# runs: a freshly randomized ID on every import makes each exported deck define
# a brand-new note type instead of reusing the existing "AnkiGen" ones.
# The values were drawn once from the range [1 << 30, 1 << 31), as genanki
# recommends, and are now fixed.
DEFAULT_BASIC_MODEL_ID = 1496502187
DEFAULT_CLOZE_MODEL_ID = 1831347905
86
+
87
+ # --- Shared CSS with dark mode support ---
88
+ CARD_CSS = """
89
+ /* CSS Variables - Light Mode (default) */
90
+ .card {
91
+ --bg-card: #ffffff;
92
+ --bg-answer: #f0f9ff;
93
+ --bg-explanation: #f0fdf4;
94
+ --bg-example: #fefce8;
95
+ --bg-back-extra: #eef2ff;
96
+ --bg-prereq: #f8fafc;
97
+ --bg-code: #2d2d2d;
98
+
99
+ --text-primary: #1a1a1a;
100
+ --text-secondary: #4b5563;
101
+ --text-muted: #666666;
102
+ --text-heading: #1f2937;
103
+ --text-code: #f8f8f2;
104
+
105
+ --accent-blue: #2563eb;
106
+ --accent-blue-light: #60a5fa;
107
+ --accent-green: #4ade80;
108
+ --accent-yellow: #facc15;
109
+ --accent-indigo: #818cf8;
110
+ --accent-red: #ef4444;
111
+
112
+ --border-light: #e5e7eb;
113
+ --border-dashed: #cbd5e1;
114
+
115
+ --shadow: rgba(0, 0, 0, 0.05);
116
+ }
117
+
118
+ /* Dark Mode Overrides */
119
+ .nightMode .card,
120
+ .night_mode .card {
121
+ --bg-card: #1e1e1e;
122
+ --bg-answer: #1e293b;
123
+ --bg-explanation: #14291a;
124
+ --bg-example: #292518;
125
+ --bg-back-extra: #1e1b2e;
126
+ --bg-prereq: #262626;
127
+ --bg-code: #0d0d0d;
128
+
129
+ --text-primary: #e4e4e7;
130
+ --text-secondary: #a1a1aa;
131
+ --text-muted: #9ca3af;
132
+ --text-heading: #f4f4f5;
133
+ --text-code: #f8f8f2;
134
+
135
+ --accent-blue: #60a5fa;
136
+ --accent-blue-light: #93c5fd;
137
+ --accent-green: #4ade80;
138
+ --accent-yellow: #fde047;
139
+ --accent-indigo: #a5b4fc;
140
+ --accent-red: #f87171;
141
+
142
+ --border-light: #3f3f46;
143
+ --border-dashed: #52525b;
144
+
145
+ --shadow: rgba(0, 0, 0, 0.3);
146
+ }
147
+
148
+ /* Base styles */
149
+ .card {
150
+ font-family: 'Inter', system-ui, -apple-system, sans-serif;
151
+ font-size: 16px;
152
+ line-height: 1.6;
153
+ color: var(--text-primary);
154
+ max-width: 800px;
155
+ margin: 0 auto;
156
+ padding: 20px;
157
+ background: var(--bg-card);
158
+ }
159
+
160
+ @media (max-width: 768px) {
161
+ .card {
162
+ font-size: 14px;
163
+ padding: 15px;
164
+ }
165
+ }
166
+
167
+ /* Question side */
168
+ .question-side {
169
+ position: relative;
170
+ min-height: 200px;
171
+ }
172
+
173
+ .difficulty-indicator {
174
+ position: absolute;
175
+ top: 10px;
176
+ right: 10px;
177
+ width: 10px;
178
+ height: 10px;
179
+ border-radius: 50%;
180
+ }
181
+
182
+ .difficulty-indicator.beginner { background: var(--accent-green); }
183
+ .difficulty-indicator.intermediate { background: var(--accent-yellow); }
184
+ .difficulty-indicator.advanced { background: var(--accent-red); }
185
+
186
+ .question {
187
+ font-size: 1.3em;
188
+ font-weight: 600;
189
+ color: var(--accent-blue);
190
+ margin-bottom: 1.5em;
191
+ }
192
+
193
+ .prerequisites {
194
+ margin-top: 1em;
195
+ font-size: 0.9em;
196
+ color: var(--text-muted);
197
+ }
198
+
199
+ .prerequisites-toggle {
200
+ color: var(--accent-blue);
201
+ cursor: pointer;
202
+ text-decoration: underline;
203
+ }
204
+
205
+ .prerequisites-content {
206
+ display: none;
207
+ margin-top: 0.5em;
208
+ padding: 0.5em;
209
+ background: var(--bg-prereq);
210
+ border-radius: 4px;
211
+ }
212
+
213
+ .prerequisites.show .prerequisites-content {
214
+ display: block;
215
+ }
216
+
217
+ /* Answer side sections */
218
+ .answer-section,
219
+ .explanation-section,
220
+ .example-section,
221
+ .back-extra-section {
222
+ margin: 1.5em 0;
223
+ padding: 1.2em;
224
+ border-radius: 8px;
225
+ box-shadow: 0 2px 4px var(--shadow);
226
+ }
227
+
228
+ .answer-section {
229
+ background: var(--bg-answer);
230
+ border-left: 4px solid var(--accent-blue);
231
+ }
232
+
233
+ .back-extra-section {
234
+ background: var(--bg-back-extra);
235
+ border-left: 4px solid var(--accent-indigo);
236
+ }
237
+
238
+ .explanation-section {
239
+ background: var(--bg-explanation);
240
+ border-left: 4px solid var(--accent-green);
241
+ }
242
+
243
+ .example-section {
244
+ background: var(--bg-example);
245
+ border-left: 4px solid var(--accent-yellow);
246
+ }
247
+
248
+ .example-section pre {
249
+ background-color: var(--bg-code);
250
+ color: var(--text-code);
251
+ padding: 1em;
252
+ border-radius: 0.3em;
253
+ overflow-x: auto;
254
+ font-family: 'Consolas', 'Monaco', 'Menlo', monospace;
255
+ font-size: 0.9em;
256
+ line-height: 1.4;
257
+ }
258
+
259
+ .example-section code {
260
+ font-family: 'Consolas', 'Monaco', 'Menlo', monospace;
261
+ }
262
+
263
+ .metadata-section {
264
+ margin-top: 2em;
265
+ padding-top: 1em;
266
+ border-top: 1px solid var(--border-light);
267
+ font-size: 0.9em;
268
+ color: var(--text-secondary);
269
+ }
270
+
271
+ .metadata-section h3 {
272
+ font-size: 1em;
273
+ color: var(--text-heading);
274
+ margin-bottom: 0.5em;
275
+ }
276
+
277
+ .metadata-section > div {
278
+ margin-bottom: 0.8em;
279
+ }
280
+
281
+ .source-url a {
282
+ color: var(--accent-blue);
283
+ text-decoration: none;
284
+ }
285
+ .source-url a:hover {
286
+ text-decoration: underline;
287
+ }
288
+
289
+ /* Cloze deletion styles */
290
+ .cloze {
291
+ font-weight: bold;
292
+ color: var(--accent-blue);
293
+ }
294
+
295
+ /* General utility */
296
+ hr {
297
+ border: none;
298
+ border-top: 1px dashed var(--border-dashed);
299
+ margin: 1.5em 0;
300
+ }
301
+
302
+ /* Rich text field styling */
303
+ .field ul, .field ol {
304
+ margin-left: 1.5em;
305
+ padding-left: 0.5em;
306
+ }
307
+ .field li {
308
+ margin-bottom: 0.3em;
309
+ }
310
+
311
+ /* Responsive design */
312
+ @media (max-width: 640px) {
313
+ .answer-section,
314
+ .explanation-section,
315
+ .example-section,
316
+ .back-extra-section {
317
+ padding: 1em;
318
+ margin: 1em 0;
319
+ }
320
+ }
321
+
322
+ /* Animations */
323
+ @keyframes fadeIn {
324
+ from { opacity: 0; }
325
+ to { opacity: 1; }
326
+ }
327
+
328
+ .card {
329
+ animation: fadeIn 0.3s ease-in-out;
330
+ }
331
+ """
332
+
333
+ # --- Full Model Definitions ---
334
+
335
+ BASIC_MODEL = genanki.Model(
336
+ DEFAULT_BASIC_MODEL_ID, # Use the generated ID
337
+ ANKI_BASIC_MODEL_NAME, # Use the constant name
338
+ fields=[
339
+ {"name": "Question"},
340
+ {"name": "Answer"},
341
+ {"name": "Explanation"},
342
+ {"name": "Example"},
343
+ {"name": "Prerequisites"},
344
+ {"name": "Learning_Outcomes"},
345
+ {"name": "Difficulty"},
346
+ {"name": "SourceURL"}, # Added for consistency if used by template
347
+ {"name": "TagsStr"}, # Added for consistency if used by template
348
+ ],
349
+ templates=[
350
+ {
351
+ "name": "Card 1",
352
+ "qfmt": """
353
+ <div class=\"card question-side\">
354
+ <div class=\"difficulty-indicator {{Difficulty}}\"></div>
355
+ <div class=\"content\">
356
+ <div class=\"question\">{{Question}}</div>
357
+ <div class=\"prerequisites\" onclick=\"event.stopPropagation();\">
358
+ <div class=\"prerequisites-toggle\">Show Prerequisites</div>
359
+ <div class=\"prerequisites-content\">{{Prerequisites}}</div>
360
+ </div>
361
+ </div>
362
+ </div>
363
+ <script>
364
+ document.querySelector('.prerequisites-toggle').addEventListener('click', function(e) {
365
+ e.stopPropagation();
366
+ this.parentElement.classList.toggle('show');
367
+ });
368
+ </script>
369
+ """,
370
+ "afmt": """
371
+ <div class=\"card answer-side\">
372
+ <div class=\"content\">
373
+ <div class=\"question-section\">
374
+ <div class=\"question\">{{Question}}</div>
375
+ <div class=\"prerequisites\">
376
+ <strong>Prerequisites:</strong> {{Prerequisites}}
377
+ </div>
378
+ </div>
379
+ <hr>
380
+
381
+ <div class=\"answer-section\">
382
+ <h3>Answer</h3>
383
+ <div class=\"answer\">{{Answer}}</div>
384
+ </div>
385
+
386
+ <div class=\"explanation-section\">
387
+ <h3>Explanation</h3>
388
+ <div class=\"explanation-text\">{{Explanation}}</div>
389
+ </div>
390
+
391
+ <div class=\"example-section\">
392
+ <h3>Example</h3>
393
+ <div class=\"example-text\">{{Example}}</div>
394
+ <!-- Example field might contain pre/code or plain text -->
395
+ <!-- Handled by how HTML is put into the Example field -->
396
+ </div>
397
+
398
+ <div class=\"metadata-section\">
399
+ <div class=\"learning-outcomes\">
400
+ <h3>Learning Outcomes</h3>
401
+ <div>{{Learning_Outcomes}}</div>
402
+ </div>
403
+
404
+
405
+ <div class=\"difficulty\">
406
+ <h3>Difficulty Level</h3>
407
+ <div>{{Difficulty}}</div>
408
+ </div>
409
+ {{#SourceURL}}<div class=\"source-url\"><small>Source: <a href=\"{{SourceURL}}\">{{SourceURL}}</a></small></div>{{/SourceURL}}
410
+ </div>
411
+ </div>
412
+ </div>
413
+ """,
414
+ }
415
+ ],
416
+ css=CARD_CSS,
417
+ )
418
+
419
+ CLOZE_MODEL = genanki.Model(
420
+ DEFAULT_CLOZE_MODEL_ID, # Use the generated ID
421
+ ANKI_CLOZE_MODEL_NAME, # Use the constant name
422
+ fields=[
423
+ {"name": "Text"},
424
+ {"name": "Back Extra"},
425
+ {"name": "Explanation"},
426
+ {"name": "Example"},
427
+ {"name": "Prerequisites"},
428
+ {"name": "Learning_Outcomes"},
429
+ {"name": "Difficulty"},
430
+ {"name": "SourceURL"},
431
+ {"name": "TagsStr"},
432
+ ],
433
+ templates=[
434
+ {
435
+ "name": "Cloze Card",
436
+ "qfmt": """
437
+ <div class=\"card question-side\">
438
+ <div class=\"difficulty-indicator {{Difficulty}}\"></div>
439
+ <div class=\"content\">
440
+ <div class=\"question\">{{cloze:Text}}</div>
441
+ <div class=\"prerequisites\" onclick=\"event.stopPropagation();\">
442
+ <div class=\"prerequisites-toggle\">Show Prerequisites</div>
443
+ <div class=\"prerequisites-content\">{{Prerequisites}}</div>
444
+ </div>
445
+ </div>
446
+ </div>
447
+ <script>
448
+ document.querySelector('.prerequisites-toggle').addEventListener('click', function(e) {
449
+ e.stopPropagation();
450
+ this.parentElement.classList.toggle('show');
451
+ });
452
+ </script>
453
+ """,
454
+ "afmt": """
455
+ <div class=\"card answer-side\">
456
+ <div class=\"content\">
457
+ <div class=\"question-section\">
458
+ <div class=\"question\">{{cloze:Text}}</div>
459
+ <div class=\"prerequisites\">
460
+ <strong>Prerequisites:</strong> {{Prerequisites}}
461
+ </div>
462
+ </div>
463
+ <hr>
464
+
465
+ {{#Back Extra}}
466
+ <div class=\"back-extra-section\">
467
+ <h3>Additional Information</h3>
468
+ <div class=\"back-extra-text\">{{Back Extra}}</div>
469
+ </div>
470
+ {{/Back Extra}}
471
+
472
+ <div class=\"explanation-section\">
473
+ <h3>Explanation</h3>
474
+ <div class=\"explanation-text\">{{Explanation}}</div>
475
+ </div>
476
+
477
+ <div class=\"example-section\">
478
+ <h3>Example</h3>
479
+ <div class=\"example-text\">{{Example}}</div>
480
+ </div>
481
+
482
+ <div class=\"metadata-section\">
483
+ <div class=\"learning-outcomes\">
484
+ <h3>Learning Outcomes</h3>
485
+ <div>{{Learning_Outcomes}}</div>
486
+ </div>
487
+
488
+
489
+ <div class=\"difficulty\">
490
+ <h3>Difficulty Level</h3>
491
+ <div>{{Difficulty}}</div>
492
+ </div>
493
+ {{#SourceURL}}<div class=\"source-url\"><small>Source: <a href=\"{{SourceURL}}\">{{SourceURL}}</a></small></div>{{/SourceURL}}
494
+ </div>
495
+ </div>
496
+ </div>
497
+ """,
498
+ }
499
+ ],
500
+ css=CARD_CSS,
501
+ model_type=1, # Cloze model type
502
+ )
503
+
504
+
505
+ # --- Helper functions for APKG (Subtask 10) ---
506
def _get_or_create_model(
    model_id: int,
    name: str,
    fields: List[Dict[str, str]],
    templates: List[Dict[str, str]],
) -> genanki.Model:
    """Build a ``genanki.Model`` from an ID, a name, fields and templates.

    Despite the name, nothing is looked up or cached here: every call
    constructs a new model. No CSS or model type is passed along, so the
    result uses genanki's defaults for both.

    Args:
        model_id: Numeric ID of the note type.
        name: Display name of the note type.
        fields: Field definitions, e.g. ``[{"name": "Question"}, ...]``.
        templates: Card template definitions (name / qfmt / afmt dicts).

    Returns:
        The newly constructed model.
    """
    return genanki.Model(
        model_id=model_id,
        name=name,
        fields=fields,
        templates=templates,
    )
513
+
514
+
515
+ # --- New CSV Exporter for List of Dictionaries ---
516
+
517
+
518
def export_cards_to_csv(
    cards: List[Dict[str, Any]], filename: Optional[str] = None
) -> str:
    """Export a list of card dictionaries to a CSV file.

    The CSV has the columns ``front``, ``back``, ``tags`` and ``note_type``.
    A card without a 'front' or 'back' key is logged and skipped; it does not
    abort the export. A file containing only the header is therefore possible
    when every card is skipped.

    Args:
        cards: A list of dictionaries, where each dictionary represents a card
            and should contain 'front' and 'back' keys. 'tags' (default "")
            and 'note_type' (default "Basic") are optional.
        filename: Optional. The desired filename/path for the CSV.
            If None or empty, a timestamped filename is generated.

    Returns:
        The path to the generated CSV file.

    Raises:
        ValueError: If the cards list is empty or not provided.
        IOError: If there is an issue writing to the file.
    """
    # Validation using helper
    _validate_non_empty_data(cards, "cards")

    # Filename generation using helper
    if not filename:
        filename = _generate_timestamped_filename("ankigen_cards", "csv")
        logger.info(f"No filename provided, generated: {filename}")

    # Ensure output directory exists using helper
    _ensure_output_directory(filename)

    # Define the fieldnames expected in the CSV.
    fieldnames = ["front", "back", "tags", "note_type"]
    skipped_count = 0

    try:
        logger.info(f"Attempting to export {len(cards)} cards to {filename}")
        with open(filename, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(
                csvfile, fieldnames=fieldnames, extrasaction="ignore"
            )
            writer.writeheader()
            for i, card in enumerate(cards):
                # Mandatory fields: skip (do not raise) when either is absent.
                if "front" not in card or "back" not in card:
                    reason = f"Card at index {i} is missing 'front' or 'back' key."
                    # {reason!r} keeps the log text identical to the former
                    # str(KeyError(reason)) rendering.
                    logger.error(
                        f"Skipping card due to KeyError: {reason!r}. Card data: {card}"
                    )
                    skipped_count += 1
                    continue

                writer.writerow(
                    {
                        "front": card["front"],
                        "back": card["back"],
                        "tags": card.get("tags", ""),
                        "note_type": card.get("note_type", "Basic"),
                    }
                )
        if skipped_count:
            # Make partial exports visible instead of reporting plain success.
            logger.warning(
                f"Skipped {skipped_count} of {len(cards)} cards while exporting to {filename}"
            )
        logger.info(f"Successfully exported cards to {filename}")
        return filename
    except IOError as e_io:
        logger.error(f"IOError during CSV export to {filename}: {e_io}", exc_info=True)
        raise
    except Exception as e_general:
        logger.error(
            f"Unexpected error during CSV export to {filename}: {e_general}",
            exc_info=True,
        )
        raise
590
+
591
+
592
def _escaped_card_field(card: Dict[str, Any], *keys: str) -> str:
    """Return the HTML-escaped text of the first non-empty value under ``keys``.

    ``None`` and empty values count as missing. Non-string values are
    converted with ``str()`` before escaping. Returns "" when no key yields
    any text.
    """
    for key in keys:
        value = card.get(key)
        if value is None:
            continue
        text = value if isinstance(value, str) else str(value)
        if text:
            return html.escape(text)
    return ""


def export_cards_to_apkg(
    cards: List[Dict[str, Any]],
    filename: Optional[str] = None,
    deck_name: str = "Ankigen Generated Cards",
) -> str:
    """Exports a list of card dictionaries to an Anki .apkg file.

    Every field value is HTML-escaped before it is placed in a note. Cards
    whose question text is empty, or for which note creation fails, are
    logged and skipped.

    Args:
        cards: List of dictionaries, where each dictionary represents a card.
            Recognised keys are 'Question', 'Answer', 'Explanation',
            'Example', 'Prerequisites', 'Learning_Outcomes', 'Difficulty',
            'SourceURL', 'TagsStr', 'note_type' ('Basic' or 'Cloze') and
            'tags_for_note_object' (list of tag strings). When 'Question',
            'Answer' or 'SourceURL' is absent or empty, the simpler 'front',
            'back' and 'source_url' keys are used instead.
        filename: The full path (including filename) for the exported file.
            If None, a timestamped default is generated; a missing ".apkg"
            suffix is appended.
        deck_name: The name of the deck.

    Returns:
        The path to the exported file.

    Raises:
        ValueError: If ``cards`` is None or empty.
        gr.Error: If no card could be turned into a note.
        IOError: If the package cannot be written to ``filename``.
    """
    # Validate before touching len(cards): a None input must surface as the
    # helper's ValueError, not as a TypeError from the log line below.
    _validate_non_empty_data(cards, "cards")

    logger.info(f"Starting APKG export for {len(cards)} cards to deck '{deck_name}'.")

    # Filename generation using helper
    if not filename:
        filename = _generate_timestamped_filename("ankigen_deck", "apkg")
    elif not filename.lower().endswith(".apkg"):
        filename += ".apkg"

    # Ensure output directory exists using helper
    _ensure_output_directory(filename)

    deck_id = random.randrange(1 << 30, 1 << 31)
    anki_deck = genanki.Deck(deck_id, deck_name)

    notes_added_count = 0
    for card_dict in cards:
        note_type = card_dict.get("note_type", "Basic")
        tags_for_note_object = card_dict.get("tags_for_note_object", [])

        # Security: escape HTML so field content cannot inject markup/script
        # when the card is rendered in Anki.
        question = _escaped_card_field(card_dict, "Question", "front")

        if not question:
            logger.error(
                f"SKIPPING CARD DUE TO EMPTY 'Question' (front/text) field. Card data: {card_dict}"
            )
            continue

        # BASIC_MODEL and CLOZE_MODEL declare their fields in the same order
        # (Question/Text first, Answer/Back Extra second), so one list serves
        # both note types.
        note_fields = [
            question,  # Question (Basic) / Text (Cloze)
            _escaped_card_field(card_dict, "Answer", "back"),  # Answer / Back Extra
            _escaped_card_field(card_dict, "Explanation"),
            _escaped_card_field(card_dict, "Example"),
            _escaped_card_field(card_dict, "Prerequisites"),
            _escaped_card_field(card_dict, "Learning_Outcomes"),
            _escaped_card_field(card_dict, "Difficulty"),
            _escaped_card_field(card_dict, "SourceURL", "source_url"),
            _escaped_card_field(card_dict, "TagsStr"),
        ]

        try:
            is_cloze = note_type.lower() == "cloze"
            note = genanki.Note(
                model=CLOZE_MODEL if is_cloze else BASIC_MODEL,
                fields=note_fields,
                tags=tags_for_note_object,
            )
            anki_deck.add_note(note)
            notes_added_count += 1
        except Exception as e:
            logger.error(
                f"Failed to create genanki.Note for card: {card_dict}. Error: {e}",
                exc_info=True,
            )
            logger.warning(f"Skipping card due to error: Question='{question[:50]}...'")

    if notes_added_count == 0:
        logger.error(
            "No valid notes could be created from the provided cards. APKG generation aborted."
        )
        raise gr.Error("Failed to create any valid Anki notes from the input.")

    logger.info(
        f"Added {notes_added_count} notes to deck '{deck_name}'. Proceeding to package."
    )

    # Package and write
    package = genanki.Package(anki_deck)
    try:
        package.write_to_file(filename)
        logger.info(f"Successfully exported Anki deck to {filename}")
    except Exception as e:
        logger.error(f"Failed to write .apkg file to {filename}: {e}", exc_info=True)
        # Chain the original exception so its traceback is preserved.
        raise IOError(f"Could not write .apkg file: {e}") from e

    return filename
719
+
720
+
721
def export_cards_from_crawled_content(
    cards: List[Dict[str, Any]],
    output_path: Optional[str] = None,
    export_format: str = "csv",
    deck_name: str = "Ankigen Generated Cards",
) -> str:
    """Dispatch a list of card dicts to the CSV or APKG exporter.

    Args:
        cards: Card dictionaries to export. They are forwarded unchanged, so
            the keys that matter are the ones the chosen exporter reads: the
            CSV exporter uses 'front', 'back', 'tags' and 'note_type'; the
            APKG exporter uses the keys documented on export_cards_to_apkg.
        output_path: Full path (including filename) of the file to write.
            When None, the chosen exporter generates a default name.
        export_format: 'csv' or 'apkg', matched case-insensitively.
        deck_name: Deck name; only used for the 'apkg' format.

    Returns:
        The path to the exported file.

    Raises:
        ValueError: If ``cards`` is empty/None or ``export_format`` is not
            one of the supported formats.
    """
    if not cards:
        logger.warning("No cards provided to export_cards_from_crawled_content.")
        # Fail immediately on empty input, as the tests expect.
        raise ValueError("No cards provided to export.")

    logger.info(
        f"Exporting {len(cards)} cards to format '{export_format}' with deck name '{deck_name}'."
    )

    requested = export_format.lower()
    if requested == "csv":
        return export_cards_to_csv(cards, filename=output_path)
    if requested == "apkg":
        return export_cards_to_apkg(cards, filename=output_path, deck_name=deck_name)

    # Anything else is unsupported: log and raise the same message.
    supported_formats = ["csv", "apkg"]
    problem = f"Unsupported export format: {export_format}. Supported formats: {supported_formats}"
    logger.error(problem)
    raise ValueError(problem)
764
+
765
+
766
+ # --- New DataFrame CSV Exporter (Subtask 11) ---
767
def export_dataframe_to_csv(
    data: Optional[pd.DataFrame],
    filename_suggestion: Optional[str] = "ankigen_cards.csv",
) -> Optional[str]:
    """Exports a Pandas DataFrame to a CSV file, designed for Gradio download.

    The output name is ``ankigen_<suggestion>`` (spaces and path separators
    replaced, truncated to 50 characters) passed through the timestamped
    filename helper. Without a usable suggestion the base is ``ankigen_cards``.

    Args:
        data: The Pandas DataFrame to export.
        filename_suggestion: A suggestion for the base filename (e.g., from
            subject). A trailing ".csv" is ignored.

    Returns:
        The path to the written CSV file, or None if writing failed (the
        failure is logged and shown as a Gradio warning).

    Raises:
        gr.Error: If ``data`` is None or empty.
    """
    logger.info(
        f"Attempting to export DataFrame to CSV. Suggested filename: {filename_suggestion}"
    )

    # Validation using helper; translate its ValueError into a UI-facing error.
    try:
        _validate_non_empty_data(data, "card data")
    except ValueError:
        logger.warning(
            "No data provided to export_dataframe_to_csv. Skipping CSV export."
        )
        raise gr.Error("No card data available")

    try:
        # Generate filename from suggestion
        base_name_from_suggestion = "ankigen_cards"  # Default base part

        # Sanitize and use the suggestion (e.g., subject name) if provided
        if filename_suggestion and isinstance(filename_suggestion, str):
            # Remove .csv if present, then sanitize
            processed_suggestion = filename_suggestion.removesuffix(".csv")
            safe_suggestion = (
                processed_suggestion.replace(" ", "_")
                .replace("/", "-")
                .replace("\\", "-")
            )
            if safe_suggestion:
                base_name_from_suggestion = f"ankigen_{safe_suggestion[:50]}"

        # Generate timestamped filename using helper
        final_filename = _generate_timestamped_filename(
            base_name_from_suggestion, "csv"
        )

        # Ensure output directory exists using helper
        _ensure_output_directory(final_filename)

        data.to_csv(final_filename, index=False)
        logger.info(f"Successfully exported DataFrame to CSV: {final_filename}")
        gr.Info(f"CSV ready for download: {os.path.basename(final_filename)}")
        return final_filename
    except Exception as e:
        logger.error(f"Error exporting DataFrame to CSV: {e}", exc_info=True)
        # gr.Error only reaches the UI when raised; merely constructing it is
        # a no-op. gr.Warning shows the message and lets us keep the
        # documented "return None on failure" contract.
        gr.Warning(f"Error exporting DataFrame to CSV: {e}")
        return None
825
+
826
+
827
+ # --- New DataFrame to APKG Exporter (for Main Generator Tab) ---
828
def export_dataframe_to_apkg(
    df: pd.DataFrame,
    output_path: Optional[str],
    deck_name: str,
) -> str:
    """Convert DataFrame rows into card dicts and export them as an .apkg file.

    Each row is read through ``row.get`` for the columns 'Card_Type', 'Topic',
    'Difficulty', 'Question', 'Answer', 'Explanation', 'Example',
    'Prerequisites', 'Learning_Outcomes' and 'Source_URL'. Note tags are
    derived from the topic and the HTML-stripped difficulty. A row that fails
    to convert is logged and skipped.

    Args:
        df: DataFrame holding one card per row.
        output_path: Destination path, forwarded to export_cards_to_apkg as
            its ``filename`` argument (may be None).
        deck_name: Name of the Anki deck.

    Returns:
        Whatever export_cards_to_apkg returns (the path of the written file).

    Raises:
        ValueError: If the DataFrame is None/empty, or no row could be
            converted into a card.
    """
    _validate_non_empty_data(df, "cards in DataFrame")

    logger.info(
        f"Starting APKG export for DataFrame with {len(df)} rows to deck '{deck_name}'. Output: {output_path}"
    )

    # Columns copied into the card dict under the same key, in field order.
    passthrough_columns = (
        "Question",
        "Answer",
        "Explanation",
        "Example",
        "Prerequisites",
        "Learning_Outcomes",
    )

    cards_for_apkg: List[Dict[str, Any]] = []
    for _, row in df.iterrows():
        try:
            card_type = _format_field_as_string(row.get("Card_Type", "Basic"))
            topic = _format_field_as_string(row.get("Topic", ""))
            difficulty_raw = _format_field_as_string(row.get("Difficulty", ""))
            difficulty_tag_text = strip_html_tags(difficulty_raw)

            # Tags must not contain spaces; the topic also loses its commas.
            note_tags = []
            if topic:
                note_tags.append(topic.replace(" ", "_").replace(",", "_"))
            if difficulty_tag_text:
                note_tags.append(difficulty_tag_text.replace(" ", "_"))

            card: Dict[str, Any] = {
                "note_type": card_type or "Basic",
                "tags_for_note_object": note_tags,
                "TagsStr": " ".join(note_tags),
            }
            for column in passthrough_columns:
                card[column] = _format_field_as_string(row.get(column, ""))
            card["Difficulty"] = difficulty_raw
            card["SourceURL"] = _format_field_as_string(row.get("Source_URL", ""))
        except Exception as e:
            logger.error(
                f"Error processing DataFrame row for APKG: {row}. Error: {e}",
                exc_info=True,
            )
            continue
        cards_for_apkg.append(card)

    if not cards_for_apkg:
        logger.error("No cards could be processed from DataFrame for APKG export.")
        raise ValueError("No processable cards found in DataFrame for APKG export.")

    return export_cards_to_apkg(
        cards_for_apkg, filename=output_path, deck_name=deck_name
    )
890
+
891
+
892
# --- Compatibility Exports for Tests and Legacy Code ---
# Aliases under the names that tests and older call sites import.

# Legacy name for the DataFrame-based CSV exporter.
export_csv = export_dataframe_to_csv
899
+
900
+
901
+ # MODIFIED: export_deck is now a wrapper to provide a default deck_name
902
def export_deck(
    df: pd.DataFrame,
    output_path: Optional[str] = None,
    deck_name: str = "Ankigen Generated Cards",
) -> str:
    """Export a DataFrame to APKG, supplying a default deck name.

    Thin wrapper around export_dataframe_to_apkg for callers that use the
    legacy ``export_deck`` name.

    Args:
        df: DataFrame holding one card per row.
        output_path: Destination path; passed through unchanged (None is
            allowed and forwarded as-is).
        deck_name: Name of the Anki deck.

    Returns:
        The value returned by export_dataframe_to_apkg.

    Raises:
        gr.Error: If ``df`` is None or empty (the error type and message the
            tests expect).
    """
    has_rows = df is not None and not df.empty
    if not has_rows:
        logger.warning("export_deck called with None or empty DataFrame.")
        raise gr.Error("No card data available")

    # output_path may be None; it is handed down unchanged.
    return export_dataframe_to_apkg(df, output_path=output_path, deck_name=deck_name)
926
+
927
+
928
+ export_dataframe_csv = export_dataframe_to_csv
929
+ export_dataframe_apkg = export_dataframe_to_apkg
930
+
931
+ __all__ = [
932
+ "BASIC_MODEL",
933
+ "CLOZE_MODEL",
934
+ "export_csv",
935
+ "export_deck",
936
+ "export_dataframe_csv",
937
+ "export_dataframe_apkg",
938
+ "export_cards_to_csv",
939
+ "export_cards_to_apkg",
940
+ "export_cards_from_crawled_content",
941
+ "export_dataframe_to_csv",
942
+ "export_dataframe_to_apkg",
943
+ ]
ankigen/llm_interface.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Module for OpenAI client management and API call logic
2
+
3
+ import asyncio
4
+ import time
5
+ from typing import Optional, TypeVar
6
+
7
+ from agents import Agent, ModelSettings, Runner, set_default_openai_client
8
+ from openai import (
9
+ APIConnectionError,
10
+ APIStatusError,
11
+ AsyncOpenAI,
12
+ OpenAIError,
13
+ RateLimitError,
14
+ )
15
+ from pydantic import BaseModel
16
+
17
+ from ankigen.logging import logger
18
+ from ankigen.utils import ResponseCache
19
+
20
+ T = TypeVar("T", bound=BaseModel)
21
+
22
+
23
class OpenAIClientManager:
    """Manages the AsyncOpenAI client instance.

    The manager owns at most one client. It can be used as a sync or async
    context manager; leaving the context closes the client and clears the
    reference, after which ``get_client`` raises until the manager is
    initialized again.
    """

    def __init__(self):
        self._client: Optional[AsyncOpenAI] = None
        self._api_key: Optional[str] = None
        # Strong reference to a close scheduled from sync code on a running
        # loop, so the task is not garbage-collected before it finishes.
        self._close_task: Optional[asyncio.Task] = None

    async def initialize_client(self, api_key: str):
        """Initializes the AsyncOpenAI client with the given API key.

        Args:
            api_key: OpenAI API key; must be non-empty and start with "sk-".

        Raises:
            ValueError: If the key is empty or does not start with "sk-".
            OpenAIError: If constructing the client fails with an OpenAI error.
            RuntimeError: If constructing the client fails for any other reason.
        """
        if not api_key or not api_key.startswith("sk-"):
            logger.error("Invalid OpenAI API key provided for client initialization.")
            raise ValueError("Invalid OpenAI API key format.")
        self._api_key = api_key
        try:
            self._client = AsyncOpenAI(api_key=self._api_key)
            logger.info("AsyncOpenAI client initialized successfully.")
        except OpenAIError as e:  # Catch specific OpenAI errors
            logger.error(f"Failed to initialize AsyncOpenAI client: {e}", exc_info=True)
            self._client = None  # Ensure client is None on failure
            raise  # Re-raise the OpenAIError to be caught by UI
        except Exception as e:  # Catch any other unexpected errors
            logger.error(
                f"An unexpected error occurred during AsyncOpenAI client initialization: {e}",
                exc_info=True,
            )
            self._client = None
            # Chain the cause so the original traceback is not lost.
            raise RuntimeError(
                "Unexpected error initializing AsyncOpenAI client."
            ) from e

    def get_client(self) -> AsyncOpenAI:
        """Returns the initialized AsyncOpenAI client.

        Raises:
            RuntimeError: If the client was never initialized, failed to
                initialize, or has been closed.
        """
        if self._client is None:
            logger.error(
                "AsyncOpenAI client accessed before initialization or after a failed initialization."
            )
            raise RuntimeError(
                "AsyncOpenAI client is not initialized. Please provide a valid API key."
            )
        return self._client

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - cleanup resources."""
        self.close()
        return False

    async def __aenter__(self):
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - cleanup resources."""
        await self.aclose()
        return False

    @staticmethod
    def _invoke_closer(client, method_names):
        """Call the first closing method found on ``client``.

        Returns the coroutine that call produced, or None when the method
        completed synchronously or no such method exists.
        """
        for method_name in method_names:
            closer = getattr(client, method_name, None)
            if closer is not None:
                outcome = closer()
                return outcome if asyncio.iscoroutine(outcome) else None
        return None

    def close(self) -> None:
        """Close the OpenAI client from synchronous code.

        The async client's ``close()`` returns a coroutine, which must be
        driven to completion. With no event loop running it is run via
        ``asyncio.run``; if a loop is already running in this thread the
        close is scheduled on it as a task (prefer ``aclose`` there).
        Errors are logged, never raised. The client reference is always cleared.
        """
        client, self._client = self._client, None
        if not client:
            return
        try:
            pending = self._invoke_closer(client, ("close",))
            if pending is not None:
                try:
                    loop = asyncio.get_running_loop()
                except RuntimeError:
                    # No loop in this thread: safe to block until closed.
                    asyncio.run(pending)
                else:
                    # Cannot block a running loop; let it finish the close.
                    self._close_task = loop.create_task(pending)
            logger.debug("OpenAI client closed")
        except Exception as e:
            logger.warning(f"Error closing OpenAI client: {e}")

    async def aclose(self) -> None:
        """Close the OpenAI client asynchronously.

        Uses ``aclose`` when the client has it, otherwise ``close``, and
        awaits the result when it is a coroutine. Errors are logged, never
        raised. The client reference is always cleared.
        """
        client, self._client = self._client, None
        if not client:
            return
        try:
            pending = self._invoke_closer(client, ("aclose", "close"))
            if pending is not None:
                await pending
            logger.debug("OpenAI client closed (async)")
        except Exception as e:
            logger.warning(f"Error closing OpenAI client: {e}")
107
+
108
+
109
+ # --- Agents SDK Utility ---
110
+
111
+
112
async def structured_agent_call(
    openai_client: AsyncOpenAI,
    model: str,
    instructions: str,
    user_input: str,
    output_type: type[T],
    cache: Optional[ResponseCache] = None,
    cache_key: Optional[str] = None,
    temperature: float = 0.7,
    timeout: float = 120.0,
    retry_attempts: int = 3,
) -> T:
    """
    Run one single-turn, structured-output request through the agents SDK.

    Intended for simple request/response calls rather than multi-agent
    workflows. The flow is: cache lookup, agent construction, then up to
    ``retry_attempts`` executions with exponential backoff between failures.

    Args:
        openai_client: AsyncOpenAI client registered as the SDK default client
        model: Model name (e.g., "gpt-5.2", "gpt-5.2-chat-latest")
        instructions: System instructions given to the agent
        user_input: User prompt passed to the run
        output_type: Pydantic model class describing the structured output
        cache: Optional ResponseCache; only used together with ``cache_key``
        cache_key: Key under which the response is looked up and stored
        temperature: Model temperature (default 0.7)
        timeout: Per-attempt timeout in seconds (default 120)
        retry_attempts: Maximum number of attempts (default 3)

    Returns:
        The run's ``final_output``, or the cached value when one exists
        (cached dicts are re-validated into ``output_type``).

    Raises:
        asyncio.TimeoutError: when the last attempt timed out.
        Exception: whatever the last attempt raised.
        RuntimeError: when the loop finishes without running an attempt.
    """
    # Serve from the cache when both a cache and a key were supplied.
    if cache and cache_key:
        hit = cache.get(cache_key, model)
        if hit is not None:
            logger.info(f"Using cached response for model {model}")
            # Entries are stored as plain dicts, so rebuild the model from them.
            return output_type.model_validate(hit) if isinstance(hit, dict) else hit

    # Register the client with the agents SDK (tracing left untouched).
    set_default_openai_client(openai_client, use_for_tracing=False)

    settings_kwargs: dict = {"temperature": temperature}
    # GPT-5.x models other than the chat-latest variants get a reasoning setting.
    wants_reasoning = model.startswith("gpt-5") and "chat-latest" not in model
    if wants_reasoning:
        from openai.types.shared import Reasoning

        settings_kwargs["reasoning"] = Reasoning(effort="none")

    agent = Agent(
        name="structured_output_agent",
        instructions=instructions,
        model=model,
        model_settings=ModelSettings(**settings_kwargs),
        output_type=output_type,
    )

    last_error: Optional[Exception] = None
    for attempt in range(retry_attempts):
        try:
            run_result = await asyncio.wait_for(
                Runner.run(agent, user_input),
                timeout=timeout,
            )
            output = run_result.final_output

            # Store successful results; models are dumped to dicts first.
            if cache and cache_key and output is not None:
                payload = output.model_dump() if isinstance(output, BaseModel) else output
                cache.set(cache_key, model, payload)

            logger.debug(f"Successfully received response from model {model}")
            return output

        except Exception as exc:
            last_error = exc
            timed_out = isinstance(exc, asyncio.TimeoutError)

            if attempt >= retry_attempts - 1:
                # Out of attempts: report and propagate the original exception.
                if timed_out:
                    logger.error(f"Agent timed out after {retry_attempts} attempts")
                else:
                    logger.error(f"Agent failed after {retry_attempts} attempts: {exc}")
                raise

            wait_time = 4 * (2**attempt)  # Exponential backoff: 4s, 8s, 16s, ...
            if timed_out:
                logger.warning(
                    f"Agent timed out (attempt {attempt + 1}/{retry_attempts}), "
                    f"retrying in {wait_time}s..."
                )
            else:
                logger.warning(
                    f"Agent failed (attempt {attempt + 1}/{retry_attempts}): {exc}, "
                    f"retrying in {wait_time}s..."
                )
            await asyncio.sleep(wait_time)

    raise RuntimeError(f"Retry loop exited without result: {last_error}")
226
+
227
+
228
+ # Generic schema for arbitrary JSON structured outputs
229
class GenericJsonOutput(BaseModel):
    """Generic container for JSON output - allows any structure.

    Declares no fields of its own; the ``extra="allow"`` setting lets
    arbitrary fields through. Used in this file as the ``output_type`` of
    ``structured_output_completion``.
    """

    model_config = {"extra": "allow"}  # Allow arbitrary fields
233
+
234
+
235
async def structured_output_completion(
    openai_client: AsyncOpenAI,
    model: str,
    response_format: dict,  # Legacy parameter - kept for API compatibility
    system_prompt: str,
    user_prompt: str,
    cache: ResponseCache,
) -> Optional[dict]:
    """
    Dict-returning wrapper around structured_agent_call().

    ``response_format`` is accepted but never read. The system prompt is
    extended with a JSON instruction unless it already contains one, and the
    call is cached under a key built from the unmodified system and user
    prompts. For typed outputs, call structured_agent_call() directly.

    Returns:
        The response dumped to a dict when it is a Pydantic model, otherwise
        the raw result.

    Raises:
        Exception: any failure of the underlying call is logged and re-raised.
    """
    cache_key = f"{system_prompt}:{user_prompt}"

    # Only append the JSON instruction when the prompt does not already carry it.
    if "JSON object matching the specified schema" in system_prompt:
        instructions = system_prompt
    else:
        instructions = f"{system_prompt}\nProvide your response as a JSON object matching the specified schema."

    try:
        response = await structured_agent_call(
            openai_client=openai_client,
            model=model,
            instructions=instructions.strip(),
            user_input=user_prompt.strip(),
            output_type=GenericJsonOutput,
            cache=cache,
            cache_key=cache_key,
            temperature=0.7,
        )
        # Callers expect a plain dict, so unwrap Pydantic models.
        return response.model_dump() if isinstance(response, BaseModel) else response
    except Exception as e:
        logger.error(
            f"structured_output_completion failed for model {model}: {e}",
            exc_info=True,
        )
        raise  # Re-raise unexpected errors
279
+
280
+
281
+ # Specific OpenAI exceptions to retry on
282
RETRYABLE_OPENAI_ERRORS = (
    APIConnectionError,
    RateLimitError,
    # NOTE(review): in openai-python, APIStatusError appears to be the base class
    # for every non-success HTTP status (4xx as well as 5xx) - confirm that
    # retrying client errors such as 400/401 is really intended here.
    APIStatusError,  # Typically for 5xx server errors
)
287
+
288
+ # --- New OpenAIRateLimiter Class (Subtask 9.2) ---
289
+
290
+
291
class OpenAIRateLimiter:
    """Manages token usage to proactively stay within (estimated) OpenAI rate limits.

    Accounting uses fixed 60-second windows measured on the monotonic clock.
    ``wait_if_needed`` is safe to await from several coroutines at once: the
    window state is re-examined after every sleep, and there is no ``await``
    between that check and the counter update, so one waiter cannot wipe out
    tokens another waiter has just recorded.
    """

    _WINDOW_SECONDS = 60.0

    def __init__(self, tokens_per_minute: int = 60000):  # Default, can be configured
        self.tokens_per_minute_limit: int = tokens_per_minute
        self.tokens_used_current_window: int = 0
        self.current_window_start_time: float = time.monotonic()

    async def wait_if_needed(self, estimated_tokens_for_request: int):
        """Wait until the request fits into the current window, then record it.

        Args:
            estimated_tokens_for_request: Estimated token cost of the request
                about to be sent.

        A request larger than the whole per-minute limit can never fit, so it
        is admitted as soon as the window is empty rather than sleeping for a
        reset that would not help.
        """
        while True:
            current_time = time.monotonic()

            # Start a fresh window once the current one has run its 60 seconds.
            if current_time - self.current_window_start_time >= self._WINDOW_SECONDS:
                self.current_window_start_time = current_time
                self.tokens_used_current_window = 0
                logger.debug("OpenAIRateLimiter: Window reset.")

            fits = (
                self.tokens_used_current_window + estimated_tokens_for_request
                <= self.tokens_per_minute_limit
            )
            if fits or self.tokens_used_current_window == 0:
                break

            # The window has not elapsed here, so this is always positive.
            time_to_wait = (
                self.current_window_start_time + self._WINDOW_SECONDS
            ) - current_time
            logger.info(
                f"OpenAIRateLimiter: Approaching token limit. Waiting for {time_to_wait:.2f} seconds to reset window."
            )
            await asyncio.sleep(time_to_wait)
            # Loop again instead of resetting blindly: another waiter may
            # already have opened the new window and recorded its tokens.

        self.tokens_used_current_window += estimated_tokens_for_request
        logger.debug(
            f"OpenAIRateLimiter: Tokens used in current window: {self.tokens_used_current_window}/{self.tokens_per_minute_limit}"
        )
332
+
333
+
334
+ # Global instance of the rate limiter
335
+ # This assumes a single rate limit bucket for all calls from this application instance.
336
+ # More sophisticated scenarios might need per-model or per-key limiters.
337
+ openai_rate_limiter = OpenAIRateLimiter() # Using default 60k TPM for now
ankigen/logging.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import sys
4
+ from datetime import datetime
5
+
6
+
7
def setup_logger(name="ankigen", log_level=logging.INFO):
    """Set up and return a logger with file and console handlers.

    Args:
        name: Logger name; also used as the prefix of the log file name.
        log_level: Level applied to the logger itself.

    Returns:
        The configured ``logging.Logger``. Calling this again for the same
        name replaces the previous handlers instead of stacking new ones.
    """
    logger = logging.getLogger(name)
    logger.setLevel(log_level)

    # Detach AND close handlers from an earlier call. Merely clearing the list
    # would leave the previous log file open for the life of the process.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(lineno)d - %(message)s"
    )

    # Console output goes to stdout.
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # Logs are stored in ~/.ankigen/logs/, one file per day
    # (e.g., ankigen_20231027.log).
    log_dir = os.path.join(os.path.expanduser("~"), ".ankigen", "logs")
    os.makedirs(log_dir, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d")
    log_file = os.path.join(log_dir, f"{name}_{timestamp}.log")

    # Explicit UTF-8 so non-ASCII messages do not depend on the platform's
    # default encoding.
    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    return logger
43
+
44
+
45
+ # Create a default logger instance for easy import and use.
46
+ # Projects can also create their own named loggers using setup_logger(name="my_module_logger")
47
+ logger = setup_logger()
ankigen/models.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List, Optional
3
+
4
+ # Module for Pydantic data models
5
+
6
+
7
class Step(BaseModel):
    """Schema with two required string fields: ``explanation`` and ``output``."""

    explanation: str
    output: str
10
+
11
+
12
class Subtopics(BaseModel):
    """Schema holding a list of ``Step`` entries plus a list of result strings."""

    steps: List[Step]
    result: List[str]
15
+
16
+
17
class Topics(BaseModel):
    """Top-level schema wrapping a list of ``Subtopics`` under ``result``."""

    result: List[Subtopics]
19
+
20
+
21
class CardFront(BaseModel):
    """Front side of a card; ``question`` is optional and defaults to ``None``."""

    question: Optional[str] = None
23
+
24
+
25
class CardBack(BaseModel):
    """Back side of a card.

    ``answer`` is optional (defaults to ``None``); ``explanation`` and
    ``example`` are required strings.
    """

    answer: Optional[str] = None
    explanation: str
    example: str
29
+
30
+
31
class Card(BaseModel):
    """A single card: a ``CardFront``, a ``CardBack`` and optional metadata."""

    front: CardFront
    back: CardBack
    # Free-form dict; None when no metadata was supplied.
    metadata: Optional[dict] = None
    card_type: str = "basic"  # Defaults to "basic" when not supplied
36
+
37
+
38
class CardList(BaseModel):
    """A topic name together with the list of ``Card`` objects under it."""

    topic: str
    cards: List[Card]
41
+
42
+
43
class ConceptBreakdown(BaseModel):
    """Schema describing one concept: its name, prerequisites, outcomes and difficulty."""

    main_concept: str
    prerequisites: List[str]
    learning_outcomes: List[str]
    # Plain string, not validated here; expected values per the original note.
    difficulty_level: str  # "beginner", "intermediate", "advanced"
48
+
49
+
50
class CardGeneration(BaseModel):
    """Schema pairing a ``Card`` with the concept, thought process and verification steps recorded for it."""

    concept: str
    thought_process: str
    verification_steps: List[str]
    card: Card
55
+
56
+
57
class LearningSequence(BaseModel):
    """Schema bundling a topic with its concept breakdowns, card generations,
    a suggested study order and review recommendations."""

    topic: str
    concepts: List[ConceptBreakdown]
    cards: List[CardGeneration]
    suggested_study_order: List[str]
    review_recommendations: List[str]
ankigen/ui_logic.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Module for functions that build or manage UI sections/logic
2
+
3
+ import gradio as gr
4
+ import pandas as pd
5
+ from typing import List
6
+
7
+ from ankigen.utils import get_logger
8
+ from ankigen.models import Card
9
+
10
+ logger = get_logger()
11
+
12
+
13
def update_mode_visibility(mode: str, current_subject: str):
    """Build the Gradio updates applied when the generation mode changes.

    ``mode`` is accepted but not inspected: only 'subject' mode exists today
    and the parameter is kept for future extensibility.

    Returns:
        A 5-tuple of ``gr.update`` objects, in order: subject group (visible),
        cards output (visible), subject textbox (set to ``current_subject``),
        results table (reset to an empty DataFrame with the standard columns),
        and the total-cards HTML (reset to 0 and hidden).
    """
    # Standard column layout of the (empty) results table.
    empty_results = pd.DataFrame(
        columns=[
            "Index",
            "Topic",
            "Card_Type",
            "Question",
            "Answer",
            "Explanation",
            "Example",
            "Prerequisites",
            "Learning_Outcomes",
            "Difficulty",
        ]
    )

    subject_group_update = gr.update(visible=True)  # subject_mode (Group) - always visible
    cards_output_update = gr.update(visible=True)  # cards_output - always visible
    subject_box_update = gr.update(value=current_subject)  # subject textbox value
    results_table_update = gr.update(value=empty_results)  # output DataFrame
    total_cards_update = gr.update(
        value="<div><b>Total Cards Generated:</b> <span id='total-cards-count'>0</span></div>",
        visible=False,
    )  # total_cards_html

    return (
        subject_group_update,
        cards_output_update,
        subject_box_update,
        results_table_update,
        total_cards_update,
    )
45
+
46
+
47
+ # --- Card Preview and Editing Utilities ---
48
+
49
+
50
def cards_to_dataframe(cards: List[Card]) -> pd.DataFrame:
    """Flatten Card objects into the DataFrame layout shown in the UI.

    Each card becomes one row with a 1-based ``ID``. Topic, tags and source
    URL are read from the card's metadata and fall back to "N/A", "" and ""
    when the metadata is missing or empty.
    """

    def _as_row(position, card):
        # Treat missing/empty metadata as an empty mapping so every lookup
        # falls through to its default.
        meta = card.metadata if card.metadata else {}
        tags = meta.get("tags", [])
        return {
            "ID": position,  # 1-indexed ID for display
            "Topic": meta.get("topic", "N/A"),
            "Front": card.front.question,
            "Back": card.back.answer,
            "Tags": ", ".join(tags) if tags else "",
            "Card Type": card.card_type or "Basic",
            "Explanation": card.back.explanation or "",
            "Example": card.back.example or "",
            "Source_URL": meta.get("source_url", ""),
        }

    rows = [_as_row(position, card) for position, card in enumerate(cards, start=1)]

    # Columns are listed explicitly so an empty input still yields the full layout.
    column_order = [
        "ID",
        "Topic",
        "Front",
        "Back",
        "Tags",
        "Card Type",
        "Explanation",
        "Example",
        "Source_URL",
    ]
    return pd.DataFrame(rows, columns=column_order)
90
+
91
+
92
+ def dataframe_to_cards(df: pd.DataFrame, original_cards: List[Card]) -> List[Card]:
93
+ """
94
+ Updates a list of Card objects based on edits from a Pandas DataFrame.
95
+ Assumes the DataFrame 'ID' column corresponds to the 1-based index of original_cards.
96
+ """
97
+ updated_cards: List[Card] = []
98
+ if df.empty and not original_cards:
99
+ return []
100
+ if df.empty and original_cards:
101
+ return []
102
+
103
+ for index, row in df.iterrows():
104
+ try:
105
+ card_id = int(row["ID"]) # DataFrame ID is 1-indexed
106
+ original_card_index = card_id - 1
107
+
108
+ if 0 <= original_card_index < len(original_cards):
109
+ card_to_update = original_cards[original_card_index]
110
+
111
+ new_front = card_to_update.front.copy(
112
+ update={
113
+ "question": str(row.get("Front", card_to_update.front.question))
114
+ }
115
+ )
116
+ new_back = card_to_update.back.copy(
117
+ update={
118
+ "answer": str(row.get("Back", card_to_update.back.answer)),
119
+ "explanation": str(
120
+ row.get("Explanation", card_to_update.back.explanation)
121
+ ),
122
+ "example": str(row.get("Example", card_to_update.back.example)),
123
+ }
124
+ )
125
+
126
+ tags_str = str(
127
+ row.get(
128
+ "Tags",
129
+ ",".join(
130
+ card_to_update.metadata.get("tags", [])
131
+ if card_to_update.metadata
132
+ else []
133
+ ),
134
+ )
135
+ )
136
+ new_tags = [t.strip() for t in tags_str.split(",") if t.strip()]
137
+
138
+ new_metadata = (
139
+ card_to_update.metadata.copy() if card_to_update.metadata else {}
140
+ )
141
+ new_metadata["tags"] = new_tags
142
+ new_metadata["topic"] = str(
143
+ row.get("Topic", new_metadata.get("topic", "N/A"))
144
+ )
145
+
146
+ updated_card = card_to_update.copy(
147
+ update={
148
+ "front": new_front,
149
+ "back": new_back,
150
+ "card_type": str(
151
+ row.get("Card Type", card_to_update.card_type or "Basic")
152
+ ),
153
+ "metadata": new_metadata,
154
+ }
155
+ )
156
+ updated_cards.append(updated_card)
157
+ else:
158
+ logger.warning(
159
+ f"Card ID {card_id} from DataFrame is out of bounds for original_cards list."
160
+ )
161
+ except (ValueError, KeyError, AttributeError) as e:
162
+ logger.error(
163
+ f"Error processing row {index} from DataFrame: {row}. Error: {e}"
164
+ )
165
+ if 0 <= original_card_index < len(original_cards):
166
+ updated_cards.append(original_cards[original_card_index])
167
+ continue
168
+ return updated_cards
ankigen/utils.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Module for utility functions (logging, caching, web fetching)
2
+
3
+ import logging
4
+ from logging.handlers import RotatingFileHandler
5
+ import sys
6
+ import hashlib
7
+ import requests
8
+ from bs4 import BeautifulSoup
9
+ from typing import Any, Optional
10
+ import time
11
+
12
+ # --- Logging Setup ---
13
+ _logger_instance = None
14
+
15
+
16
def setup_logging() -> logging.Logger:
    """Configure the "ankigen" logger to write to both a file and the console.

    The configured logger is memoised in the module-level ``_logger_instance``;
    later calls return it unchanged.

    Returns:
        The "ankigen" logger with a rotating file handler (DEBUG and above,
        detailed format) and a stdout handler (INFO and above, short format).
    """
    global _logger_instance
    if _logger_instance is not None:
        return _logger_instance

    logger = logging.getLogger("ankigen")
    logger.setLevel(logging.DEBUG)  # Logger passes everything; handlers filter

    # Drop handlers attached by anyone else, closing them so any file they
    # hold does not stay open.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    detailed_formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(lineno)d - %(message)s"
    )
    simple_formatter = logging.Formatter("%(levelname)s: %(message)s")

    # Explicit UTF-8: with the platform default encoding, non-ASCII text in a
    # log message can fail to encode (e.g. on Windows code pages).
    file_handler = RotatingFileHandler(
        "ankigen.log", maxBytes=1024 * 1024, backupCount=5, encoding="utf-8"
    )
    file_handler.setLevel(logging.DEBUG)  # File handler logs everything from DEBUG up
    file_handler.setFormatter(detailed_formatter)

    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)  # Console handler logs INFO and above
    console_handler.setFormatter(simple_formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    _logger_instance = logger
    return logger
49
+
50
+
51
def get_logger() -> logging.Logger:
    """Return the shared logger, configuring it on first use."""
    existing = _logger_instance
    return setup_logging() if existing is None else existing
56
+
57
+
58
+ # Initialize logger when module is loaded
59
+ logger = get_logger()
60
+
61
+
62
+ # --- Caching ---
63
class ResponseCache:
    """Simple LRU cache for API responses, keyed by (prompt, model).

    At most ``maxsize`` entries are kept; inserting into a full cache evicts
    the least recently used entry. A ``maxsize`` of zero or less disables
    storage entirely: ``set`` becomes a no-op and every ``get`` is a miss.
    """

    def __init__(self, maxsize: int = 128):
        self.maxsize = maxsize
        self._cache = {}  # {hashed key: response}
        self._access_order = []  # hashed keys, least recently used first
        self.hits = 0
        self.misses = 0

    def get(self, prompt: str, model: str) -> Optional[Any]:
        """Retrieve item from cache, updating LRU order.

        Returns the stored response, or ``None`` on a miss. Hit and miss
        counters are updated either way.
        """
        cache_key = self._create_key(prompt, model)

        if cache_key in self._cache:
            # Move to end (most recently used)
            self._access_order.remove(cache_key)
            self._access_order.append(cache_key)
            self.hits += 1
            logger.debug(
                f"Cache HIT: {cache_key[:16]}... (hits={self.hits}, misses={self.misses})"
            )
            return self._cache[cache_key]

        self.misses += 1
        logger.debug(
            f"Cache MISS: {cache_key[:16]}... (hits={self.hits}, misses={self.misses})"
        )
        return None

    def set(self, prompt: str, model: str, response: Any):
        """Store item in cache with LRU eviction when full."""
        # A non-positive capacity means "do not cache". Without this guard the
        # eviction below would pop from an empty list and raise IndexError.
        if self.maxsize <= 0:
            return

        cache_key = self._create_key(prompt, model)

        if cache_key in self._cache:
            # Existing key: it is re-appended below as most recently used.
            self._access_order.remove(cache_key)
        else:
            # Make room for the new key by evicting least recently used entries.
            while self._access_order and len(self._cache) >= self.maxsize:
                evicted_key = self._access_order.pop(0)
                del self._cache[evicted_key]
                logger.debug(
                    f"Cache EVICT: {evicted_key[:16]}... (size={len(self._cache)})"
                )

        self._cache[cache_key] = response
        self._access_order.append(cache_key)
        logger.debug(f"Cache SET: {cache_key[:16]}... (size={len(self._cache)})")

    def clear(self) -> None:
        """Clear all cache entries and statistics."""
        self._cache.clear()
        self._access_order.clear()
        self.hits = 0
        self.misses = 0
        logger.debug("Cache CLEARED")

    def _create_key(self, prompt: str, model: str) -> str:
        """Create cache key from prompt and model (MD5 hash for size efficiency)."""
        # The hash only shortens the key; it is not used for security.
        return hashlib.md5(f"{model}:{prompt}".encode("utf-8")).hexdigest()
124
+
125
+
126
+ # --- Web Content Fetching ---
127
def fetch_webpage_text(url: str) -> str:
    """Fetches and extracts main text content from a URL.

    The page is downloaded with a browser-like User-Agent and a 15 second
    timeout, ``<script>``/``<style>`` elements are dropped, and text is taken
    from ``<main>``, else ``<article>``, else ``<body>``. Blank lines are
    removed from the result.

    Returns:
        The cleaned text, or "" when nothing meaningful could be extracted.

    Raises:
        ConnectionError: the request failed or returned an error status.
        RuntimeError: the HTML could not be parsed, or an unexpected error
            occurred while processing the page.
    """
    # Imported here because only this function needs it. bs4 raises
    # FeatureNotFound (a ValueError), not ImportError, when the requested
    # parser such as lxml is not installed.
    from bs4 import FeatureNotFound

    logger_util = get_logger()  # Use the logger from this module
    try:
        logger_util.info(f"Fetching content from URL: {url}")
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        logger_util.debug(f"Parsing HTML content for {url}")
        try:
            soup = BeautifulSoup(response.text, "lxml")
        except (FeatureNotFound, ImportError):
            # lxml unavailable: fall back to the parser bundled with Python.
            logger_util.warning("lxml not found, using html.parser instead.")
            soup = BeautifulSoup(response.text, "html.parser")
        except Exception as e:  # Catch other BeautifulSoup init errors
            logger_util.error(
                f"BeautifulSoup initialization failed for {url}: {e}", exc_info=True
            )
            raise RuntimeError(f"Failed to parse HTML content for {url}.") from e

        for script_or_style in soup(["script", "style"]):
            script_or_style.extract()

        main_content = soup.find("main")
        if not main_content:
            main_content = soup.find("article")

        if main_content:
            text = main_content.get_text()
            logger_util.debug(f"Extracted text from <{main_content.name}> tag.")
        else:
            body = soup.find("body")
            if body:
                text = body.get_text()
                logger_util.debug("Extracted text from <body> tag (fallback).")
            else:
                text = ""
                logger_util.warning(f"Could not find <body> tag in {url}")

        # Simpler text cleaning: join stripped, non-empty lines
        lines = (line.strip() for line in text.splitlines())
        cleaned_text = "\n".join(line for line in lines if line)

        if not cleaned_text:
            logger_util.warning(f"Could not extract meaningful text from {url}")
            return ""

        logger_util.info(
            f"Successfully extracted text from {url} (Length: {len(cleaned_text)} chars)"
        )
        return cleaned_text

    except requests.exceptions.RequestException as e:
        logger_util.error(f"Network error fetching URL {url}: {e}", exc_info=True)
        raise ConnectionError(f"Could not fetch URL: {e}") from e
    except Exception as e:
        logger_util.error(f"Error processing URL {url}: {e}", exc_info=True)
        if isinstance(e, (ValueError, ConnectionError, RuntimeError)):
            raise
        raise RuntimeError(
            f"An unexpected error occurred while processing the URL: {e}"
        ) from e
193
+
194
+
195
+ # --- New Synchronous RateLimiter Class ---
196
class RateLimiter:
    """A simple synchronous rate limiter.

    Enforces a minimum interval of ``1 / requests_per_second`` seconds between
    consecutive ``wait()`` calls by sleeping the calling thread. No locking is
    done, so an instance is meant to be used from a single thread.
    """

    def __init__(self, requests_per_second: float):
        """
        Args:
            requests_per_second: Maximum request rate; must be positive.

        Raises:
            ValueError: if ``requests_per_second`` is zero or negative.
        """
        if requests_per_second <= 0:
            raise ValueError("Requests per second must be positive.")
        self.min_interval_seconds: float = 1.0 / requests_per_second
        # "No request yet". The monotonic clock has an undefined origin, so a
        # literal 0.0 could look like a recent request and delay the very
        # first call; -inf never does.
        self.last_request_timestamp: float = float("-inf")

    def wait(self) -> None:
        """Blocks until it's safe to make the next request."""
        current_time = time.monotonic()  # Use monotonic clock for intervals
        time_since_last_request = current_time - self.last_request_timestamp

        if time_since_last_request < self.min_interval_seconds:
            time.sleep(self.min_interval_seconds - time_since_last_request)

        # Stamp after any sleep so the next interval starts from the moment
        # this request is actually released.
        self.last_request_timestamp = time.monotonic()
217
+
218
+
219
+ # --- Existing Utility Functions (if any) ---
220
+ # def some_other_util_function():
221
+ # pass
222
+
223
+
224
def strip_html_tags(text: str) -> str:
    """Return ``text`` with HTML markup removed and surrounding whitespace trimmed.

    Parsing is delegated to BeautifulSoup's "html.parser" rather than regular
    expressions. Non-string input is not parsed; it is returned as ``str(text)``.
    """
    if isinstance(text, str):
        return BeautifulSoup(text, "html.parser").get_text().strip()
    return str(text)
app.py CHANGED
@@ -7,23 +7,23 @@ from datetime import datetime
7
  import gradio as gr
8
  import pandas as pd
9
 
10
- from ankigen_core.card_generator import (
11
  AVAILABLE_MODELS,
12
  orchestrate_card_generation,
13
  ) # GENERATION_MODES is internal to card_generator
14
- from ankigen_core.exporters import (
15
  export_dataframe_to_apkg,
16
  export_dataframe_to_csv,
17
  ) # Anki models (BASIC_MODEL, CLOZE_MODEL) are internal to exporters
18
- from ankigen_core.llm_interface import (
19
  OpenAIClientManager,
20
  ) # structured_output_completion is internal to core modules
21
- from ankigen_core.ui_logic import update_mode_visibility
22
- from ankigen_core.utils import (
23
  ResponseCache,
24
  get_logger,
25
  ) # fetch_webpage_text is used by card_generator
26
- from ankigen_core.auto_config import AutoConfigService
27
 
28
  # --- Initialization ---
29
  logger = get_logger()
@@ -341,7 +341,7 @@ def create_ankigen_interface(theme=None, css=None, js=None):
341
  label="Download Deck", visible=False
342
  )
343
 
344
- # --- Event Handlers --- (Updated to use functions from ankigen_core)
345
  generation_mode.change(
346
  fn=update_mode_visibility,
347
  inputs=[
 
7
  import gradio as gr
8
  import pandas as pd
9
 
10
+ from ankigen.card_generator import (
11
  AVAILABLE_MODELS,
12
  orchestrate_card_generation,
13
  ) # GENERATION_MODES is internal to card_generator
14
+ from ankigen.exporters import (
15
  export_dataframe_to_apkg,
16
  export_dataframe_to_csv,
17
  ) # Anki models (BASIC_MODEL, CLOZE_MODEL) are internal to exporters
18
+ from ankigen.llm_interface import (
19
  OpenAIClientManager,
20
  ) # structured_output_completion is internal to core modules
21
+ from ankigen.ui_logic import update_mode_visibility
22
+ from ankigen.utils import (
23
  ResponseCache,
24
  get_logger,
25
  ) # fetch_webpage_text is used by card_generator
26
+ from ankigen.auto_config import AutoConfigService
27
 
28
  # --- Initialization ---
29
  logger = get_logger()
 
341
  label="Download Deck", visible=False
342
  )
343
 
344
+ # --- Event Handlers --- (Updated to use functions from ankigen)
345
  generation_mode.change(
346
  fn=update_mode_visibility,
347
  inputs=[
pyproject.toml CHANGED
@@ -49,10 +49,13 @@ cli = [
49
  ]
50
 
51
  [project.scripts]
52
- ankigen = "ankigen_core.cli:main"
53
 
54
  [tool.setuptools]
55
  py-modules = ["app"]
56
 
 
 
 
57
  [tool.pytest.ini_options]
58
  anyio_backend = "asyncio"
 
49
  ]
50
 
51
  [project.scripts]
52
+ ankigen = "ankigen.cli:main"
53
 
54
  [tool.setuptools]
55
  py-modules = ["app"]
56
 
57
+ [tool.setuptools.packages.find]
58
+ include = ["ankigen*"]
59
+
60
  [tool.pytest.ini_options]
61
  anyio_backend = "asyncio"