Upload folder using huggingface_hub
Browse files
- README.md +6 -6
- ankigen/__init__.py +1 -0
- ankigen/agents/.env.example +199 -0
- ankigen/agents/__init__.py +12 -0
- ankigen/agents/base.py +257 -0
- ankigen/agents/config.py +260 -0
- ankigen/agents/generators.py +405 -0
- ankigen/agents/integration.py +287 -0
- ankigen/agents/performance.py +475 -0
- ankigen/agents/schemas.py +189 -0
- ankigen/agents/security.py +397 -0
- ankigen/agents/templates/generators.j2 +16 -0
- ankigen/agents/token_tracker.py +221 -0
- ankigen/auto_config.py +211 -0
- ankigen/card_generator.py +311 -0
- ankigen/cli.py +372 -0
- ankigen/context7.py +339 -0
- ankigen/exceptions.py +86 -0
- ankigen/exporters.py +943 -0
- ankigen/llm_interface.py +337 -0
- ankigen/logging.py +47 -0
- ankigen/models.py +62 -0
- ankigen/ui_logic.py +168 -0
- ankigen/utils.py +231 -0
- app.py +7 -7
- pyproject.toml +4 -1
README.md
CHANGED
|
@@ -56,21 +56,21 @@ Generate flashcards directly from your terminal with intelligent auto-configurat
|
|
| 56 |
|
| 57 |
```bash
|
| 58 |
# Quick generation (auto-detects best settings)
|
| 59 |
-
uv run python -m
|
| 60 |
|
| 61 |
# Custom settings
|
| 62 |
-
uv run python -m
|
| 63 |
--topics 5 \
|
| 64 |
--cards-per-topic 8 \
|
| 65 |
--output hooks.apkg
|
| 66 |
|
| 67 |
# Export to CSV
|
| 68 |
-
uv run python -m
|
| 69 |
--format csv \
|
| 70 |
-o docker.csv
|
| 71 |
|
| 72 |
# Skip confirmation prompt
|
| 73 |
-
uv run python -m
|
| 74 |
```
|
| 75 |
|
| 76 |
**CLI Options:**
|
|
@@ -100,7 +100,7 @@ uv run python -m ankigen_core.cli -p "Python Lists" --no-confirm
|
|
| 100 |
## Project Structure
|
| 101 |
|
| 102 |
- `app.py`: Main Gradio web application
|
| 103 |
-
- `
|
| 104 |
- `cli.py`: Command-line interface
|
| 105 |
- `agents/`: Agent system implementation
|
| 106 |
- `card_generator.py`: Card generation orchestration
|
|
@@ -123,7 +123,7 @@ uv run python -m ankigen_core.cli -p "Python Lists" --no-confirm
|
|
| 123 |
|
| 124 |
3. Run with coverage:
|
| 125 |
```bash
|
| 126 |
-
uv run pytest --cov=
|
| 127 |
```
|
| 128 |
|
| 129 |
## License
|
|
|
|
| 56 |
|
| 57 |
```bash
|
| 58 |
# Quick generation (auto-detects best settings)
|
| 59 |
+
uv run python -m ankigen.cli -p "Basic SQL"
|
| 60 |
|
| 61 |
# Custom settings
|
| 62 |
+
uv run python -m ankigen.cli -p "React Hooks" \
|
| 63 |
--topics 5 \
|
| 64 |
--cards-per-topic 8 \
|
| 65 |
--output hooks.apkg
|
| 66 |
|
| 67 |
# Export to CSV
|
| 68 |
+
uv run python -m ankigen.cli -p "Docker basics" \
|
| 69 |
--format csv \
|
| 70 |
-o docker.csv
|
| 71 |
|
| 72 |
# Skip confirmation prompt
|
| 73 |
+
uv run python -m ankigen.cli -p "Python Lists" --no-confirm
|
| 74 |
```
|
| 75 |
|
| 76 |
**CLI Options:**
|
|
|
|
| 100 |
## Project Structure
|
| 101 |
|
| 102 |
- `app.py`: Main Gradio web application
|
| 103 |
+
- `ankigen/`: Core logic modules
|
| 104 |
- `cli.py`: Command-line interface
|
| 105 |
- `agents/`: Agent system implementation
|
| 106 |
- `card_generator.py`: Card generation orchestration
|
|
|
|
| 123 |
|
| 124 |
3. Run with coverage:
|
| 125 |
```bash
|
| 126 |
+
uv run pytest --cov=ankigen tests/
|
| 127 |
```
|
| 128 |
|
| 129 |
## License
|
ankigen/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# This file marks ankigen as a Python package
|
ankigen/agents/.env.example
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AnkiGen Agent System Configuration
|
| 2 |
+
# Copy this file to .env and modify as needed
|
| 3 |
+
|
| 4 |
+
# =====================================
|
| 5 |
+
# AGENT OPERATING MODE
|
| 6 |
+
# =====================================
|
| 7 |
+
|
| 8 |
+
# Main operating mode: legacy, agent_only, hybrid, a_b_test
|
| 9 |
+
ANKIGEN_AGENT_MODE=hybrid
|
| 10 |
+
|
| 11 |
+
# A/B testing configuration (only used when mode=a_b_test)
|
| 12 |
+
ANKIGEN_AB_TEST_RATIO=0.5
|
| 13 |
+
ANKIGEN_AB_TEST_USER_HASH=
|
| 14 |
+
|
| 15 |
+
# =====================================
|
| 16 |
+
# GENERATION AGENTS
|
| 17 |
+
# =====================================
|
| 18 |
+
|
| 19 |
+
# Subject Expert Agent - domain-specific card generation
|
| 20 |
+
ANKIGEN_ENABLE_SUBJECT_EXPERT=true
|
| 21 |
+
|
| 22 |
+
# Pedagogical Agent - educational effectiveness review
|
| 23 |
+
ANKIGEN_ENABLE_PEDAGOGICAL_AGENT=false
|
| 24 |
+
|
| 25 |
+
# Content Structuring Agent - formatting and organization
|
| 26 |
+
ANKIGEN_ENABLE_CONTENT_STRUCTURING=false
|
| 27 |
+
|
| 28 |
+
# Generation Coordinator - orchestrates multi-agent workflows
|
| 29 |
+
ANKIGEN_ENABLE_GENERATION_COORDINATOR=false
|
| 30 |
+
|
| 31 |
+
# =====================================
|
| 32 |
+
# JUDGE AGENTS
|
| 33 |
+
# =====================================
|
| 34 |
+
|
| 35 |
+
# Content Accuracy Judge - fact-checking and accuracy
|
| 36 |
+
ANKIGEN_ENABLE_CONTENT_JUDGE=true
|
| 37 |
+
|
| 38 |
+
# Pedagogical Judge - educational effectiveness
|
| 39 |
+
ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE=false
|
| 40 |
+
|
| 41 |
+
# Clarity Judge - communication and readability
|
| 42 |
+
ANKIGEN_ENABLE_CLARITY_JUDGE=false
|
| 43 |
+
|
| 44 |
+
# Technical Judge - code and technical content
|
| 45 |
+
ANKIGEN_ENABLE_TECHNICAL_JUDGE=false
|
| 46 |
+
|
| 47 |
+
# Completeness Judge - quality standards and completeness
|
| 48 |
+
ANKIGEN_ENABLE_COMPLETENESS_JUDGE=false
|
| 49 |
+
|
| 50 |
+
# Judge Coordinator - orchestrates multi-judge workflows
|
| 51 |
+
ANKIGEN_ENABLE_JUDGE_COORDINATOR=false
|
| 52 |
+
|
| 53 |
+
# =====================================
|
| 54 |
+
# ENHANCEMENT AGENTS
|
| 55 |
+
# =====================================
|
| 56 |
+
|
| 57 |
+
# Revision Agent - improves rejected cards
|
| 58 |
+
ANKIGEN_ENABLE_REVISION_AGENT=false
|
| 59 |
+
|
| 60 |
+
# Enhancement Agent - enriches content and metadata
|
| 61 |
+
ANKIGEN_ENABLE_ENHANCEMENT_AGENT=false
|
| 62 |
+
|
| 63 |
+
# =====================================
|
| 64 |
+
# WORKFLOW FEATURES
|
| 65 |
+
# =====================================
|
| 66 |
+
|
| 67 |
+
# Multi-agent generation workflows
|
| 68 |
+
ANKIGEN_ENABLE_MULTI_AGENT_GEN=false
|
| 69 |
+
|
| 70 |
+
# Parallel judge execution
|
| 71 |
+
ANKIGEN_ENABLE_PARALLEL_JUDGING=true
|
| 72 |
+
|
| 73 |
+
# Agent handoff capabilities
|
| 74 |
+
ANKIGEN_ENABLE_AGENT_HANDOFFS=false
|
| 75 |
+
|
| 76 |
+
# Agent tracing and debugging
|
| 77 |
+
ANKIGEN_ENABLE_AGENT_TRACING=true
|
| 78 |
+
|
| 79 |
+
# =====================================
|
| 80 |
+
# PERFORMANCE SETTINGS
|
| 81 |
+
# =====================================
|
| 82 |
+
|
| 83 |
+
# Agent execution timeout (seconds)
|
| 84 |
+
ANKIGEN_AGENT_TIMEOUT=30.0
|
| 85 |
+
|
| 86 |
+
# Maximum retry attempts for failed agents
|
| 87 |
+
ANKIGEN_MAX_AGENT_RETRIES=3
|
| 88 |
+
|
| 89 |
+
# Enable response caching for efficiency
|
| 90 |
+
ANKIGEN_ENABLE_AGENT_CACHING=true
|
| 91 |
+
|
| 92 |
+
# =====================================
|
| 93 |
+
# QUALITY CONTROL
|
| 94 |
+
# =====================================
|
| 95 |
+
|
| 96 |
+
# Minimum judge consensus for card approval (0.0-1.0)
|
| 97 |
+
ANKIGEN_MIN_JUDGE_CONSENSUS=0.6
|
| 98 |
+
|
| 99 |
+
# Maximum revision iterations for rejected cards
|
| 100 |
+
ANKIGEN_MAX_REVISION_ITERATIONS=3
|
| 101 |
+
|
| 102 |
+
# =====================================
|
| 103 |
+
# PRESET CONFIGURATIONS
|
| 104 |
+
# =====================================
|
| 105 |
+
|
| 106 |
+
# Uncomment one of these preset configurations:
|
| 107 |
+
|
| 108 |
+
# MINIMAL SETUP - Single subject expert + content judge
|
| 109 |
+
# ANKIGEN_AGENT_MODE=hybrid
|
| 110 |
+
# ANKIGEN_ENABLE_SUBJECT_EXPERT=true
|
| 111 |
+
# ANKIGEN_ENABLE_CONTENT_JUDGE=true
|
| 112 |
+
# ANKIGEN_ENABLE_AGENT_TRACING=true
|
| 113 |
+
|
| 114 |
+
# QUALITY FOCUSED - Full judge pipeline
|
| 115 |
+
# ANKIGEN_AGENT_MODE=hybrid
|
| 116 |
+
# ANKIGEN_ENABLE_SUBJECT_EXPERT=true
|
| 117 |
+
# ANKIGEN_ENABLE_CONTENT_JUDGE=true
|
| 118 |
+
# ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE=true
|
| 119 |
+
# ANKIGEN_ENABLE_CLARITY_JUDGE=true
|
| 120 |
+
# ANKIGEN_ENABLE_COMPLETENESS_JUDGE=true
|
| 121 |
+
# ANKIGEN_ENABLE_JUDGE_COORDINATOR=true
|
| 122 |
+
# ANKIGEN_ENABLE_PARALLEL_JUDGING=true
|
| 123 |
+
# ANKIGEN_MIN_JUDGE_CONSENSUS=0.7
|
| 124 |
+
|
| 125 |
+
# FULL PIPELINE - All agents enabled
|
| 126 |
+
# ANKIGEN_AGENT_MODE=agent_only
|
| 127 |
+
# ANKIGEN_ENABLE_SUBJECT_EXPERT=true
|
| 128 |
+
# ANKIGEN_ENABLE_PEDAGOGICAL_AGENT=true
|
| 129 |
+
# ANKIGEN_ENABLE_CONTENT_STRUCTURING=true
|
| 130 |
+
# ANKIGEN_ENABLE_GENERATION_COORDINATOR=true
|
| 131 |
+
# ANKIGEN_ENABLE_CONTENT_JUDGE=true
|
| 132 |
+
# ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE=true
|
| 133 |
+
# ANKIGEN_ENABLE_CLARITY_JUDGE=true
|
| 134 |
+
# ANKIGEN_ENABLE_TECHNICAL_JUDGE=true
|
| 135 |
+
# ANKIGEN_ENABLE_COMPLETENESS_JUDGE=true
|
| 136 |
+
# ANKIGEN_ENABLE_JUDGE_COORDINATOR=true
|
| 137 |
+
# ANKIGEN_ENABLE_REVISION_AGENT=true
|
| 138 |
+
# ANKIGEN_ENABLE_ENHANCEMENT_AGENT=true
|
| 139 |
+
# ANKIGEN_ENABLE_PARALLEL_JUDGING=true
|
| 140 |
+
# ANKIGEN_ENABLE_AGENT_HANDOFFS=true
|
| 141 |
+
|
| 142 |
+
# A/B TESTING SETUP - Compare agents vs legacy
|
| 143 |
+
# ANKIGEN_AGENT_MODE=a_b_test
|
| 144 |
+
# ANKIGEN_AB_TEST_RATIO=0.5
|
| 145 |
+
# ANKIGEN_ENABLE_SUBJECT_EXPERT=true
|
| 146 |
+
# ANKIGEN_ENABLE_CONTENT_JUDGE=true
|
| 147 |
+
# ANKIGEN_ENABLE_AGENT_TRACING=true
|
| 148 |
+
|
| 149 |
+
# =====================================
|
| 150 |
+
# MONITORING & DEBUGGING
|
| 151 |
+
# =====================================
|
| 152 |
+
|
| 153 |
+
# Agent metrics persistence directory
|
| 154 |
+
# ANKIGEN_METRICS_DIR=metrics/agents
|
| 155 |
+
|
| 156 |
+
# Agent configuration directory
|
| 157 |
+
# ANKIGEN_CONFIG_DIR=config/agents
|
| 158 |
+
|
| 159 |
+
# Enable detailed debug logging
|
| 160 |
+
# ANKIGEN_DEBUG_MODE=false
|
| 161 |
+
|
| 162 |
+
# =====================================
|
| 163 |
+
# COST OPTIMIZATION
|
| 164 |
+
# =====================================
|
| 165 |
+
|
| 166 |
+
# Model preferences for different agent types
|
| 167 |
+
# ANKIGEN_GENERATION_MODEL=gpt-4o
|
| 168 |
+
# ANKIGEN_JUDGE_MODEL=gpt-4o-mini
|
| 169 |
+
# ANKIGEN_CRITICAL_JUDGE_MODEL=gpt-4o
|
| 170 |
+
|
| 171 |
+
# Token usage limits per request
|
| 172 |
+
# ANKIGEN_MAX_INPUT_TOKENS=4000
|
| 173 |
+
# ANKIGEN_MAX_OUTPUT_TOKENS=2000
|
| 174 |
+
|
| 175 |
+
# =====================================
|
| 176 |
+
# NOTES
|
| 177 |
+
# =====================================
|
| 178 |
+
|
| 179 |
+
# Performance Impact:
|
| 180 |
+
# - Each enabled agent adds processing time and cost
|
| 181 |
+
# - Parallel judging reduces latency but increases concurrent API calls
|
| 182 |
+
# - Caching significantly improves performance for similar requests
|
| 183 |
+
|
| 184 |
+
# Quality vs Speed:
|
| 185 |
+
# - More judges = better quality but slower generation
|
| 186 |
+
# - Agent coordination adds overhead but improves consistency
|
| 187 |
+
# - Enhancement agents provide best quality but highest cost
|
| 188 |
+
|
| 189 |
+
# Recommended Starting Configuration:
|
| 190 |
+
# 1. Start with hybrid mode + subject expert + content judge
|
| 191 |
+
# 2. Enable A/B testing to compare with legacy system
|
| 192 |
+
# 3. Gradually add more agents based on quality needs
|
| 193 |
+
# 4. Monitor metrics and adjust consensus thresholds
|
| 194 |
+
|
| 195 |
+
# Cost Considerations:
|
| 196 |
+
# - Subject Expert: ~2-3x cost of legacy (higher quality)
|
| 197 |
+
# - Judge Pipeline: ~1.5-2x additional cost (significant quality improvement)
|
| 198 |
+
# - Enhancement Pipeline: ~1.2-1.5x additional cost (marginal improvement)
|
| 199 |
+
# - Full pipeline: ~4-6x cost of legacy (maximum quality)
|
ankigen/agents/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Agent system for AnkiGen agentic workflows
|
| 2 |
+
|
| 3 |
+
from .base import BaseAgentWrapper, AgentConfig
|
| 4 |
+
from .generators import SubjectExpertAgent
|
| 5 |
+
from .config import AgentConfigManager
|
| 6 |
+
|
| 7 |
+
__all__ = [
|
| 8 |
+
"BaseAgentWrapper",
|
| 9 |
+
"AgentConfig",
|
| 10 |
+
"SubjectExpertAgent",
|
| 11 |
+
"AgentConfigManager",
|
| 12 |
+
]
|
ankigen/agents/base.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base agent wrapper and configuration classes
|
| 2 |
+
|
| 3 |
+
from typing import Dict, Any, Optional, List
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
import asyncio
|
| 7 |
+
import json
|
| 8 |
+
from openai import AsyncOpenAI
|
| 9 |
+
from agents import Agent, Runner, ModelSettings
|
| 10 |
+
|
| 11 |
+
from ankigen.logging import logger
|
| 12 |
+
from .token_tracker import track_usage_from_agents_sdk
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def parse_agent_json_response(response: Any) -> Dict[str, Any]:
    """Parse an agent response as JSON, tolerating a Markdown code fence.

    Args:
        response: Raw agent output. A ``str`` is stripped of surrounding
            whitespace and of an optional leading/trailing triple-backtick
            fence (with or without a ``json`` language tag, in any letter
            case) before being decoded. Any other value is returned untouched.

    Returns:
        The decoded JSON value for string input, otherwise ``response`` itself.

    Raises:
        json.JSONDecodeError: If the unfenced text is not valid JSON.
    """
    if not isinstance(response, str):
        return response

    text = response.strip()
    # Compare the tag case-insensitively: a fence tagged "JSON" or "Json"
    # would otherwise leave the tag in the payload and break json.loads.
    if text[:7].lower() == "```json":
        text = text[7:]
    if text.startswith("```"):
        text = text[3:]
    if text.endswith("```"):
        text = text[:-3]

    return json.loads(text.strip())
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@dataclass
class AgentConfig:
    """Per-agent settings: identity, model choice and execution limits.

    Only ``name`` and ``instructions`` are required; every other field has a
    default. ``custom_prompts`` is normalised to an empty dict when omitted.
    """

    # Identity and prompt
    name: str
    instructions: str

    # Model selection and sampling
    model: str = "gpt-5.2"
    reasoning_effort: Optional[str] = None
    temperature: float = 0.7
    max_tokens: Optional[int] = None

    # Execution limits
    timeout: float = 30.0
    retry_attempts: int = 3

    # Miscellaneous switches
    enable_tracing: bool = True
    custom_prompts: Optional[Dict[str, str]] = None
    output_type: Optional[type] = None  # For structured outputs

    def __post_init__(self):
        # Replace a missing prompt mapping with a fresh dict so later code can
        # treat the attribute as a mapping without a None check.
        self.custom_prompts = (
            {} if self.custom_prompts is None else self.custom_prompts
        )
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class BaseAgentWrapper:
    """Base wrapper for OpenAI Agents SDK integration.

    Holds an ``AgentConfig`` plus an ``AsyncOpenAI`` client, lazily builds the
    SDK ``Agent`` on first use, and runs it with a per-attempt timeout,
    timeout-only retries and token-usage tracking.
    """

    def __init__(self, config: AgentConfig, openai_client: AsyncOpenAI):
        self.config = config
        self.openai_client = openai_client
        self.agent = None  # built by initialize()
        self.runner = None  # not assigned anywhere else in this class

    async def initialize(self):
        """Initialize the OpenAI agent with structured output support.

        Registers ``self.openai_client`` as the SDK default client, builds the
        model settings and creates ``self.agent``.

        Raises:
            Exception: Any error raised during setup is logged and re-raised.
        """
        try:
            # Set the default OpenAI client for the agents SDK
            from agents import set_default_openai_client

            set_default_openai_client(self.openai_client, use_for_tracing=False)

            # Create model settings with temperature and optional reasoning effort
            # NOTE(review): config.max_tokens is not forwarded to ModelSettings
            # here - confirm whether that is intentional.
            model_settings_kwargs: Dict[str, Any] = {
                "temperature": self.config.temperature
            }
            effort = self.config.reasoning_effort
            if effort in ("auto", "", None):
                effort = None

            # GPT-5.x (not chat-latest) supports reasoning_effort
            if (
                effort
                and self.config.model.startswith("gpt-5")
                and "chat-latest" not in self.config.model
            ):
                from openai.types.shared import Reasoning

                model_settings_kwargs["reasoning"] = Reasoning(effort=effort)

            model_settings = ModelSettings(**model_settings_kwargs)

            # Build the constructor arguments once; output_type is only passed
            # when configured so the SDK default applies otherwise.
            agent_kwargs: Dict[str, Any] = {
                "name": self.config.name,
                "instructions": self.config.instructions,
                "model": self.config.model,
                "model_settings": model_settings,
            }
            if self.config.output_type:
                agent_kwargs["output_type"] = self.config.output_type

            self.agent = Agent(**agent_kwargs)

            if self.config.output_type:
                logger.info(
                    f"Initialized agent with structured output: {self.config.name} -> {self.config.output_type}"
                )
            else:
                logger.info(
                    f"Initialized agent (no structured output): {self.config.name}"
                )

        except Exception as e:
            logger.error(f"Failed to initialize agent {self.config.name}: {e}")
            raise

    def _enhance_input_with_context(
        self, user_input: str, context: Optional[Dict[str, Any]]
    ) -> str:
        """Append ``key: value`` context lines to the user input.

        Returns ``user_input`` unchanged when ``context`` is ``None`` or empty,
        so an empty mapping does not produce a dangling "Context:" header.
        """
        if not context:
            return user_input
        context_str = "\n".join(f"{k}: {v}" for k, v in context.items())
        return f"{user_input}\n\nContext:\n{context_str}"

    async def _execute_with_retry(self, enhanced_input: str) -> Any:
        """Run the agent, retrying only when an attempt times out.

        Each attempt is bounded by ``config.timeout`` seconds. Exceptions other
        than a timeout propagate immediately without retry.

        Raises:
            asyncio.TimeoutError: If the final attempt also times out.
            RuntimeError: If ``config.retry_attempts`` allows no attempt at all.
        """
        for attempt in range(self.config.retry_attempts):
            try:
                result = await asyncio.wait_for(
                    Runner.run(
                        starting_agent=self.agent,
                        input=enhanced_input,
                    ),
                    timeout=self.config.timeout,
                )
                return result
            except asyncio.TimeoutError:
                if attempt < self.config.retry_attempts - 1:
                    logger.warning(
                        f"Agent {self.config.name} timed out "
                        f"(attempt {attempt + 1}/{self.config.retry_attempts}), retrying..."
                    )
                    continue
                logger.error(
                    f"Agent {self.config.name} timed out after {self.config.retry_attempts} attempts"
                )
                raise
        # Only reachable when the loop body never ran (retry_attempts < 1).
        raise RuntimeError("Retry loop exited without result")

    def _extract_and_track_usage(self, result: Any) -> Dict[str, Any]:
        """Sum token usage over ``result.raw_responses`` and record it.

        Returns:
            A dict with ``input_tokens``, ``output_tokens``, ``total_tokens``
            and ``requests``; all zero when the result carries no usage data.
        """
        total_usage = {
            "input_tokens": 0,
            "output_tokens": 0,
            "total_tokens": 0,
            "requests": 0,
        }

        if hasattr(result, "raw_responses") and result.raw_responses:
            for response in result.raw_responses:
                if hasattr(response, "usage") and response.usage:
                    total_usage["input_tokens"] += response.usage.input_tokens
                    total_usage["output_tokens"] += response.usage.output_tokens
                    total_usage["total_tokens"] += response.usage.total_tokens
                    total_usage["requests"] += response.usage.requests

            # Tracking and logging happen only when raw responses were present.
            track_usage_from_agents_sdk(total_usage, self.config.model)
            logger.info(f"Agent usage: {total_usage}")

        return total_usage

    def _extract_output(self, result: Any) -> Any:
        """Extract the final text output from an agent result.

        Falls back to ``str(result)`` when the result has no ``new_items``.
        """
        if not (hasattr(result, "new_items") and result.new_items):
            return str(result)

        from agents.items import ItemHelpers

        text_output = ItemHelpers.text_message_outputs(result.new_items)

        # NOTE(review): with an output_type configured this still returns the
        # text form, not a parsed object - confirm callers expect that.
        if self.config.output_type and self.config.output_type is not str:
            logger.info(
                f"Structured output: {type(text_output)} -> {self.config.output_type}"
            )

        return text_output

    async def execute(
        self, user_input: str, context: Optional[Dict[str, Any]] = None
    ) -> tuple[Any, Dict[str, Any]]:
        """Execute the agent with user input and optional context.

        Initializes the agent on first use, runs it with retry, and returns
        ``(output, usage)`` where ``usage`` is the token-usage dict.

        Raises:
            ValueError: If the agent is still unset after initialization.
            asyncio.TimeoutError: If every attempt timed out.
            Exception: Any other execution error is logged and re-raised.
        """
        if not self.agent:
            await self.initialize()

        if self.agent is None:
            raise ValueError("Agent not initialized")

        enhanced_input = self._enhance_input_with_context(user_input, context)

        logger.info(f"Executing agent: {self.config.name}")
        logger.info(f"Input: {enhanced_input[:200]}...")

        import time

        # Monotonic clock: elapsed time must not be skewed by wall-clock changes.
        start_time = time.perf_counter()

        try:
            result = await self._execute_with_retry(enhanced_input)
            execution_time = time.perf_counter() - start_time
            logger.info(f"Agent {self.config.name} executed in {execution_time:.2f}s")

            total_usage = self._extract_and_track_usage(result)
            output = self._extract_output(result)

            return output, total_usage

        except asyncio.TimeoutError:
            logger.error(
                f"Agent {self.config.name} timed out after {self.config.timeout}s"
            )
            raise
        except Exception as e:
            logger.error(f"Agent {self.config.name} execution failed: {e}")
            raise

    async def handoff_to(
        self, target_agent: "BaseAgentWrapper", context: Dict[str, Any]
    ) -> Any:
        """Hand off execution to another agent with context.

        The target runs with ``context["user_input"]`` (or a default prompt)
        and receives ``context`` extended with ``from_agent`` and
        ``handoff_reason``. Keys already in ``context`` take precedence.

        Returns:
            Whatever ``target_agent.execute`` returns.
        """
        logger.info(
            f"Handing off from {self.config.name} to {target_agent.config.name}"
        )

        # Prepare handoff context
        handoff_context = {
            "from_agent": self.config.name,
            "handoff_reason": context.get("reason", "Standard workflow handoff"),
            **context,
        }

        # Execute the target agent
        return await target_agent.execute(
            context.get("user_input", "Continue processing"), handoff_context
        )
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
class AgentResponse(BaseModel):
    """Uniform envelope for the result an agent hands back."""

    # Outcome flag and payload
    success: bool
    data: Any

    # Name of the agent that produced this response
    agent_name: str

    # Optional extras. NOTE(review): pydantic presumably copies these mutable
    # defaults per instance - confirm against the pinned pydantic version.
    metadata: Dict[str, Any] = {}
    errors: List[str] = []
|
ankigen/agents/config.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Agent configuration management system
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from typing import Dict, Any, Optional, List
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from dataclasses import dataclass, asdict
|
| 7 |
+
from jinja2 import Environment, FileSystemLoader
|
| 8 |
+
|
| 9 |
+
from ankigen.logging import logger
|
| 10 |
+
from .base import AgentConfig
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@dataclass
class AgentPromptTemplate:
    """Template for agent prompts with variables.

    ``system_prompt`` and ``user_prompt_template`` are ``str.format`` templates.
    ``variables`` supplies default substitutions, which keyword arguments to
    the render methods override.
    """

    system_prompt: str
    user_prompt_template: str
    variables: Optional[Dict[str, str]] = None

    def __post_init__(self):
        if self.variables is None:
            self.variables = {}

    def _render(self, template: str, label: str, overrides: Dict[str, Any]) -> str:
        """Format ``template`` best-effort, returning it unrendered on failure."""
        try:
            return template.format(**{**(self.variables or {}), **overrides})
        except KeyError as e:
            logger.error(f"Missing variable in {label} prompt template: {e}")
        except (IndexError, ValueError) as e:
            # str.format raises IndexError for positional fields ("{}" / "{0}")
            # and ValueError for unbalanced braces, which are easy to hit when
            # a prompt embeds literal JSON. Fall back like the KeyError case.
            logger.error(f"Malformed {label} prompt template: {e}")
        return template

    def render_system_prompt(self, **kwargs) -> str:
        """Render the system prompt with the provided variables.

        Returns the raw ``system_prompt`` if it cannot be formatted.
        """
        return self._render(self.system_prompt, "system", kwargs)

    def render_user_prompt(self, **kwargs) -> str:
        """Render the user prompt template with the provided variables.

        Returns the raw ``user_prompt_template`` if it cannot be formatted.
        """
        return self._render(self.user_prompt_template, "user", kwargs)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class AgentConfigManager:
|
| 45 |
+
"""Manages agent configurations using Jinja templates and runtime updates"""
|
| 46 |
+
|
| 47 |
+
def __init__(
|
| 48 |
+
self,
|
| 49 |
+
model_overrides: Optional[Dict[str, str]] = None,
|
| 50 |
+
template_vars: Optional[Dict[str, Any]] = None,
|
| 51 |
+
):
|
| 52 |
+
self.model_overrides = model_overrides or {}
|
| 53 |
+
self.template_vars = template_vars or {}
|
| 54 |
+
self.configs: Dict[str, AgentConfig] = {}
|
| 55 |
+
self.prompt_templates: Dict[str, AgentPromptTemplate] = {}
|
| 56 |
+
|
| 57 |
+
template_dir = Path(__file__).parent / "templates"
|
| 58 |
+
self.jinja_env = Environment(loader=FileSystemLoader(template_dir))
|
| 59 |
+
self._load_default_configs()
|
| 60 |
+
|
| 61 |
+
def update_models(self, model_overrides: Dict[str, str]):
|
| 62 |
+
"""Update model selections and regenerate configs"""
|
| 63 |
+
self.model_overrides = model_overrides
|
| 64 |
+
self._load_default_configs()
|
| 65 |
+
logger.info(f"Updated model overrides: {model_overrides}")
|
| 66 |
+
|
| 67 |
+
def update_template_vars(self, template_vars: Dict[str, Any]):
|
| 68 |
+
logger.info(
|
| 69 |
+
"Template vars are no longer used in the simplified agent pipeline."
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
def _load_default_configs(self):
|
| 73 |
+
"""Load all default configurations from Jinja templates"""
|
| 74 |
+
try:
|
| 75 |
+
self._load_configs_from_template("generators.j2")
|
| 76 |
+
self.prompt_templates.clear()
|
| 77 |
+
logger.info(
|
| 78 |
+
f"Loaded {len(self.configs)} agent configurations from Jinja templates"
|
| 79 |
+
)
|
| 80 |
+
except Exception as e:
|
| 81 |
+
logger.error(f"Failed to load agent configurations from templates: {e}")
|
| 82 |
+
|
| 83 |
+
def _get_model_for_agent(self, agent_name: str, default_model: str) -> str:
|
| 84 |
+
"""Get model for agent, using override if available"""
|
| 85 |
+
return self.model_overrides.get(agent_name, default_model)
|
| 86 |
+
|
| 87 |
+
def _load_configs_from_template(self, template_name: str):
|
| 88 |
+
"""Load agent configurations from a Jinja template"""
|
| 89 |
+
try:
|
| 90 |
+
template = self.jinja_env.get_template(template_name)
|
| 91 |
+
|
| 92 |
+
# Default models for each agent type
|
| 93 |
+
default_models = {
|
| 94 |
+
"subject_expert_model": "gpt-5.2",
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
# Simple mapping: agent_name -> agent_name_model
|
| 98 |
+
model_vars = {}
|
| 99 |
+
for agent_name, model in self.model_overrides.items():
|
| 100 |
+
model_vars[f"{agent_name}_model"] = model
|
| 101 |
+
|
| 102 |
+
# Merge all template variables with defaults
|
| 103 |
+
render_vars = {**default_models, **self.template_vars, **model_vars}
|
| 104 |
+
|
| 105 |
+
logger.info(f"Rendering template {template_name} with vars: {render_vars}")
|
| 106 |
+
rendered_json = template.render(**render_vars)
|
| 107 |
+
config_data = json.loads(rendered_json)
|
| 108 |
+
|
| 109 |
+
# Create AgentConfig objects from the rendered data
|
| 110 |
+
for agent_name, agent_data in config_data.items():
|
| 111 |
+
config = AgentConfig(
|
| 112 |
+
name=agent_data.get("name", agent_name),
|
| 113 |
+
instructions=agent_data.get("instructions", ""),
|
| 114 |
+
model=agent_data.get("model", "gpt-5.2"),
|
| 115 |
+
reasoning_effort=agent_data.get("reasoning_effort"),
|
| 116 |
+
temperature=agent_data.get("temperature", 0.7),
|
| 117 |
+
max_tokens=agent_data.get("max_tokens"),
|
| 118 |
+
timeout=agent_data.get("timeout", 30.0),
|
| 119 |
+
retry_attempts=agent_data.get("retry_attempts", 3),
|
| 120 |
+
enable_tracing=agent_data.get("enable_tracing", True),
|
| 121 |
+
custom_prompts=agent_data.get("custom_prompts", {}),
|
| 122 |
+
)
|
| 123 |
+
self.configs[agent_name] = config
|
| 124 |
+
logger.info(f"Loaded config for {agent_name}: model={config.model}")
|
| 125 |
+
|
| 126 |
+
except Exception as e:
|
| 127 |
+
logger.error(f"Failed to load configs from template {template_name}: {e}")
|
| 128 |
+
|
| 129 |
+
def get_agent_config(self, agent_name: str) -> Optional[AgentConfig]:
    """Look up the configuration stored under ``agent_name``.

    Returns the entry of ``self.configs`` for that name, or ``None`` when
    nothing is registered under it.
    """
    found = self.configs.get(agent_name)
    return found
|
| 132 |
+
|
| 133 |
+
def get_config(self, agent_name: str) -> Optional[AgentConfig]:
    """Compatibility alias: delegates to ``get_agent_config``."""
    result = self.get_agent_config(agent_name)
    return result
|
| 136 |
+
|
| 137 |
+
def get_prompt_template(self, template_name: str) -> Optional[AgentPromptTemplate]:
    """Look up a prompt template by name.

    Returns the entry of ``self.prompt_templates`` for ``template_name``, or
    ``None`` when no template is registered under it.
    """
    found = self.prompt_templates.get(template_name)
    return found
|
| 140 |
+
|
| 141 |
+
def update_agent_config(self, agent_name: str, **kwargs):
    """Update an agent's configuration at runtime.

    Args:
        agent_name: Key of the configuration in ``self.configs``.
        **kwargs: Attribute names and the new values to assign.

    Only attributes the config object already has are assigned. An unknown
    agent name or attribute name is skipped and logged as a warning, so a
    mistyped override does not disappear silently.
    """
    if agent_name not in self.configs:
        logger.warning(f"Cannot update config for unknown agent '{agent_name}'")
        return

    config = self.configs[agent_name]
    for key, value in kwargs.items():
        if not hasattr(config, key):
            logger.warning(
                f"Ignoring unknown config field '{key}' for agent '{agent_name}'"
            )
            continue
        setattr(config, key, value)
        logger.info(f"Updated {agent_name} config: {key} = {value}")
|
| 149 |
+
|
| 150 |
+
def update_config(
    self, agent_name: str, updates: Dict[str, Any]
) -> Optional[AgentConfig]:
    """Apply a dictionary of attribute updates to one agent configuration.

    Keys of ``updates`` that are not existing attributes of the config object
    are ignored. Returns the updated config, or ``None`` when ``agent_name``
    is not registered in ``self.configs``.
    """
    if agent_name in self.configs:
        target = self.configs[agent_name]
        for field_name, new_value in updates.items():
            if not hasattr(target, field_name):
                continue
            setattr(target, field_name, new_value)
        return target

    return None
|
| 163 |
+
|
| 164 |
+
def list_configs(self) -> List[str]:
    """Return the names of all registered agent configurations as a new list."""
    names = [*self.configs.keys()]
    return names
|
| 167 |
+
|
| 168 |
+
def list_prompt_templates(self) -> List[str]:
    """Return the names of all registered prompt templates as a new list."""
    names = [*self.prompt_templates.keys()]
    return names
|
| 171 |
+
|
| 172 |
+
def load_config_from_dict(self, config_dict: Dict[str, Any]):
    """Register agent configs and prompt templates described by a dictionary.

    ``config_dict`` may contain an ``"agents"`` mapping (agent name -> config
    fields) and a ``"prompt_templates"`` mapping (template name -> template
    fields). Either section may be absent. Entries are stored in
    ``self.configs`` / ``self.prompt_templates`` under their mapping key,
    replacing any existing entry with the same name.
    """
    # Agent section: fields missing from an entry fall back to the defaults below
    if "agents" in config_dict:
        for agent_name, spec in config_dict["agents"].items():
            field = spec.get
            self.configs[agent_name] = AgentConfig(
                name=agent_name,
                instructions=field("instructions", ""),
                model=field("model", "gpt-5.2"),
                reasoning_effort=field("reasoning_effort"),
                temperature=field("temperature", 0.7),
                max_tokens=field("max_tokens"),
                timeout=field("timeout", 30.0),
                retry_attempts=field("retry_attempts", 3),
                enable_tracing=field("enable_tracing", True),
                custom_prompts=field("custom_prompts", {}),
            )

    # Prompt template section
    if "prompt_templates" in config_dict:
        for template_name, spec in config_dict["prompt_templates"].items():
            self.prompt_templates[template_name] = AgentPromptTemplate(
                system_prompt=spec.get("system_prompt", ""),
                user_prompt_template=spec.get("user_prompt_template", ""),
                variables=spec.get("variables", {}),
            )
|
| 200 |
+
|
| 201 |
+
def _validate_config(self, config_data: Dict[str, Any]) -> bool:
    """Validate agent configuration data.

    Args:
        config_data: Raw configuration fields for a single agent.

    Returns:
        True when ``"name"`` and ``"instructions"`` are present,
        ``"temperature"`` (default 0.7) is a number within [0.0, 2.0] and
        ``"timeout"`` (default 30.0) is a number greater than zero.
        False otherwise, including when either value is not numeric.
    """
    # Check required fields
    if "name" not in config_data or "instructions" not in config_data:
        return False

    # Check temperature range; a non-numeric value (None, str, ...) is
    # reported as invalid instead of raising TypeError from the comparison
    temperature = config_data.get("temperature", 0.7)
    if not isinstance(temperature, (int, float)) or not 0.0 <= temperature <= 2.0:
        return False

    # Check timeout is positive; "not timeout > 0" also rejects NaN, which
    # a "timeout <= 0" test would let through
    timeout = config_data.get("timeout", 30.0)
    if not isinstance(timeout, (int, float)) or not timeout > 0:
        return False

    return True
|
| 218 |
+
|
| 219 |
+
def save_config_to_file(self, filename: str, agents: Optional[List[str]] = None):
    """Save current configurations to a JSON file.

    Args:
        filename: Destination path; an existing file is overwritten.
        agents: Names of the agent configs to include. When omitted or
            empty, every registered config is saved. Names that are not
            registered are skipped.

    All prompt templates are always included. Serialization and write
    failures are logged, not raised.
    """
    # Add agent configs
    agents_to_save = agents if agents else list(self.configs.keys())
    data = {
        "agents": {
            agent_name: asdict(self.configs[agent_name])
            for agent_name in agents_to_save
            if agent_name in self.configs
        },
        # Add prompt templates
        "prompt_templates": {
            template_name: asdict(template)
            for template_name, template in self.prompt_templates.items()
        },
    }

    try:
        # Serialize before opening the file: opening with "w" truncates it,
        # so a value json cannot encode must fail before that happens
        # rather than leave a half-written file behind.
        serialized = json.dumps(data, indent=2)
        with open(filename, "w", encoding="utf-8") as f:
            f.write(serialized)
        logger.info(f"Saved agent configurations to {filename}")
    except Exception as e:
        logger.error(f"Failed to save agent config to {filename}: {e}")
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
# Module-wide AgentConfigManager; stays None until get_config_manager() creates it
_global_config_manager: Optional[AgentConfigManager] = None


def get_config_manager(
    model_overrides: Optional[Dict[str, str]] = None,
    template_vars: Optional[Dict[str, Any]] = None,
) -> AgentConfigManager:
    """Return the shared AgentConfigManager, creating it on first use.

    On the first call the manager is constructed with the given
    ``model_overrides`` and ``template_vars``. On later calls the existing
    manager is returned; non-empty arguments are forwarded to its
    ``update_models`` / ``update_template_vars`` methods first.
    """
    global _global_config_manager

    # First call: build the manager with whatever was passed in
    if _global_config_manager is None:
        _global_config_manager = AgentConfigManager(model_overrides, template_vars)
        return _global_config_manager

    # Later calls: push any non-empty arguments into the existing manager
    if model_overrides:
        _global_config_manager.update_models(model_overrides)
    if template_vars:
        _global_config_manager.update_template_vars(template_vars)
    return _global_config_manager
|
ankigen/agents/generators.py
ADDED
|
@@ -0,0 +1,405 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Specialized generator agents for card generation
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from typing import List, Dict, Any, Optional, Tuple
|
| 5 |
+
|
| 6 |
+
from openai import AsyncOpenAI
|
| 7 |
+
|
| 8 |
+
from ankigen.logging import logger
|
| 9 |
+
from ankigen.models import Card, CardFront, CardBack
|
| 10 |
+
from .base import BaseAgentWrapper, AgentConfig
|
| 11 |
+
from .config import get_config_manager
|
| 12 |
+
from .schemas import CardsGenerationSchema
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def card_dict_to_card(
    card_data: Dict[str, Any],
    default_topic: str,
    default_subject: str,
) -> Card:
    """Convert a dictionary representation of a card into a Card object.

    Args:
        card_data: Mapping with a ``"front"`` dict (``"question"``) and a
            ``"back"`` dict (``"answer"``, optional ``"explanation"`` and
            ``"example"``), plus optional ``"card_type"`` (default
            ``"basic"``) and ``"metadata"``.
        default_topic: Topic recorded in the metadata when the payload
            carries none.
        default_subject: Subject recorded in the metadata when the payload
            carries none.

    Returns:
        The populated Card. Its metadata gets ``"subject"`` and ``"topic"``
        entries if those keys are not already present.

    Raises:
        ValueError: If the payload is not a dict, or the question or the
            answer is missing or null.
    """

    def _text(value: Any, fallback: str = "") -> str:
        # JSON null arrives as None; str(None) would put the literal text
        # "None" on the card.
        return fallback if value is None else str(value)

    if not isinstance(card_data, dict):
        raise ValueError("Card payload must be a dictionary")

    front_data = card_data.get("front")
    back_data = card_data.get("back")

    # A key that is present but null is as unusable as a missing key
    if not isinstance(front_data, dict) or front_data.get("question") is None:
        raise ValueError("Card front must include a question field")
    if not isinstance(back_data, dict) or back_data.get("answer") is None:
        raise ValueError("Card back must include an answer field")

    raw_metadata = card_data.get("metadata")
    # Copy so that filling in the defaults below never mutates the caller's payload
    metadata = dict(raw_metadata) if isinstance(raw_metadata, dict) else {}

    subject = metadata.get("subject") or default_subject or "general"
    topic = metadata.get("topic") or default_topic or "General Concepts"

    card = Card(
        card_type=_text(card_data.get("card_type"), "basic"),
        front=CardFront(question=str(front_data["question"])),
        back=CardBack(
            answer=str(back_data["answer"]),
            explanation=_text(back_data.get("explanation")),
            example=_text(back_data.get("example")),
        ),
        metadata=metadata,
    )

    if card.metadata is not None:
        card.metadata.setdefault("subject", subject)
        card.metadata.setdefault("topic", topic)

    return card
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
class SubjectExpertAgent(BaseAgentWrapper):
    """Subject matter expert agent for domain-specific card generation.

    Wraps the "subject_expert" agent configuration, requests structured
    output (CardsGenerationSchema) and generates cards in batches.
    """

    def __init__(self, openai_client: AsyncOpenAI, subject: str = "general"):
        """Build the agent from the "subject_expert" configuration.

        Args:
            openai_client: Client handed to the base wrapper.
            subject: Subject name; when it is not "general" and the config
                has a matching ``custom_prompts`` entry (looked up in lower
                case), that entry is appended to the instructions.

        Raises:
            ValueError: If no "subject_expert" configuration is registered.
        """
        import copy  # local import: only this method needs it

        config_manager = get_config_manager()
        shared_config = config_manager.get_agent_config("subject_expert")

        if not shared_config:
            raise ValueError(
                "subject_expert configuration not found - agent system not properly initialized"
            )

        # The config manager returns the same object on every call. Customize
        # a private copy so subject guidance does not pile up in the shared
        # instructions each time another agent is constructed.
        base_config = copy.copy(shared_config)
        base_config.output_type = CardsGenerationSchema

        if subject != "general" and base_config.custom_prompts:
            subject_prompt = base_config.custom_prompts.get(subject.lower(), "")
            if subject_prompt:
                base_config.instructions += (
                    f"\n\nSubject-specific guidance: {subject_prompt}"
                )

        super().__init__(base_config, openai_client)
        self.subject = subject

    def _build_batch_prompt(
        self,
        topic: str,
        cards_in_batch: int,
        batch_num: int,
        context: Optional[Dict[str, Any]],
        previous_topics: List[str],
    ) -> str:
        """Build user input prompt for a batch of cards.

        Adds, when applicable: a cloze-card instruction, the learning
        preferences and the full context, up to the 20 most recent
        already-covered topics, and a note for every batch after the first.
        """
        user_input = f"Generate {cards_in_batch} flashcards for the topic: {topic}"

        if context and context.get("generate_cloze"):
            user_input += (
                "\n\nIMPORTANT: Generate a mix of card types including cloze cards. "
                "For code examples, syntax, and fill-in-the-blank concepts, use cloze cards "
                "(card_type='cloze'). Aim for roughly 50% cloze cards when dealing with technical/programming content."
            )

        if context:
            learning_preferences = context.get("learning_preferences")
            if learning_preferences:
                user_input += f"\n\nLearning focus: {learning_preferences}"
            user_input += f"\n\nAdditional context: {context}"

        if previous_topics:
            # Only the most recent entries are listed to keep the prompt bounded
            topics_summary = ", ".join(previous_topics[-20:])
            user_input += f"\n\nAvoid creating cards about these already covered topics: {topics_summary}"

        if batch_num > 1:
            user_input += f"\n\nThis is batch {batch_num} of cards. Ensure these cards cover different aspects of the topic."

        return user_input

    def _extract_topics_for_dedup(self, batch_cards: List[Card]) -> List[str]:
        """Extract key terms from card questions for deduplication.

        For each card with a question, the first three words longer than
        three characters (lower-cased) are joined into one entry.
        """
        topics = []
        for card in batch_cards:
            if hasattr(card, "front") and card.front and card.front.question:
                question_words = card.front.question.lower().split()
                key_terms = [word for word in question_words if len(word) > 3][:3]
                if key_terms:
                    topics.append(" ".join(key_terms))
        return topics

    def _accumulate_usage(
        self, total_usage: Dict[str, int], batch_usage: Optional[Dict[str, Any]]
    ) -> None:
        """Accumulate batch usage into total usage (in place).

        Only keys already present in ``total_usage`` are summed; a missing
        or null value in ``batch_usage`` counts as zero.
        """
        if batch_usage:
            for key in total_usage:
                total_usage[key] += batch_usage.get(key) or 0

    async def generate_cards(
        self, topic: str, num_cards: int = 5, context: Optional[Dict[str, Any]] = None
    ) -> List[Card]:
        """Generate flashcards for a given topic with automatic batching.

        Cards are requested in batches of at most 10 until ``num_cards``
        have been produced or a batch yields no cards.

        Args:
            topic: Topic the cards should cover.
            num_cards: Number of cards requested.
            context: Optional extra context passed to the prompt builder and
                to ``execute``.

        Returns:
            All cards parsed from the batches that ran.

        Raises:
            Exception: Any error from execution or parsing is logged and
                re-raised.
        """
        batch_size = 10
        all_cards: List[Card] = []
        total_usage: Dict[str, int] = {
            "total_tokens": 0,
            "input_tokens": 0,
            "output_tokens": 0,
        }
        previous_topics: List[str] = []

        cards_remaining = num_cards
        batch_num = 1
        batches_run = 0
        num_batches = ((num_cards - 1) // batch_size) + 1

        logger.info(
            f"Generating {num_cards} cards for '{topic}' using {num_batches} batches"
        )

        try:
            while cards_remaining > 0:
                cards_in_batch = min(batch_size, cards_remaining)
                logger.info(f"Generating batch {batch_num}: {cards_in_batch} cards")

                if not self.agent:
                    await self.initialize()

                user_input = self._build_batch_prompt(
                    topic, cards_in_batch, batch_num, context, previous_topics
                )
                response, usage = await self.execute(user_input, context)
                batches_run += 1

                self._accumulate_usage(total_usage, usage)
                batch_cards = self._parse_cards_response(response, topic)
                all_cards.extend(batch_cards)

                previous_topics.extend(self._extract_topics_for_dedup(batch_cards))
                cards_remaining -= len(batch_cards)

                logger.info(
                    f"Batch {batch_num} generated {len(batch_cards)} cards. {cards_remaining} remaining."
                )

                # An empty batch would never reduce cards_remaining: stop
                # instead of looping forever
                if len(batch_cards) == 0:
                    logger.warning(f"No cards generated in batch {batch_num}, stopping")
                    break

                batch_num += 1

            if total_usage.get("total_tokens", 0) > 0:
                logger.info(
                    f"Total usage: {total_usage['total_tokens']} tokens "
                    f"(Input: {total_usage['input_tokens']}, Output: {total_usage['output_tokens']})"
                )

            # batch_num is the index of the *next* batch after a normal exit,
            # so report the number of batches that actually ran
            logger.info(
                f"Generated {len(all_cards)} cards across {batches_run} batches for '{topic}'"
            )
            return all_cards

        except Exception as e:
            logger.error(f"Card generation failed: {e}")
            raise

    def _build_generation_prompt(
        self,
        topic: str,
        num_cards: int,
        difficulty: str,
        prerequisites: List[str],
        context: Dict[str, Any],
    ) -> str:
        """Build the generation prompt.

        The prompt states the topic, subject, difficulty and prerequisites,
        lists the requirements, and shows the expected JSON structure. When
        ``context`` has a ``"source_text"`` entry, its first 2000 characters
        are appended as source material.
        """
        prerequisites_str = ", ".join(prerequisites) if prerequisites else "None"

        prompt = f"""Generate {num_cards} high-quality flashcards for the topic: {topic}

Subject: {self.subject}
Difficulty Level: {difficulty}
Prerequisites: {prerequisites_str}

Requirements:
- Focus on {self.subject} concepts and terminology
- Ensure technical accuracy and depth appropriate for {difficulty} level
- Include practical applications and real-world examples
- Test understanding, not just memorization
- Use clear, unambiguous questions

Return your response as a JSON object with this structure:
{{
    "cards": [
        {{
            "card_type": "basic",
            "front": {{
                "question": "Clear, specific question"
            }},
            "back": {{
                "answer": "Concise, accurate answer",
                "explanation": "Detailed explanation with reasoning",
                "example": "Practical example or application"
            }},
            "metadata": {{
                "difficulty": "{difficulty}",
                "prerequisites": {json.dumps(prerequisites)},
                "topic": "{topic}",
                "subject": "{self.subject}",
                "learning_outcomes": ["outcome1", "outcome2"],
                "common_misconceptions": ["misconception1"]
            }}
        }}
    ]
}}"""

        if context and context.get("source_text"):
            prompt += f"\n\nBase the cards on this source material:\n{context['source_text'][:2000]}..."

        return prompt

    def _parse_cards_response(self, response: Any, topic: str) -> List[Card]:
        """Parse the agent response into Card objects.

        Accepts an object with a ``cards`` attribute, a dict with a
        ``"cards"`` key, or a JSON string (optionally wrapped in a markdown
        code fence). Cards that fail to convert are skipped with a warning.

        Raises:
            ValueError: If the response has an unsupported type, lacks a
                ``cards`` field, or is a string that is not valid JSON.
        """
        try:
            # Handle structured output from CardsGenerationSchema
            if hasattr(response, "cards"):
                # Response is already a CardsGenerationSchema object
                logger.info(f"✅ STRUCTURED OUTPUT RECEIVED: {type(response)}")
                card_data_list = response.cards
            elif isinstance(response, dict) and "cards" in response:
                # Response is a dict with cards
                card_data_list = response["cards"]
            elif isinstance(response, str):
                # Fallback: Clean up the response - remove markdown code blocks if present
                response = response.strip()
                if response.startswith("```json"):
                    response = response[7:]  # Remove ```json
                if response.startswith("```"):
                    response = response[3:]  # Remove ```
                if response.endswith("```"):
                    response = response[:-3]  # Remove trailing ```
                response = response.strip()

                data = json.loads(response)
                if "cards" not in data:
                    raise ValueError("Response missing 'cards' field")
                card_data_list = data["cards"]
            else:
                raise ValueError(f"Unexpected response format: {type(response)}")

            cards = []
            for i, card_data in enumerate(card_data_list):
                try:
                    if hasattr(card_data, "model_dump"):
                        # pydantic v2 models: .dict() is deprecated there
                        payload = card_data.model_dump()
                    elif hasattr(card_data, "dict"):
                        payload = card_data.dict()
                    elif isinstance(card_data, dict):
                        payload = card_data
                    else:
                        logger.warning(
                            f"Skipping card {i}: unsupported payload type {type(card_data)}"
                        )
                        continue

                    card = card_dict_to_card(payload, topic, self.subject)
                    cards.append(card)

                except Exception as e:
                    # One malformed card must not discard the rest of the batch
                    logger.warning(f"Failed to parse card {i}: {e}")
                    continue

            logger.info(f"✅ PARSED {len(cards)} CARDS FROM STRUCTURED OUTPUT")
            return cards

        except json.JSONDecodeError as e:
            logger.error(f"💥 JSON DECODE ERROR: {e}")
            logger.error("💥 RAW RESPONSE THAT FAILED TO PARSE:")
            logger.error("---FAILED RESPONSE START---")
            logger.error(f"{response}")
            logger.error("---FAILED RESPONSE END---")
            logger.error(f"💥 RESPONSE TYPE: {type(response)}")
            if isinstance(response, str):
                logger.error(f"💥 RESPONSE LENGTH: {len(response)}")
                logger.error(f"💥 FIRST 200 CHARS: {repr(response[:200])}")
                logger.error(f"💥 LAST 200 CHARS: {repr(response[-200:])}")
            raise ValueError(f"Invalid JSON response from agent: {e}") from e
        except Exception as e:
            logger.error(f"💥 GENERAL PARSING ERROR: {e}")
            logger.error(f"💥 RESPONSE THAT CAUSED ERROR: {response}")
            raise
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
class QualityReviewAgent(BaseAgentWrapper):
    """Single-pass quality review agent for lightweight validation and fixes."""

    def __init__(self, openai_client: AsyncOpenAI, model: str):
        """Configure the reviewer agent.

        Args:
            openai_client: Client handed to the base wrapper.
            model: Model name used for reviewing.
        """
        config = AgentConfig(
            name="quality_reviewer",
            instructions=(
                "You are a meticulous flashcard reviewer. Review each card for factual accuracy, clarity,"
                " atomic scope, and answer quality. When needed, revise the card while keeping it concise and"
                " faithful to the original intent. Always respond with a JSON object containing:"
                ' {"approved": bool, "reason": string, "revised_card": object or null}.'
                " The revised card must follow the input schema with fields card_type, front.question,"
                " back.answer/explanation/example, and metadata."
            ),
            model=model,
            temperature=0.2,
            timeout=45.0,
            retry_attempts=2,
            enable_tracing=False,
        )
        super().__init__(config, openai_client)

    async def review_card(self, card: Card) -> Tuple[Optional[Card], bool, str]:
        """Review a card and optionally return a revised version.

        Args:
            card: The card to review.

        Returns:
            ``(card, approved, reason)``. ``card`` is the revised card when
            the reviewer supplied a usable one, otherwise the original. If
            the review cannot be executed or its response cannot be parsed
            into a JSON object, the original card is returned as approved.
        """

        card_payload = {
            "card_type": card.card_type,
            "front": {"question": card.front.question if card.front else ""},
            "back": {
                "answer": card.back.answer if card.back else "",
                "explanation": card.back.explanation if card.back else "",
                "example": card.back.example if card.back else "",
            },
            "metadata": card.metadata or {},
        }

        user_input = (
            "Review the following flashcard. Approve it if it is accurate, clear, and atomic."
            " If improvements are needed, provide a revised_card with the corrections applied.\n\n"
            "Flashcard JSON:\n"
            f"{json.dumps(card_payload, ensure_ascii=False)}\n\n"
            "Respond with JSON matching this schema:\n"
            '{\n  "approved": true | false,\n  "reason": "short explanation",\n'
            '  "revised_card": { ... } | null\n}'
        )

        try:
            response, _ = await self.execute(user_input)
        except Exception as e:
            logger.error(f"Quality review failed to execute: {e}")
            return card, True, "Review failed; keeping original card"

        try:
            if isinstance(response, str):
                # Strip a surrounding markdown code fence before parsing
                text = response.strip()
                if text.startswith("```json"):
                    text = text[7:]
                elif text.startswith("```"):
                    text = text[3:]
                if text.endswith("```"):
                    text = text[:-3]
                parsed = json.loads(text.strip())
            else:
                parsed = response
        except Exception as e:
            logger.warning(f"Failed to parse review response as JSON: {e}")
            return card, True, "Reviewer returned invalid JSON; keeping original"

        # Valid JSON that is not an object (list, number, null, ...) has no
        # verdict to read; treat it like any other unusable response
        if not isinstance(parsed, dict):
            logger.warning(
                f"Review response is not a JSON object ({type(parsed)}); keeping original"
            )
            return card, True, "Reviewer returned invalid JSON; keeping original"

        approved = bool(parsed.get("approved", True))
        reason = str(parsed.get("reason", ""))
        revised_payload = parsed.get("revised_card")

        revised_card: Optional[Card] = None
        if isinstance(revised_payload, dict):
            try:
                metadata = revised_payload.get("metadata", {}) or {}
                # Fall back to the original card's subject/topic when the
                # revision does not carry its own
                revised_subject = metadata.get("subject") or (card.metadata or {}).get(
                    "subject",
                    "general",
                )
                revised_topic = metadata.get("topic") or (card.metadata or {}).get(
                    "topic",
                    "General Concepts",
                )
                revised_card = card_dict_to_card(
                    revised_payload, revised_topic, revised_subject
                )
            except Exception as e:
                logger.warning(f"Failed to build revised card from review payload: {e}")
                revised_card = None

        return revised_card or card, approved, reason or ""
|
ankigen/agents/integration.py
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Main integration module for AnkiGen agent system
|
| 2 |
+
|
| 3 |
+
from typing import List, Dict, Any, Tuple, Optional
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
from ankigen.logging import logger
|
| 8 |
+
from ankigen.models import Card
|
| 9 |
+
from ankigen.llm_interface import OpenAIClientManager
|
| 10 |
+
from ankigen.context7 import Context7Client
|
| 11 |
+
|
| 12 |
+
from .generators import SubjectExpertAgent
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class AgentOrchestrator:
|
| 16 |
+
"""Main orchestrator for the AnkiGen agent system"""
|
| 17 |
+
|
| 18 |
+
def __init__(self, client_manager: OpenAIClientManager):
    """Keep the client manager; the client and agent slots start out empty."""
    # Nothing is created here: both attributes are placeholders set to None
    self.subject_expert = None
    self.openai_client = None
    self.client_manager = client_manager
|
| 23 |
+
|
| 24 |
+
async def initialize(
    self,
    api_key: str,
    model_overrides: Dict[str, str] = None,
    reasoning_overrides: Dict[str, Optional[str]] = None,
):
    """Initialize the OpenAI client and apply optional per-agent overrides.

    Args:
        api_key: Key passed to the client manager.
        model_overrides: Passed to the config manager's ``update_models``
            when non-empty.
        reasoning_overrides: Agent name -> reasoning effort; each entry is
            applied through ``update_agent_config`` when non-empty.

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    try:
        await self.client_manager.initialize_client(api_key)
        self.openai_client = self.client_manager.get_client()

        # The config manager is only needed when there is something to override
        if model_overrides or reasoning_overrides:
            from ankigen.agents.config import get_config_manager

            manager = get_config_manager()

            if model_overrides:
                manager.update_models(model_overrides)
                logger.info(f"Applied model overrides: {model_overrides}")

            if reasoning_overrides:
                for name, effort in reasoning_overrides.items():
                    manager.update_agent_config(name, reasoning_effort=effort)
                logger.info(f"Applied reasoning overrides: {reasoning_overrides}")

        logger.info("Agent system initialized successfully (simplified pipeline)")

    except Exception as e:
        logger.error(f"Failed to initialize agent system: {e}")
        raise
|
| 61 |
+
|
| 62 |
+
async def generate_cards_with_agents(
    self,
    topic: str,
    subject: str = "general",
    num_cards: int = 5,
    difficulty: str = "intermediate",
    context: Optional[Dict[str, Any]] = None,
    library_name: Optional[str] = None,
    library_topic: Optional[str] = None,
    generate_cloze: bool = False,
    topics_list: Optional[List[str]] = None,
    cards_per_topic: int = 8,
) -> Tuple[List[Card], Dict[str, Any]]:
    """Generate cards using the agent system.

    If topics_list is provided, generates cards for each subtopic separately
    to ensure comprehensive coverage. Otherwise falls back to single-topic mode.

    Args:
        topic: Main topic (used as the main subject in multi-topic mode).
        subject: Subject area passed on to the generation step.
        num_cards: Number of cards for single-topic mode; also used to size
            the library documentation request.
        difficulty: Difficulty label passed on to the generation step.
        context: Optional extra context. It is copied, never modified.
        library_name: When set, documentation for this library is fetched
            and added to the context.
        library_topic: Optional topic to narrow the documentation request.
        generate_cloze: Passed on to the generation step.
        topics_list: Subtopics for multi-topic mode.
        cards_per_topic: Cards per subtopic in multi-topic mode.

    Returns:
        ``(cards, metadata)`` where metadata summarizes the run.

    Raises:
        ValueError: If the agent system has not been initialized.
        Exception: Any generation failure is logged and re-raised.
    """
    start_time = datetime.now()

    try:
        if not self.openai_client:
            raise ValueError("Agent system not initialized")

        # Enhance context with library documentation if requested.
        # Work on a copy: the keys added below must not leak into the
        # dictionary owned by the caller.
        enhanced_context = dict(context) if context else {}
        library_docs = None

        if library_name:
            library_docs = await self._fetch_library_docs(
                library_name, library_topic, num_cards
            )
            if library_docs:
                enhanced_context["library_documentation"] = library_docs
                enhanced_context["library_name"] = library_name

        # Generate cards - either per-topic or single-topic mode
        multi_topic_mode = topics_list is not None and len(topics_list) > 0
        if multi_topic_mode:
            logger.info(
                f"Starting multi-topic generation: {len(topics_list)} topics, "
                f"{cards_per_topic} cards each for '{topic}'"
            )
            cards = await self._generate_cards_per_topic(
                main_subject=topic,
                subject=subject,
                topics_list=topics_list,
                cards_per_topic=cards_per_topic,
                difficulty=difficulty,
                context=enhanced_context,
                generate_cloze=generate_cloze,
            )
        else:
            # Fallback to single-topic mode
            logger.info(f"Starting single-topic generation: {topic} ({subject})")
            cards = await self._generation_phase(
                topic=topic,
                subject=subject,
                num_cards=num_cards,
                difficulty=difficulty,
                context=enhanced_context,
                generate_cloze=generate_cloze,
            )

        # Collect metadata
        metadata = {
            "generation_method": "agent_system",
            "generation_time": (datetime.now() - start_time).total_seconds(),
            "cards_generated": len(cards),
            "topic": topic,
            "subject": subject,
            "difficulty": difficulty,
            "library_name": library_name if library_name else None,
            "library_docs_used": bool(library_docs),
            "topics_list": topics_list,
            "multi_topic_mode": multi_topic_mode,
        }

        logger.info(
            f"Agent-based generation complete: {len(cards)} cards generated"
        )
        return cards, metadata

    except Exception as e:
        logger.error(f"Agent-based generation failed: {e}")
        raise
|
| 147 |
+
|
| 148 |
+
async def _fetch_library_docs(
    self, library_name: str, library_topic: Optional[str], num_cards: int
) -> Optional[str]:
    """Retrieve documentation text for a library through Context7.

    The token budget requested from the client scales with ``num_cards``
    (8000 by default, 10000 above 20 cards, 12000 above 40 cards) and is
    cut to 80% when a ``library_topic`` is supplied.

    Args:
        library_name: Library to look up.
        library_topic: Optional topic forwarded to the client; a falsy
            value leaves the budget unreduced.
        num_cards: Number of cards the documentation is meant to support.

    Returns:
        Whatever the client returned when it is truthy, otherwise ``None``.
        Any exception is logged and converted into ``None``.
    """
    logger.info(f"Fetching library documentation for: {library_name}")
    try:
        docs_client = Context7Client()

        # Dynamic token allocation: bigger decks get a bigger budget.
        token_limit = 12000 if num_cards > 40 else 10000 if num_cards > 20 else 8000

        # A focused topic needs less surrounding material.
        if library_topic:
            token_limit = int(token_limit * 0.8)

        topic_suffix = f" for topic: {library_topic}" if library_topic else ""
        logger.info(f"Fetching {token_limit} tokens of documentation" + topic_suffix)

        library_docs = await docs_client.fetch_library_documentation(
            library_name, topic=library_topic, tokens=token_limit
        )

        if not library_docs:
            logger.warning(
                f"Could not fetch documentation for library: {library_name}"
            )
            return None

        logger.info(
            f"Added {len(library_docs)} chars of {library_name} documentation to context"
        )
        return library_docs
    except Exception as e:
        # Documentation is optional input; failures are logged, not raised.
        logger.error(f"Error fetching library documentation: {e}")
        return None
|
| 190 |
+
|
| 191 |
+
async def _generate_cards_per_topic(
    self,
    main_subject: str,
    subject: str,
    topics_list: List[str],
    cards_per_topic: int,
    difficulty: str,
    context: Dict[str, Any],
    generate_cloze: bool,
) -> List[Card]:
    """Run the generation phase once per subtopic and concatenate the cards.

    Subtopics are processed sequentially, in list order. Each call receives
    a copy of ``context`` extended with ``main_subject``, ``topic_index``
    (1-based), ``total_topics`` and ``current_subtopic``.

    Args:
        main_subject: Overall subject the subtopics belong to.
        subject: Subject passed through to ``_generation_phase``.
        topics_list: Subtopics to generate cards for.
        cards_per_topic: Number of cards requested for each subtopic.
        difficulty: Difficulty passed through to ``_generation_phase``.
        context: Base context shared by every subtopic.
        generate_cloze: Cloze preference passed through unchanged.

    Returns:
        All generated cards, in subtopic order.
    """
    total_topics = len(topics_list)
    all_cards: List[Card] = []

    for topic_num, subtopic in enumerate(topics_list, start=1):
        logger.info(
            f"Generating topic {topic_num}/{total_topics}: {subtopic} "
            f"({cards_per_topic} cards)"
        )

        # Per-topic context: base context plus position information.
        topic_context = dict(context)
        topic_context.update(
            {
                "main_subject": main_subject,
                "topic_index": topic_num,
                "total_topics": total_topics,
                "current_subtopic": subtopic,
            }
        )

        cards = await self._generation_phase(
            topic=subtopic,
            subject=subject,
            num_cards=cards_per_topic,
            difficulty=difficulty,
            context=topic_context,
            generate_cloze=generate_cloze,
        )
        all_cards += cards

        logger.info(
            f"Topic {topic_num}/{total_topics} complete: {len(cards)} cards. "
            f"Total: {len(all_cards)}"
        )

    return all_cards
|
| 237 |
+
|
| 238 |
+
async def _generation_phase(
    self,
    topic: str,
    subject: str,
    num_cards: int,
    difficulty: str,
    context: Optional[Dict[str, Any]] = None,
    generate_cloze: bool = False,
) -> List[Card]:
    """Execute the card generation phase for a single topic.

    A ``SubjectExpertAgent`` is created on first use and re-created whenever
    the requested ``subject`` differs from the one the current agent holds.

    Args:
        topic: Topic handed to the subject expert.
        subject: Subject the expert agent must be bound to.
        num_cards: Number of cards to request.
        difficulty: Stored in the generation context under ``"difficulty"``.
        context: Optional base context. It is copied, never modified, so a
            dict shared by the caller is not polluted with per-call keys.
        generate_cloze: Stored in the context under ``"generate_cloze"``.

    Returns:
        The cards returned by the subject expert.
    """
    if not self.subject_expert or self.subject_expert.subject != subject:
        self.subject_expert = SubjectExpertAgent(self.openai_client, subject)

    # Build a fresh dict instead of writing into the caller's object.
    generation_context: Dict[str, Any] = {
        **(context or {}),
        "difficulty": difficulty,
        "generate_cloze": generate_cloze,
    }

    cards = await self.subject_expert.generate_cards(
        topic=topic, num_cards=num_cards, context=generation_context
    )

    logger.info(f"Generation phase complete: {len(cards)} cards generated")
    return cards
|
| 264 |
+
|
| 265 |
+
def get_performance_metrics(self) -> Dict[str, Any]:
    """Report basic status information for the agent system.

    Returns:
        A dict with the single key ``"agents_enabled"``, always ``True``.
    """
    # Basic performance info only: a static flag, no timing or usage data.
    metrics: Dict[str, Any] = {"agents_enabled": True}
    return metrics
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
async def integrate_with_existing_workflow(
    client_manager: OpenAIClientManager, api_key: str, **generation_params
) -> Tuple[List[Card], Dict[str, Any]]:
    """Entry point that routes an AnkiGen generation request through agents.

    A new ``AgentOrchestrator`` is built and initialized on every call; the
    agent path is unconditional (there is no non-agent fallback here).

    Args:
        client_manager: Client manager handed to the orchestrator.
        api_key: Key passed to ``AgentOrchestrator.initialize``.
        **generation_params: Forwarded verbatim to
            ``generate_cards_with_agents``.

    Returns:
        The ``(cards, metadata)`` pair produced by the orchestrator.
    """
    agent_orchestrator = AgentOrchestrator(client_manager)
    await agent_orchestrator.initialize(api_key)

    generated_cards, generation_metadata = (
        await agent_orchestrator.generate_cards_with_agents(**generation_params)
    )
    return generated_cards, generation_metadata
|
ankigen/agents/performance.py
ADDED
|
@@ -0,0 +1,475 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Performance optimizations for agent system
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
import time
|
| 5 |
+
import hashlib
|
| 6 |
+
from typing import Dict, Any, List, Optional, Callable, TypeVar, Generic
|
| 7 |
+
from dataclasses import dataclass, field
|
| 8 |
+
from functools import wraps, lru_cache
|
| 9 |
+
import json
|
| 10 |
+
|
| 11 |
+
from ankigen.logging import logger
|
| 12 |
+
from ankigen.models import Card
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
T = TypeVar("T")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@dataclass
class CacheConfig:
    """Settings that control how agent responses are cached."""

    # Master switch carried on the config object.
    enable_caching: bool = True
    # Entry lifetime in seconds.
    cache_ttl: int = 3600
    # Entry count at which the cache starts evicting.
    max_cache_size: int = 1000
    # "memory" or "file"
    cache_backend: str = "memory"
    # Directory for the "file" backend; filled in by __post_init__ if unset.
    cache_directory: Optional[str] = None

    def __post_init__(self):
        """Default the directory to ``cache/agents`` for the file backend."""
        if self.cache_directory or self.cache_backend != "file":
            return
        self.cache_directory = "cache/agents"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@dataclass
class PerformanceConfig:
    """Configuration for performance optimizations"""

    # When False, BatchProcessor.add_request processes each request at once
    # and PerformanceOptimizer does not build a BatchProcessor at all.
    enable_batch_processing: bool = True
    # Batch length at which BatchProcessor processes without waiting.
    max_batch_size: int = 10
    batch_timeout: float = 2.0  # seconds
    # Within this file this flag is only echoed by get_performance_stats.
    enable_parallel_execution: bool = True
    # Size of the semaphore guarding PerformanceOptimizer.optimize_agent_call.
    max_concurrent_requests: int = 5
    # Controls whether PerformanceOptimizer builds a RequestDeduplicator.
    enable_request_deduplication: bool = True
    # Controls whether PerformanceOptimizer builds a MemoryCache.
    enable_response_caching: bool = True
    # default_factory gives every config instance its own CacheConfig.
    cache_config: CacheConfig = field(default_factory=CacheConfig)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@dataclass
class CacheEntry(Generic[T]):
    """A cached value together with its bookkeeping data."""

    value: T
    # time.time() timestamp supplied by whoever creates the entry.
    created_at: float
    # Number of touch() calls so far.
    access_count: int = 0
    # time.time() of creation, then of the latest touch().
    last_accessed: float = field(default_factory=time.time)
    cache_key: str = ""

    def is_expired(self, ttl: int) -> bool:
        """Return True once the entry is strictly older than ``ttl`` seconds."""
        age = time.time() - self.created_at
        return age > ttl

    def touch(self):
        """Record one more access and refresh the last-access timestamp."""
        self.last_accessed = time.time()
        self.access_count += 1
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class MemoryCache(Generic[T]):
    """In-memory cache with TTL expiry and least-recently-used eviction.

    All public coroutines serialize on one ``asyncio.Lock``. Entries older
    than ``config.cache_ttl`` seconds are dropped lazily when looked up.
    When a *new* key arrives while the cache already holds
    ``config.max_cache_size`` entries, the least recently used quarter of
    the keys (at least one) is evicted first.

    A stored ``None`` value cannot be told apart from a miss by ``get``.
    """

    def __init__(self, config: CacheConfig):
        self.config = config
        self._cache: Dict[str, CacheEntry[T]] = {}
        # Insertion-ordered dict used as an ordered set: least recently used
        # key first, most recently used last. Gives O(1) "move to end"
        # instead of the O(n) list.remove a plain list would need.
        self._access_order: Dict[str, None] = {}
        # Lookup counters backing the "hit_rate" statistic.
        self._hits = 0
        self._misses = 0
        self._lock = asyncio.Lock()

    async def get(self, key: str) -> Optional[T]:
        """Return the cached value for ``key``, or ``None`` if absent/expired."""
        async with self._lock:
            entry = self._cache.get(key)
            if entry is None:
                self._misses += 1
                return None

            if entry.is_expired(self.config.cache_ttl):
                # Expired entries are removed on access and count as misses.
                await self._remove(key)
                self._misses += 1
                return None

            entry.touch()
            self._update_access_order(key)
            self._hits += 1

            logger.debug(f"Cache hit for key: {key[:20]}...")
            return entry.value

    async def set(self, key: str, value: T) -> None:
        """Store ``value`` under ``key``, evicting old entries if needed."""
        async with self._lock:
            # Overwriting an existing key does not grow the cache, so only
            # a genuinely new key can trigger eviction.
            if (
                key not in self._cache
                and len(self._cache) >= self.config.max_cache_size
            ):
                await self._evict_lru()

            self._cache[key] = CacheEntry(
                value=value, created_at=time.time(), cache_key=key
            )
            self._update_access_order(key)

            logger.debug(f"Cache set for key: {key[:20]}...")

    async def remove(self, key: str) -> bool:
        """Remove ``key``; return True if it was present."""
        async with self._lock:
            return await self._remove(key)

    async def clear(self) -> None:
        """Drop every entry (lookup counters are left untouched)."""
        async with self._lock:
            self._cache.clear()
            self._access_order.clear()
            logger.info("Cache cleared")

    async def _remove(self, key: str) -> bool:
        """Remove ``key`` without taking the lock (caller must hold it)."""
        if key not in self._cache:
            return False
        del self._cache[key]
        self._access_order.pop(key, None)
        return True

    async def _evict_lru(self) -> None:
        """Evict the least recently used 25% of keys, but never fewer than one."""
        if not self._access_order:
            return

        # max(1, ...) matters for caches tracking fewer than 4 keys, where
        # integer division alone would evict nothing and let the cache grow
        # past max_cache_size.
        evict_count = max(1, len(self._access_order) // 4)
        to_remove = list(self._access_order)[:evict_count]
        for key in to_remove:
            await self._remove(key)

        logger.debug(f"Evicted {len(to_remove)} cache entries")

    def _update_access_order(self, key: str) -> None:
        """Mark ``key`` as the most recently used key."""
        self._access_order.pop(key, None)
        self._access_order[key] = None

    def get_stats(self) -> Dict[str, Any]:
        """Return cache statistics.

        ``total_accesses`` sums the access counts of the entries currently
        stored. ``hit_rate`` is hits divided by all ``get`` calls since
        construction (0.0 before the first lookup).
        """
        total_accesses = sum(entry.access_count for entry in self._cache.values())
        lookups = self._hits + self._misses
        return {
            "entries": len(self._cache),
            "max_size": self.config.max_cache_size,
            "total_accesses": total_accesses,
            "hits": self._hits,
            "misses": self._misses,
            "hit_rate": self._hits / lookups if lookups else 0.0,
        }
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
class BatchProcessor:
    """Batch processor for agent requests

    Requests are accumulated per ``batch_key``. A batch is handed to
    ``processor_func`` either when it reaches ``config.max_batch_size`` or
    when the timer started for that key fires after ``config.batch_timeout``
    seconds.

    NOTE(review): results of a processed batch are only returned to the
    caller whose request filled the batch. Every other caller waits for the
    batch to disappear and then calls ``processor_func`` again with its own
    single request (see ``_wait_for_batch_result``), so those requests are
    processed outside the batch.
    """

    def __init__(self, config: PerformanceConfig):
        self.config = config
        # batch_key -> requests collected so far for that key
        self._batches: Dict[str, List[Dict[str, Any]]] = {}
        # batch_key -> timer task that flushes the batch after batch_timeout
        self._batch_timers: Dict[str, asyncio.Task] = {}
        self._lock = asyncio.Lock()

    async def add_request(
        self, batch_key: str, request_data: Dict[str, Any], processor_func: Callable
    ) -> Any:
        """Add request to batch for processing

        Returns whatever ``processor_func`` returned: for the single-request
        list when batching is disabled or the caller fell back to individual
        processing, or the whole-batch result list (see ``_process_batch``)
        when this request filled the batch.
        """

        if not self.config.enable_batch_processing:
            # Process immediately if batching is disabled
            return await processor_func([request_data])

        async with self._lock:
            # Initialize batch if needed
            if batch_key not in self._batches:
                self._batches[batch_key] = []
                self._start_batch_timer(batch_key, processor_func)

            # Add request to batch
            self._batches[batch_key].append(request_data)

            # Process immediately if batch is full
            # (processor_func is awaited while the lock is still held)
            if len(self._batches[batch_key]) >= self.config.max_batch_size:
                return await self._process_batch(batch_key, processor_func)

        # Wait for timer or batch completion
        return await self._wait_for_batch_result(
            batch_key, request_data, processor_func
        )

    def _start_batch_timer(self, batch_key: str, processor_func: Callable) -> None:
        """Start timer for batch processing"""

        async def timer():
            await asyncio.sleep(self.config.batch_timeout)
            async with self._lock:
                if batch_key in self._batches and self._batches[batch_key]:
                    # NOTE(review): _process_batch cancels the task stored in
                    # _batch_timers[batch_key], which at this point is this
                    # very task - it looks like the timer cancels itself
                    # before processor_func can finish. Confirm.
                    await self._process_batch(batch_key, processor_func)

        self._batch_timers[batch_key] = asyncio.create_task(timer())

    async def _process_batch(
        self, batch_key: str, processor_func: Callable
    ) -> List[Any]:
        """Process accumulated batch

        Pops the batch for ``batch_key``, cancels its timer and awaits
        ``processor_func(batch)``. A non-list result is wrapped in a list.
        Exceptions from ``processor_func`` are logged and re-raised.
        """
        if batch_key not in self._batches:
            return []

        batch = self._batches.pop(batch_key)

        # Cancel timer
        if batch_key in self._batch_timers:
            self._batch_timers[batch_key].cancel()
            del self._batch_timers[batch_key]

        if not batch:
            return []

        logger.debug(f"Processing batch {batch_key} with {len(batch)} requests")

        try:
            # Process the batch
            results = await processor_func(batch)
            return results if isinstance(results, list) else [results]

        except Exception as e:
            logger.error(f"Batch processing failed for {batch_key}: {e}")
            raise

    async def _wait_for_batch_result(
        self, batch_key: str, request_data: Dict[str, Any], processor_func: Callable
    ) -> Any:
        """Wait for batch processing to complete"""
        # This is a simplified implementation
        # In a real implementation, you'd use events/conditions to coordinate
        # between requests in the same batch

        # Poll (every 0.1 s, without the lock) until the batch has been popped.
        while batch_key in self._batches:
            await asyncio.sleep(0.1)

        # For now, process individually as fallback
        return await processor_func([request_data])
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
class RequestDeduplicator:
    """Deduplicates identical agent requests that are in flight together.

    Two requests are identical when their JSON serialization (with sorted
    keys) is equal. The first caller starts a task for the request; callers
    arriving while that task is pending await the same task and receive the
    same result or exception.
    """

    def __init__(self):
        # request hash -> task currently processing that request
        self._pending_requests: Dict[str, asyncio.Future] = {}
        self._lock = asyncio.Lock()

    def _generate_request_hash(self, request_data: str) -> str:
        """Return the MD5 hex digest of the serialized request.

        Deliberately not wrapped in ``lru_cache``: caching a bound method
        keys on ``self`` and keeps every instance alive, and hashing a
        string is cheap. MD5 is used as a fingerprint, not for security.
        """
        return hashlib.md5(request_data.encode()).hexdigest()

    async def deduplicate_request(
        self, request_data: Dict[str, Any], processor_func: Callable
    ) -> Any:
        """Process ``request_data`` once, sharing the result with duplicates.

        Args:
            request_data: JSON-serializable request description.
            processor_func: Coroutine function called as
                ``processor_func(request_data)`` for the first caller only.

        Returns:
            The value produced by ``processor_func``.

        Raises:
            TypeError: If ``request_data`` is not JSON-serializable.
            Whatever ``processor_func`` raises.
        """
        request_str = json.dumps(request_data, sort_keys=True)
        request_hash = self._generate_request_hash(request_str)

        # The lock only protects the registry lookup/registration. The task
        # itself is awaited after the lock is released; awaiting it inside
        # the lock would block every other request for the whole duration.
        async with self._lock:
            pending = self._pending_requests.get(request_hash)
            is_owner = pending is None
            if is_owner:
                pending = asyncio.create_task(
                    self._process_unique_request(
                        request_hash, request_data, processor_func
                    )
                )
                self._pending_requests[request_hash] = pending
            else:
                logger.debug(f"Deduplicating request: {request_hash[:16]}...")

        if not is_owner:
            return await pending

        try:
            return await pending
        finally:
            # Clean up completed request (only the owner unregisters it).
            async with self._lock:
                if self._pending_requests.get(request_hash) is pending:
                    del self._pending_requests[request_hash]

    async def _process_unique_request(
        self, request_hash: str, request_data: Dict[str, Any], processor_func: Callable
    ) -> Any:
        """Run ``processor_func`` for a request nobody else is processing."""
        logger.debug(f"Processing unique request: {request_hash[:16]}...")
        return await processor_func(request_data)
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
class PerformanceOptimizer:
    """Coordinates caching, batching, deduplication and concurrency limits.

    Each helper (cache, batch processor, deduplicator) is only created when
    its ``enable_*`` flag is set on the config; otherwise the attribute is
    ``None`` and the corresponding step is skipped.
    """

    def __init__(self, config: PerformanceConfig):
        self.config = config

        self.cache = None
        if config.enable_response_caching:
            self.cache = MemoryCache(config.cache_config)

        self.batch_processor = None
        if config.enable_batch_processing:
            self.batch_processor = BatchProcessor(config)

        self.deduplicator = None
        if config.enable_request_deduplication:
            self.deduplicator = RequestDeduplicator()

        # Bounds how many optimize_agent_call invocations run at once.
        self._semaphore = asyncio.Semaphore(config.max_concurrent_requests)

    async def optimize_agent_call(
        self,
        agent_name: str,
        request_data: Dict[str, Any],
        processor_func: Callable,
        cache_key_generator: Optional[Callable[[Dict[str, Any]], str]] = None,
    ) -> Any:
        """Run ``processor_func(request_data)`` with caching and deduplication.

        Args:
            agent_name: Accepted for the caller's benefit; not used here.
            request_data: Request passed to the processor and key generator.
            processor_func: Coroutine function producing the result.
            cache_key_generator: When given (and caching is enabled), builds
                the cache key; without it the cache is bypassed entirely.

        Returns:
            The cached value if one exists, otherwise the fresh result.
            ``None`` results are never cached.
        """
        cache_key = None
        if self.cache and cache_key_generator:
            cache_key = cache_key_generator(request_data)

            # Cache lookup happens before taking a concurrency slot.
            hit = await self.cache.get(cache_key)
            if hit is not None:
                return hit

        # Apply rate limiting
        async with self._semaphore:
            use_dedup = self.deduplicator and self.config.enable_request_deduplication
            if use_dedup:
                result = await self.deduplicator.deduplicate_request(
                    request_data, processor_func
                )
            else:
                result = await processor_func(request_data)

            # Cache result
            if self.cache and cache_key and result is not None:
                await self.cache.set(cache_key, result)

            return result

    async def optimize_batch_processing(
        self, batch_key: str, request_data: Dict[str, Any], processor_func: Callable
    ) -> Any:
        """Route a request through the batch processor when one exists.

        Without a batch processor the request is processed immediately as a
        one-element batch.
        """
        if not self.batch_processor:
            return await processor_func([request_data])
        return await self.batch_processor.add_request(
            batch_key, request_data, processor_func
        )

    def get_performance_stats(self) -> Dict[str, Any]:
        """Return the active settings, free concurrency slots and cache stats."""
        enabled_features = {
            "batch_processing": self.config.enable_batch_processing,
            "parallel_execution": self.config.enable_parallel_execution,
            "request_deduplication": self.config.enable_request_deduplication,
            "response_caching": self.config.enable_response_caching,
        }
        concurrency = {
            "max_concurrent": self.config.max_concurrent_requests,
            # Reads the semaphore's private counter of free slots.
            "current_available": self._semaphore._value,
        }
        stats = {"config": enabled_features, "concurrency": concurrency}

        if self.cache:
            stats["cache"] = self.cache.get_stats()

        return stats
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
# Global performance optimizer
|
| 381 |
+
_global_optimizer: Optional[PerformanceOptimizer] = None
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
def get_performance_optimizer(
    config: Optional[PerformanceConfig] = None,
) -> PerformanceOptimizer:
    """Return the process-wide optimizer, creating it on first use.

    Args:
        config: Used only when the optimizer does not exist yet; a default
            ``PerformanceConfig`` is used when omitted. On later calls the
            argument is ignored and the existing instance is returned.
    """
    global _global_optimizer
    if _global_optimizer is not None:
        return _global_optimizer

    _global_optimizer = PerformanceOptimizer(config or PerformanceConfig())
    return _global_optimizer
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
# Decorators for performance optimization
|
| 395 |
+
def cache_response(cache_key_func: Callable[[Any], str], ttl: int = 3600):
    """Decorator that caches an async function's results in the global cache.

    Args:
        cache_key_func: Called with the wrapped function's positional and
            keyword arguments; must return the cache key.
        ttl: Accepted but not used by this decorator - expiry is governed by
            the global cache's own configuration.

    ``None`` results are not cached. When the global optimizer has no
    cache, the wrapped function is simply called.
    """

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            cache = get_performance_optimizer().cache
            if not cache:
                return await func(*args, **kwargs)

            key = cache_key_func(*args, **kwargs)

            # Serve from cache when possible.
            hit = await cache.get(key)
            if hit is not None:
                return hit

            # Miss: compute, then remember any non-None result.
            fresh = await func(*args, **kwargs)
            if fresh is not None:
                await cache.set(key, fresh)
            return fresh

        return wrapper

    return decorator
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
def rate_limit(max_concurrent: int = 5):
    """Decorator limiting how many calls of an async function run at once.

    Args:
        max_concurrent: Maximum number of concurrently running calls among
            the functions decorated with this decorator instance.

    The semaphore is created lazily inside the running event loop and is
    replaced when the decorated function is later used from a different
    loop (for example across separate ``asyncio.run`` calls). A semaphore
    created at decoration time would stay tied to a single loop and fail
    with "bound to a different event loop" on reuse.
    """
    # Holds the loop the current semaphore belongs to, and that semaphore.
    # The last loop is kept referenced until another loop replaces it.
    state: Dict[str, Any] = {"loop": None, "semaphore": None}

    def _current_semaphore() -> asyncio.Semaphore:
        """Return the semaphore for the running loop, creating it if needed."""
        loop = asyncio.get_running_loop()
        if state["loop"] is not loop:
            state["loop"] = loop
            state["semaphore"] = asyncio.Semaphore(max_concurrent)
        return state["semaphore"]

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            async with _current_semaphore():
                return await func(*args, **kwargs)

        return wrapper

    return decorator
|
| 440 |
+
|
| 441 |
+
|
| 442 |
+
# Utility functions for cache key generation
|
| 443 |
+
def generate_card_cache_key(
    topic: str, subject: str, num_cards: int, difficulty: str, **kwargs
) -> str:
    """Build a deterministic cache key for a card-generation request.

    The key is ``"cards:"`` followed by the MD5 hex digest of the request
    fields serialized as JSON with sorted keys. Of the extra keyword
    arguments only ``context`` (default ``{}``) takes part in the key.

    Raises:
        TypeError: If ``context`` (or another field) is not JSON-serializable.
    """
    context = kwargs.get("context", {})
    serialized = json.dumps(
        dict(
            topic=topic,
            subject=subject,
            num_cards=num_cards,
            difficulty=difficulty,
            context=context,
        ),
        sort_keys=True,
    )
    digest = hashlib.md5(serialized.encode()).hexdigest()
    return f"cards:{digest}"
|
| 456 |
+
|
| 457 |
+
|
| 458 |
+
def generate_judgment_cache_key(
    cards: List[Card], judgment_type: str = "general"
) -> str:
    """Build a deterministic cache key for judging a list of cards.

    The key is ``"judgment:"`` followed by the MD5 hex digest of the cards'
    question, answer and type (in list order) plus ``judgment_type``,
    serialized as JSON with sorted keys.
    """
    # Only the content fields are hashed, so the key is stable per content.
    card_data = [
        {
            "question": card.front.question,
            "answer": card.back.answer,
            "type": card.card_type,
        }
        for card in cards
    ]

    serialized = json.dumps(
        {"cards": card_data, "judgment_type": judgment_type}, sort_keys=True
    )
    digest = hashlib.md5(serialized.encode()).hexdigest()
    return f"judgment:{digest}"
|
ankigen/agents/schemas.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pydantic schemas for structured outputs from agents.
|
| 3 |
+
These schemas ensure type safety and eliminate JSON parsing errors.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import List, Dict, Any, Optional
|
| 7 |
+
from pydantic import BaseModel, Field
|
| 8 |
+
from enum import Enum
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class DifficultyLevel(str, Enum):
    """Difficulty levels for flashcards"""

    # The str mixin makes every member an actual string equal to its value.
    BEGINNER = "beginner"
    INTERMEDIATE = "intermediate"
    ADVANCED = "advanced"
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class CardType(str, Enum):
    """Types of flashcards"""

    # The str mixin makes every member an actual string equal to its value.
    BASIC = "basic"
    CLOZE = "cloze"
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class CardFrontSchema(BaseModel):
    """Schema for the front of a flashcard"""

    # Required (Field(...)): the only content on the front side.
    # NOTE(review): pydantic presumably exposes the docstring and the
    # description strings in the generated JSON schema - treat their wording
    # as runtime text and confirm before rephrasing.
    question: str = Field(..., description="The question or prompt for the flashcard")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class CardBackSchema(BaseModel):
    """Schema for the back of a flashcard"""

    # All three fields are required (Field(...)); none has a default.
    answer: str = Field(..., description="The main answer to the question")
    explanation: str = Field(..., description="Detailed explanation of the answer")
    example: str = Field(..., description="A concrete example illustrating the concept")
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class CardMetadataSchema(BaseModel):
    """Schema for flashcard metadata"""

    # Required fields.
    topic: str = Field(..., description="The main topic of the card")
    subject: str = Field(..., description="The subject area (e.g., Biology, History)")
    difficulty: DifficultyLevel = Field(..., description="The difficulty level")

    # Everything below is optional and defaults to None when omitted.
    tags: Optional[List[str]] = Field(
        None, description="Relevant tags for categorization"
    )
    learning_outcomes: Optional[List[str]] = Field(
        None, description="What the learner should achieve"
    )
    prerequisites: Optional[List[str]] = Field(
        None, description="Required prior knowledge"
    )
    related_concepts: Optional[List[str]] = Field(
        None, description="Related concepts to explore"
    )
    # Free-form string; no unit or format is enforced here.
    estimated_time: Optional[str] = Field(None, description="Estimated time to learn")
    common_mistakes: Optional[List[str]] = Field(
        None, description="Common mistakes to avoid"
    )
    memory_aids: Optional[List[str]] = Field(
        None, description="Memory aids or mnemonics"
    )
    real_world_applications: Optional[List[str]] = Field(
        None, description="Real-world applications"
    )
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class CardSchema(BaseModel):
    """Complete schema for a flashcard"""

    # Required parts: type plus the three nested sub-schemas.
    card_type: CardType = Field(..., description="The type of flashcard")
    front: CardFrontSchema = Field(..., description="The front of the card")
    back: CardBackSchema = Field(..., description="The back of the card")
    metadata: CardMetadataSchema = Field(..., description="Metadata about the card")
    # Optional; defaults to None.
    enhancement_notes: Optional[str] = Field(
        None, description="Notes about enhancements made"
    )
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class CardsGenerationSchema(BaseModel):
    """Schema for multiple cards generation"""

    # Required wrapper list; no length constraint is declared on it.
    cards: List[CardSchema] = Field(..., description="List of generated flashcards")
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class JudgeDecisionSchema(BaseModel):
    """Schema for judge decisions"""

    approved: bool = Field(..., description="Whether the card is approved")
    # Validated to lie in the closed interval [0.0, 1.0].
    score: float = Field(
        ..., ge=0.0, le=1.0, description="Quality score between 0 and 1"
    )
    feedback: str = Field(..., description="Detailed feedback about the card")
    # Optional; defaults to None.
    improvements: Optional[List[str]] = Field(
        None, description="Suggested improvements"
    )
    reasoning: str = Field(..., description="Detailed reasoning for the decision")
    # Validated to lie in the closed interval [0.0, 1.0].
    confidence: float = Field(
        ..., ge=0.0, le=1.0, description="Confidence in the decision"
    )
    # Optional free-form mapping; defaults to None.
    metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata")
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
class EnhancementSchema(BaseModel):
    """Schema for card enhancements"""

    # Required: the full card after enhancement and a summary of the change.
    enhanced_card: CardSchema = Field(..., description="The enhanced flashcard")
    enhancement_summary: str = Field(..., description="Summary of what was enhanced")
    # Optional free-form mapping; defaults to None.
    enhancement_details: Optional[Dict[str, Any]] = Field(
        None, description="Detailed enhancement information"
    )
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# Parameters describing one card-generation request.
class GenerationRequestSchema(BaseModel):
    """Schema for generation requests"""

    topic: str = Field(..., description="The topic to generate cards for")
    subject: str = Field(..., description="The subject area")
    # Bounded to 1..20 inclusive via ge/le.
    num_cards: int = Field(..., ge=1, le=20, description="Number of cards to generate")
    difficulty: DifficultyLevel = Field(..., description="Target difficulty level")
    # Both mappings below are optional and default to None.
    context: Optional[Dict[str, Any]] = Field(None, description="Additional context")
    preferences: Optional[Dict[str, Any]] = Field(None, description="User preferences")
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
# Token accounting for one request. Every field is required and every numeric
# field is constrained to be non-negative.
class TokenUsageSchema(BaseModel):
    """Schema for token usage tracking"""

    prompt_tokens: int = Field(..., ge=0, description="Number of tokens in the prompt")
    completion_tokens: int = Field(
        ..., ge=0, description="Number of tokens in the completion"
    )
    # Declared independently; nothing in this model checks that it equals
    # prompt_tokens + completion_tokens.
    total_tokens: int = Field(..., ge=0, description="Total tokens used")
    # Required float here (None is not accepted by this schema).
    estimated_cost: float = Field(..., ge=0.0, description="Estimated cost in USD")
    model: str = Field(..., description="Model used for the request")
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
# Settings proposed by the subject-analysis step. All fields except
# `documentation_focus` are required.
class AutoConfigSchema(BaseModel):
    """Schema for auto-configuration based on subject analysis"""

    # What to search for in Context7
    library_search_term: str = Field(
        ...,
        description="Library name to search for in Context7 (e.g., 'pandas', 'react', 'tensorflow')",
    )

    # Specific topic within the library (optional)
    documentation_focus: Optional[str] = Field(
        None,
        description="Specific topic/area within the library documentation to focus on",
    )

    # Suggested settings based on subject analysis
    topic_number: int = Field(
        ..., ge=2, le=20, description="Number of topics to generate (2-20)"
    )
    # The list carries the same 2..20 bounds as `topic_number`, but the two
    # fields are declared independently: nothing in this model enforces
    # len(topics_list) == topic_number.
    topics_list: List[str] = Field(
        ...,
        min_length=2,
        max_length=20,
        description="List of distinct subtopics to cover, ordered by learning progression",
    )
    cards_per_topic: int = Field(
        ..., ge=2, le=30, description="Number of cards per topic (2-30)"
    )
    learning_preferences: str = Field(
        ..., description="Learning preferences and focus areas for card generation"
    )
    generate_cloze: bool = Field(
        ...,
        description="Whether to generate cloze cards (true for syntax/code, false for concepts)",
    )
    # Plain string: the model names listed in the description are not
    # validated by this schema.
    model_choice: str = Field(
        ...,
        description="Recommended model: 'gpt-5.2-auto', 'gpt-5.2-instant', or 'gpt-5.2-thinking'",
    )

    # Analysis metadata
    # `subject_type` and `scope` are free-form strings; the allowed values in
    # their descriptions are advisory only.
    subject_type: str = Field(
        ...,
        description="Type of subject: 'concepts', 'syntax', 'api', 'theory', 'practical'",
    )
    scope: str = Field(
        ..., description="Scope of the subject: 'narrow', 'medium', 'broad'"
    )
    rationale: str = Field(
        ..., description="Brief explanation of why these settings were chosen"
    )
|
ankigen/agents/security.py
ADDED
|
@@ -0,0 +1,397 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Security enhancements for agent system
|
| 2 |
+
|
| 3 |
+
import time
|
| 4 |
+
import hashlib
|
| 5 |
+
import re
|
| 6 |
+
from typing import Dict, Any, Optional, List
|
| 7 |
+
from dataclasses import dataclass, field
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from collections import defaultdict
|
| 10 |
+
import asyncio
|
| 11 |
+
|
| 12 |
+
from ankigen.logging import logger
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@dataclass
class RateLimitConfig:
    """Configuration for rate limiting.

    The values are read by RateLimiter, which keeps per-identifier request
    timestamps and compares counts in trailing time windows against them.
    """

    # Cap on requests within the trailing 60 seconds.
    requests_per_minute: int = 60
    # Cap on requests within the trailing 3600 seconds.
    requests_per_hour: int = 1000
    # RateLimiter.check_rate_limit compares this against the same trailing
    # 60-second window as requests_per_minute, so the smaller of the two values
    # is the effective per-minute cap.
    burst_limit: int = 10
    # Not read by RateLimiter or any other code in this module.
    cooldown_period: int = 300  # seconds
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@dataclass
class SecurityConfig:
    """Security configuration for agents.

    Holds the feature switches, length caps and regex sources consumed by
    SecurityValidator. When ``blocked_patterns`` is empty after construction
    (including when an empty list is passed explicitly), the built-in pattern
    set is installed by ``__post_init__``.
    """

    enable_input_validation: bool = True
    enable_output_filtering: bool = True
    enable_rate_limiting: bool = True
    max_input_length: int = 10000
    max_output_length: int = 50000
    blocked_patterns: List[str] = field(default_factory=list)
    allowed_file_extensions: List[str] = field(
        default_factory=lambda: [".txt", ".md", ".json", ".yaml"]
    )

    def __post_init__(self):
        """Install the default blocked patterns when none were supplied."""
        if self.blocked_patterns:
            # Caller provided a non-empty list: keep it untouched.
            return

        credential_words = r"(?i)(api[_\-]?key|secret|password|token|credential)"
        openai_key = r"(?i)(sk-[a-zA-Z0-9]{48,})"  # OpenAI API key pattern
        access_token = r"(?i)(access[_\-]?token)"
        private_key = r"(?i)(private[_\-]?key)"
        script_tag = r"(?i)(<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>)"  # Script tags
        url_scheme = r"(?i)(javascript:|data:|vbscript:)"  # URL schemes

        self.blocked_patterns = [
            credential_words,
            openai_key,
            access_token,
            private_key,
            script_tag,
            url_scheme,
        ]
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class RateLimiter:
    """Sliding-window rate limiter for API calls and agent executions.

    Accepted request timestamps are stored per identifier. A request is
    accepted only while all three caps hold:

    * ``burst_limit`` and ``requests_per_minute`` - both compared against the
      number of accepted requests in the trailing 60 seconds, so the smaller
      value is the effective per-minute cap;
    * ``requests_per_hour`` - compared against the trailing 3600 seconds.

    Rejected requests are not recorded.
    """

    _MINUTE_WINDOW = 60
    _HOUR_WINDOW = 3600

    def __init__(self, config: "RateLimitConfig"):
        self.config = config
        # identifier -> timestamps (time.time()) of accepted requests
        self._requests: Dict[str, List[float]] = defaultdict(list)
        # One lock per identifier so concurrent checks for the same identifier
        # cannot both pass on the same free slot.
        self._locks: Dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)

    async def check_rate_limit(self, identifier: str) -> bool:
        """Check if request is within rate limits.

        Args:
            identifier: Key under which request timestamps are tracked.

        Returns:
            True (and the request is recorded) when every cap is respected,
            False (nothing recorded, a warning logged) otherwise.
        """
        async with self._locks[identifier]:
            now = time.time()

            # Drop timestamps older than one hour; nothing looks further back.
            recent_requests = [
                req_time
                for req_time in self._requests[identifier]
                if now - req_time < self._HOUR_WINDOW
            ]
            self._requests[identifier] = recent_requests

            last_minute = [
                req for req in recent_requests if now - req < self._MINUTE_WINDOW
            ]

            # Burst limit (evaluated over the last minute)
            if len(last_minute) >= self.config.burst_limit:
                logger.warning(f"Burst limit exceeded for {identifier}")
                return False

            # Per-minute limit
            if len(last_minute) >= self.config.requests_per_minute:
                logger.warning(f"Per-minute rate limit exceeded for {identifier}")
                return False

            # Per-hour limit
            if len(recent_requests) >= self.config.requests_per_hour:
                logger.warning(f"Per-hour rate limit exceeded for {identifier}")
                return False

            # Record this request
            recent_requests.append(now)
            return True

    def get_reset_time(self, identifier: str) -> Optional[datetime]:
        """Get when rate limits will reset for identifier.

        Mirrors the caps applied by ``check_rate_limit``: the burst cap and
        the per-minute cap free a slot 60 seconds after the oldest request in
        the minute window, the hourly cap 3600 seconds after the oldest
        request in the hour window. When several caps are currently hit, the
        latest of those instants is returned, because all of them must clear
        before a request is accepted again.

        Args:
            identifier: Key under which request timestamps are tracked.

        Returns:
            A naive local ``datetime`` for the reset instant, or None when the
            identifier is unknown or no cap is currently reached.
        """
        if identifier not in self._requests:
            return None

        now = time.time()
        last_hour = [
            req for req in self._requests[identifier] if now - req < self._HOUR_WINDOW
        ]
        last_minute = [req for req in last_hour if now - req < self._MINUTE_WINDOW]

        blocked_until: List[float] = []

        # The burst and per-minute caps share one window; the tighter one binds.
        minute_cap = min(self.config.burst_limit, self.config.requests_per_minute)
        if last_minute and len(last_minute) >= minute_cap:
            blocked_until.append(min(last_minute) + self._MINUTE_WINDOW)

        if last_hour and len(last_hour) >= self.config.requests_per_hour:
            blocked_until.append(min(last_hour) + self._HOUR_WINDOW)

        if not blocked_until:
            return None
        return datetime.fromtimestamp(max(blocked_until))
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class SecurityValidator:
    """Security validator for agent inputs and outputs.

    ``validate_*`` methods answer yes/no and fail closed: any exception raised
    while checking is logged and reported as a failed validation.
    ``sanitize_*`` methods return a cleaned copy of the text and fall back to
    a truncated copy of the original when cleaning itself fails.
    """

    # Compiled once for the class instead of being rebuilt on every call.
    _SUSPICIOUS_PATTERNS = tuple(
        re.compile(source)
        for source in (
            r"(?i)(\beval\s*\()",  # eval() calls
            r"(?i)(\bexec\s*\()",  # exec() calls
            r"(?i)(__import__)",  # Dynamic imports
            r"(?i)(subprocess|os\.system)",  # System commands
            r"(?i)(file://|ftp://)",  # File/FTP URLs
            r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b",  # IP addresses
        )
    )
    _TAG_PATTERN = re.compile(r"<[^>]+>")
    # The leading \b keeps ordinary words that merely end in a scheme name
    # (e.g. "metadata:") from being rewritten; a real scheme always starts at
    # a word boundary.
    _URL_SCHEME_PATTERN = re.compile(r"(?i)\b(javascript:|data:|vbscript:)[^\s]*")

    def __init__(self, config: "SecurityConfig"):
        self.config = config
        self._blocked_patterns = [
            re.compile(pattern) for pattern in config.blocked_patterns
        ]

    def validate_input(self, input_text: str, source: str = "unknown") -> bool:
        """Validate input for security issues.

        Args:
            input_text: Text about to be handed to an agent.
            source: Label used in log messages only.

        Returns:
            True when validation is disabled or the text passes the length,
            blocked-pattern and suspicious-content checks; False otherwise,
            including when a check raises.
        """
        if not self.config.enable_input_validation:
            return True

        try:
            # Check input length
            if len(input_text) > self.config.max_input_length:
                logger.warning(f"Input too long from {source}: {len(input_text)} chars")
                return False

            # Check for blocked patterns
            if any(pattern.search(input_text) for pattern in self._blocked_patterns):
                logger.warning(f"Blocked pattern detected in input from {source}")
                return False

            # Check for suspicious content
            if self._contains_suspicious_content(input_text):
                logger.warning(f"Suspicious content detected in input from {source}")
                return False

            return True

        except Exception as e:
            logger.error(f"Error validating input from {source}: {e}")
            return False

    def validate_output(self, output_text: str, agent_name: str = "unknown") -> bool:
        """Validate output for security issues.

        Args:
            output_text: Text produced by an agent.
            agent_name: Label used in log messages only.

        Returns:
            True when filtering is disabled or the text is within the length
            cap and matches no blocked pattern; False otherwise, including
            when a check raises.
        """
        if not self.config.enable_output_filtering:
            return True

        try:
            # Check output length
            if len(output_text) > self.config.max_output_length:
                logger.warning(
                    f"Output too long from {agent_name}: {len(output_text)} chars"
                )
                return False

            # Check for leaked sensitive information
            if any(pattern.search(output_text) for pattern in self._blocked_patterns):
                logger.warning(
                    f"Potential data leak detected in output from {agent_name}"
                )
                return False

            return True

        except Exception as e:
            logger.error(f"Error validating output from {agent_name}: {e}")
            return False

    def sanitize_input(self, input_text: str) -> str:
        """Sanitize input by removing potentially dangerous content.

        Strips anything shaped like ``<...>``, replaces ``javascript:``,
        ``data:`` and ``vbscript:`` URLs with ``[URL_REMOVED]`` and truncates
        to ``max_input_length`` characters (a ``...[TRUNCATED]`` marker is
        appended after the cut, so the result may exceed the cap by the
        marker's length).

        Returns:
            The sanitized text, or the first 1000 characters of the original
            if sanitizing raised.
        """
        try:
            # Remove HTML/XML tags
            sanitized = self._TAG_PATTERN.sub("", input_text)

            # Remove suspicious URLs
            sanitized = self._URL_SCHEME_PATTERN.sub("[URL_REMOVED]", sanitized)

            # Truncate if too long
            if len(sanitized) > self.config.max_input_length:
                sanitized = sanitized[: self.config.max_input_length] + "...[TRUNCATED]"

            return sanitized

        except Exception as e:
            logger.error(f"Error sanitizing input: {e}")
            return input_text[:1000]  # Return truncated original as fallback

    def sanitize_output(self, output_text: str) -> str:
        """Sanitize output by removing sensitive information.

        Every match of a configured blocked pattern is replaced with
        ``[REDACTED]``; the result is then truncated to ``max_output_length``
        characters plus a ``...[TRUNCATED]`` marker.

        Returns:
            The sanitized text, or the first 5000 characters of the original
            if sanitizing raised.
        """
        try:
            sanitized = output_text

            # Replace potential API keys or secrets
            for pattern in self._blocked_patterns:
                sanitized = pattern.sub("[REDACTED]", sanitized)

            # Truncate if too long
            if len(sanitized) > self.config.max_output_length:
                sanitized = (
                    sanitized[: self.config.max_output_length] + "...[TRUNCATED]"
                )

            return sanitized

        except Exception as e:
            logger.error(f"Error sanitizing output: {e}")
            return output_text[:5000]  # Return truncated original as fallback

    def _contains_suspicious_content(self, text: str) -> bool:
        """Check for suspicious content patterns.

        Returns True when the text contains an eval/exec call, ``__import__``,
        ``subprocess`` / ``os.system``, a ``file://`` or ``ftp://`` URL, or a
        dotted-quad that looks like an IPv4 address.
        """
        return any(pattern.search(text) for pattern in self._SUSPICIOUS_PATTERNS)
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
class SecureAgentWrapper:
    """Secure wrapper for agent execution with rate limiting and validation.

    ``secure_execute`` runs, in order: rate-limit check, input validation,
    input sanitization, the wrapped agent's ``execute``, and - for string
    results only - output validation and sanitization.
    """

    def __init__(
        self, base_agent, rate_limiter: "RateLimiter", validator: "SecurityValidator"
    ):
        self.base_agent = base_agent
        self.rate_limiter = rate_limiter
        self.validator = validator
        self._identifier = self._generate_identifier()

    def _generate_identifier(self) -> str:
        """Generate the rate-limiting key for the wrapped agent.

        The key is the first 16 hex characters of the MD5 digest of
        ``"<agent name>_<id(base_agent)>"``, so every wrapped agent object gets
        its own rate-limit bucket. The hash is a naming device only, not a
        security measure.

        The agent name is read from ``base_agent.config``, which may be either
        a mapping (``config.get("name")``) or an object with a ``name``
        attribute; ``"unknown"`` is used when neither is available.
        """
        config = getattr(self.base_agent, "config", None)
        getter = getattr(config, "get", None)
        if callable(getter):
            agent_name = getter("name", "unknown")
        else:
            # Attribute-style config objects have no .get(); the previous
            # unconditional .get() call raised AttributeError for them.
            agent_name = getattr(config, "name", "unknown")

        digest = hashlib.md5(f"{agent_name}_{id(self.base_agent)}".encode()).hexdigest()
        return digest[:16]

    def _rate_limiting_enabled(self) -> bool:
        """Report the validator config's ``enable_rate_limiting`` flag (default True)."""
        config = getattr(self.validator, "config", None)
        return getattr(config, "enable_rate_limiting", True)

    async def secure_execute(
        self, user_input: str, context: Optional[Dict[str, Any]] = None
    ) -> Any:
        """Execute agent with security checks and rate limiting.

        Args:
            user_input: Raw text for the agent; validated, then sanitized.
            context: Optional mapping forwarded unchanged to the agent.

        Returns:
            The agent's result. String results are validated and sanitized;
            any other result type is returned as-is.

        Raises:
            SecurityError: When the rate limit is exceeded or input/output
                validation fails.
            Exception: Anything raised by the wrapped agent is logged and
                re-raised unchanged.
        """
        # Rate limiting check (skipped only when explicitly disabled in the
        # validator's SecurityConfig)
        if self._rate_limiting_enabled():
            if not await self.rate_limiter.check_rate_limit(self._identifier):
                reset_time = self.rate_limiter.get_reset_time(self._identifier)
                raise SecurityError(f"Rate limit exceeded. Reset at: {reset_time}")

        # Input validation
        if not self.validator.validate_input(user_input, self._identifier):
            raise SecurityError("Input validation failed")

        # Sanitize input
        sanitized_input = self.validator.sanitize_input(user_input)

        try:
            # Execute the base agent
            result = await self.base_agent.execute(sanitized_input, context)

            # Validate output
            if isinstance(result, str):
                if not self.validator.validate_output(result, self._identifier):
                    raise SecurityError("Output validation failed")

                # Sanitize output
                result = self.validator.sanitize_output(result)

            return result

        except Exception as e:
            logger.error(f"Secure execution failed for {self._identifier}: {e}")
            raise
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
class SecurityError(Exception):
    """Security-related failure.

    Raised by SecureAgentWrapper.secure_execute when the rate limit is
    exceeded or when input or output validation fails.
    """
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
# Global security components
# Both start as None and are created on first use by get_rate_limiter() and
# get_security_validator(); after that the same instance is reused.
_global_rate_limiter: Optional[RateLimiter] = None
_global_validator: Optional[SecurityValidator] = None
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
def get_rate_limiter(config: Optional[RateLimitConfig] = None) -> RateLimiter:
    """Return the shared RateLimiter, creating it on first call.

    Args:
        config: Settings for the limiter. Only consulted when the shared
            instance does not exist yet; once it exists the argument is
            ignored. Defaults to ``RateLimitConfig()``.
    """
    global _global_rate_limiter
    if _global_rate_limiter is not None:
        return _global_rate_limiter

    _global_rate_limiter = RateLimiter(config or RateLimitConfig())
    return _global_rate_limiter
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def get_security_validator(
    config: Optional[SecurityConfig] = None,
) -> SecurityValidator:
    """Return the shared SecurityValidator, creating it on first call.

    Args:
        config: Settings for the validator. Only consulted when the shared
            instance does not exist yet; once it exists the argument is
            ignored. Defaults to ``SecurityConfig()``.
    """
    global _global_validator
    if _global_validator is not None:
        return _global_validator

    _global_validator = SecurityValidator(config or SecurityConfig())
    return _global_validator
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
def create_secure_agent(
    base_agent,
    rate_config: Optional[RateLimitConfig] = None,
    security_config: Optional[SecurityConfig] = None,
) -> SecureAgentWrapper:
    """Wrap ``base_agent`` with the shared rate limiter and validator.

    The limiter and validator come from get_rate_limiter() and
    get_security_validator(), to which the two config arguments are passed
    through unchanged.
    """
    return SecureAgentWrapper(
        base_agent,
        get_rate_limiter(rate_config),
        get_security_validator(security_config),
    )
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
# Configuration file permissions utility
|
| 328 |
+
def set_secure_file_permissions(file_path: str):
    """Restrict ``file_path`` to owner read/write (mode 0o600).

    Best effort: any failure is logged as a warning and swallowed, so the
    function always returns None.
    """
    try:
        import os
        import stat

        owner_read_write = stat.S_IRUSR | stat.S_IWUSR  # 0o600
        os.chmod(file_path, owner_read_write)
        logger.info(f"Set secure permissions for {file_path}")
    except Exception as e:
        logger.warning(f"Could not set secure permissions for {file_path}: {e}")
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
# Input validation utilities
|
| 343 |
+
def strip_html_tags(text: str) -> str:
    """Return ``text`` with HTML markup removed and whitespace collapsed.

    Entities are decoded first, so entity-encoded markup such as
    ``&lt;b&gt;`` is stripped as well. Entity-shaped text left over after
    decoding is then dropped, runs of whitespace become one space, and the
    result is trimmed.
    """
    import html

    cleaned = html.unescape(text)

    substitutions = (
        (r"<[^>]+>", ""),  # HTML/XML tags
        (r"&[a-zA-Z0-9#]+;", ""),  # remaining HTML entities
        (r"\s+", " "),  # whitespace runs
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.strip()
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
def validate_api_key_format(api_key: str) -> bool:
    """Check that ``api_key`` looks like an OpenAI key, without logging it.

    A key is accepted when it is non-empty, starts with ``sk-``, is at least
    20 characters long and contains none of the placeholder words ``test``,
    ``fake``, ``demo``, ``example`` or ``placeholder`` (case-insensitive).
    """
    well_formed = bool(api_key) and api_key.startswith("sk-") and len(api_key) >= 20
    if not well_formed:
        return False

    # Reject obvious dummy keys
    lowered = api_key.lower()
    placeholder_words = ("test", "fake", "demo", "example", "placeholder")
    return not any(word in lowered for word in placeholder_words)
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
# Logging security
|
| 384 |
+
def sanitize_for_logging(text: str, max_length: int = 100) -> str:
    """Return a redacted, length-limited copy of ``text`` for log output.

    Falsy input yields ``"[EMPTY]"``. Otherwise the text is passed through
    the shared validator's ``sanitize_output`` and cut to ``max_length``
    characters, with ``...[TRUNCATED]`` appended when a cut was made.
    """
    if not text:
        return "[EMPTY]"

    # Remove potential secrets
    redacted = get_security_validator().sanitize_output(text)

    # Truncate for logging
    if len(redacted) <= max_length:
        return redacted
    return redacted[:max_length] + "...[TRUNCATED]"
|
ankigen/agents/templates/generators.j2
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{# Generator agent configuration template #}
|
| 2 |
+
{
|
| 3 |
+
"subject_expert": {
|
| 4 |
+
"name": "subject_expert",
|
| 5 |
+
"instructions": "You are an expert in {{ subject | default('the subject area') }} with deep pedagogical knowledge. \nYour role is to generate flashcards that demonstrate mastery of {{ subject | default('the subject') }} concepts.\n\nKey responsibilities:\n- Create ATOMIC cards: short answers (1-9 words) when possible; keep explanations/examples brief; split complex info into multiple simple cards\n- Use clear, plain prompts without fancy formatting or unusual words\n- Design prompts that match real-life recall or decision points\n- For basic cards, keep to-be-learned info on the back; prompts are cues, not answers\n- For cloze cards, the deletion is the target; keep cloze sentences natural and minimal\n- Prefer why/how/when/if-then prompts and contrast pairs like A vs B\n- Include a few explain-to-a-non-expert/PM prompts to force clarity\n- Favor retrieval hooks and levers (failure modes, knobs) over proofs or derivations\n- Avoid long equations, derivations, and multi-paragraph cards\n- Ensure technical accuracy and depth appropriate for the target level\n- Use domain-specific terminology correctly\n- Connect concepts to prerequisite knowledge\n\nCard Types:\n- Basic cards (card_type='basic'): Standard Q&A format for concepts and facts\n- Cloze cards (card_type='cloze'): Fill-in-the-blank format using {{ '{{c1::answer}}' }} syntax for code, syntax, formulas\n\nFor cloze cards, wrap the answer in {{ '{{c1::text}}' }} format. Example: 'The vLLM class for inference is {{ '{{c1::LLM}}' }}'\n\nPrioritize atomic simplicity - break complex info into multiple simple cards. Generate cards that test understanding through simple, direct recall.",
|
| 6 |
+
"model": "{{ subject_expert_model }}",
|
| 7 |
+
"temperature": 0.7,
|
| 8 |
+
"timeout": 120.0,
|
| 9 |
+
"custom_prompts": {
|
| 10 |
+
"math": "Focus on problem-solving strategies and mathematical reasoning",
|
| 11 |
+
"science": "Emphasize experimental design and scientific method",
|
| 12 |
+
"history": "Connect events to broader historical patterns and causation",
|
| 13 |
+
"programming": "Include executable examples and best practices"
|
| 14 |
+
}
|
| 15 |
+
}
|
| 16 |
+
}
|
ankigen/agents/token_tracker.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Token usage tracking for OpenAI API calls using tiktoken.
|
| 3 |
+
Provides accurate token counting and cost estimation.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import tiktoken
|
| 7 |
+
from typing import Dict, List, Any, Optional
|
| 8 |
+
from dataclasses import dataclass, field
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
from ankigen.logging import logger
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@dataclass
class TokenUsage:
    """Track token usage for a single request"""

    prompt_tokens: int
    completion_tokens: int
    # Stored as given; TokenTracker.track_usage passes
    # prompt_tokens + completion_tokens.
    total_tokens: int
    # None when no cost figure was supplied for the request.
    estimated_cost: Optional[float]
    model: str
    # Naive local time taken when the record is created.
    timestamp: datetime = field(default_factory=datetime.now)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class TokenTracker:
    """Track token usage across multiple requests.

    Keeps a history of TokenUsage records together with running totals of
    tokens and cost. Cost is never estimated here: it is accumulated only
    when a caller or an API response supplies it.
    """

    # Encoding used when tiktoken does not recognise the model name.
    _FALLBACK_ENCODING = "o200k_base"

    def __init__(self):
        self.usage_history: List["TokenUsage"] = []
        self.total_cost = 0.0
        self.total_tokens = 0

    @staticmethod
    def _encoding_for(model: str):
        """Return the tiktoken encoding for ``model`` or the fallback encoding."""
        try:
            return tiktoken.encoding_for_model(model)
        except KeyError:
            return tiktoken.get_encoding(TokenTracker._FALLBACK_ENCODING)

    @staticmethod
    def _token_count(encoding, value) -> int:
        """Count the tokens of ``str(value)`` under ``encoding``."""
        # disallowed_special=() makes tiktoken encode text that looks like a
        # special token (e.g. "<|endoftext|>") as ordinary text instead of
        # raising ValueError.
        return len(encoding.encode(str(value), disallowed_special=()))

    @staticmethod
    def _read_count(usage, *names: str):
        """Return the first non-None attribute of ``usage`` among ``names``."""
        for name in names:
            value = getattr(usage, name, None)
            if value is not None:
                return value
        raise AttributeError(f"usage object has none of: {', '.join(names)}")

    def count_tokens_for_messages(
        self, messages: List[Dict[str, str]], model: str
    ) -> int:
        """
        Count total tokens for a list of chat messages using tiktoken.

        Implements OpenAI's token counting algorithm for chat completions:
        - Each message adds 3 tokens for role/content/structure overhead
        - Message names add an additional token
        - The entire message list adds 3 tokens for conversation wrapper

        The encoding is selected based on the model:
        - Attempts to use model-specific encoding via tiktoken
        - Falls back to 'o200k_base' (the GPT-4o family encoding) for unknown models

        Args:
            messages: List of message dicts (each with 'role', 'content', optional 'name')
            model: OpenAI model identifier (e.g., 'gpt-5.2', 'gpt-4o')

        Returns:
            Total tokens required to send these messages to the model
        """
        encoding = self._encoding_for(model)

        tokens_per_message = 3
        tokens_per_name = 1

        num_tokens = 0
        for message in messages:
            num_tokens += tokens_per_message
            for key, value in message.items():
                num_tokens += self._token_count(encoding, value)
                if key == "name":
                    num_tokens += tokens_per_name

        num_tokens += 3  # conversation wrapper
        return num_tokens

    def count_tokens_for_text(self, text: str, model: str) -> int:
        """Count the tokens of ``text`` using the encoding chosen for ``model``."""
        return self._token_count(self._encoding_for(model), text)

    def track_usage_from_response(
        self, response_data, model: str
    ) -> Optional["TokenUsage"]:
        """Record usage reported on an API response object.

        Reads ``response_data.usage`` and accepts either the
        ``prompt_tokens`` / ``completion_tokens`` attribute names or the
        ``input_tokens`` / ``output_tokens`` names. A ``total_cost`` or
        ``cost`` attribute on the usage object, when present, is recorded as
        the cost.

        Returns:
            The recorded TokenUsage, or None when the response carries no
            usage information or reading it fails (failures are logged).
        """
        try:
            usage = getattr(response_data, "usage", None)
            if usage is None:
                return None

            prompt_tokens = self._read_count(usage, "prompt_tokens", "input_tokens")
            completion_tokens = self._read_count(
                usage, "completion_tokens", "output_tokens"
            )

            actual_cost = None
            if hasattr(usage, "total_cost"):
                actual_cost = usage.total_cost
            elif hasattr(usage, "cost"):
                actual_cost = usage.cost

            return self.track_usage(
                prompt_tokens, completion_tokens, model, actual_cost
            )
        except Exception as e:
            logger.error(f"Failed to track usage from response: {e}")
            return None

    def track_usage(
        self,
        prompt_tokens: int,
        completion_tokens: int,
        model: str,
        actual_cost: Optional[float] = None,
    ) -> "TokenUsage":
        """Append a usage record and update the running totals.

        Args:
            prompt_tokens: Tokens sent to the model.
            completion_tokens: Tokens produced by the model.
            model: Model identifier stored on the record.
            actual_cost: Cost for the request if known; stored as given and
                added to ``total_cost`` only when truthy.

        Returns:
            The TokenUsage record that was appended to ``usage_history``.
        """
        total_tokens = prompt_tokens + completion_tokens

        usage = TokenUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            estimated_cost=actual_cost,  # no estimation: API-provided cost only
            model=model,
        )

        self.usage_history.append(usage)
        if actual_cost:
            self.total_cost += actual_cost
        self.total_tokens += total_tokens

        message = f"💰 Token usage - Model: {model}, Prompt: {prompt_tokens}, Completion: {completion_tokens}"
        if actual_cost:
            message += f", Cost: ${actual_cost:.4f}"
        logger.info(message)

        return usage

    def get_session_summary(self) -> Dict[str, Any]:
        """Summarise recorded usage overall and per model.

        Returns:
            A dict with ``total_requests``, ``total_tokens``, ``total_cost``
            and ``by_model``; the latter maps each model name to its
            ``requests``, ``tokens`` and ``cost`` subtotals.
        """
        if not self.usage_history:
            return {
                "total_requests": 0,
                "total_tokens": 0,
                "total_cost": 0.0,
                "by_model": {},
            }

        by_model: Dict[str, Dict[str, Any]] = {}
        for usage in self.usage_history:
            bucket = by_model.setdefault(
                usage.model, {"requests": 0, "tokens": 0, "cost": 0.0}
            )
            bucket["requests"] += 1
            bucket["tokens"] += usage.total_tokens
            if usage.estimated_cost:
                bucket["cost"] += usage.estimated_cost

        return {
            "total_requests": len(self.usage_history),
            "total_tokens": self.total_tokens,
            "total_cost": self.total_cost,
            "by_model": by_model,
        }

    def get_session_usage(self) -> Dict[str, Any]:
        """Alias of get_session_summary()."""
        return self.get_session_summary()

    def reset_session(self):
        """Clear the history and zero the running totals."""
        self.usage_history.clear()
        self.total_cost = 0.0
        self.total_tokens = 0
        logger.info("🔄 Token usage tracking reset")

    def track_usage_from_agents_sdk(
        self, usage_dict: Dict[str, Any], model: str
    ) -> Optional["TokenUsage"]:
        """Track usage from OpenAI Agents SDK usage format.

        Reads ``input_tokens`` and ``output_tokens`` from ``usage_dict``.
        Nothing is recorded when the dict is empty or reports no tokens at
        all (``total_tokens``, ``input_tokens`` and ``output_tokens`` all
        missing or zero). A dict that omits ``total_tokens`` but carries
        input/output counts is still recorded.

        Returns:
            The recorded TokenUsage, or None when nothing was recorded or
            reading the dict fails (failures are logged).
        """
        try:
            if not usage_dict:
                return None

            prompt_tokens = usage_dict.get("input_tokens", 0) or 0
            completion_tokens = usage_dict.get("output_tokens", 0) or 0
            reported_total = usage_dict.get("total_tokens", 0) or 0

            if reported_total == 0 and prompt_tokens + completion_tokens == 0:
                return None

            return self.track_usage(prompt_tokens, completion_tokens, model)
        except Exception as e:
            logger.error(f"Failed to track usage from agents SDK: {e}")
            return None
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
# Global token tracker instance
|
| 190 |
+
_global_tracker = TokenTracker()
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def get_token_tracker() -> TokenTracker:
    """Return the module-level ``TokenTracker`` instance."""
    tracker = _global_tracker
    return tracker
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def track_agent_usage(
    prompt_text: str,
    completion_text: str,
    model: str,
    actual_cost: Optional[float] = None,
) -> TokenUsage:
    """Count tokens for a prompt/completion pair and record them.

    Both texts are counted with the global tracker's
    ``count_tokens_for_text`` (prompt first, then completion) and the counts
    are passed to ``track_usage`` together with ``model`` and ``actual_cost``.
    """
    shared = get_token_tracker()
    prompt_count, completion_count = (
        shared.count_tokens_for_text(text, model)
        for text in (prompt_text, completion_text)
    )
    return shared.track_usage(prompt_count, completion_count, model, actual_cost)
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def track_usage_from_openai_response(response_data, model: str) -> Optional[TokenUsage]:
    """Record usage from an OpenAI response via the global tracker.

    Delegates to the tracker's ``track_usage_from_response`` and returns its
    result unchanged.
    """
    return get_token_tracker().track_usage_from_response(response_data, model)
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def track_usage_from_agents_sdk(
    usage_dict: Dict[str, Any], model: str
) -> Optional[TokenUsage]:
    """Track usage from OpenAI Agents SDK usage format.

    Module-level convenience wrapper: forwards both arguments to the global
    tracker's method of the same name and returns its result.
    """
    return get_token_tracker().track_usage_from_agents_sdk(usage_dict, model)
|
ankigen/auto_config.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Auto-configuration service for intelligent settings population"""
|
| 2 |
+
|
| 3 |
+
from typing import Dict, Any
|
| 4 |
+
from openai import AsyncOpenAI
|
| 5 |
+
|
| 6 |
+
from ankigen.logging import logger
|
| 7 |
+
from ankigen.context7 import Context7Client
|
| 8 |
+
from ankigen.agents.schemas import AutoConfigSchema
|
| 9 |
+
from ankigen.llm_interface import structured_agent_call
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class AutoConfigService:
    """Service for analyzing subjects and auto-configuring flashcard generation settings"""

    def __init__(self):
        # Client used by auto_configure to resolve a detected library name.
        self.context7_client = Context7Client()

    async def analyze_subject(
        self,
        subject: str,
        openai_client: AsyncOpenAI,
        target_topic_count: int | None = None,
    ) -> AutoConfigSchema:
        """Analyze a subject string and return configuration settings.

        Any failure of the LLM call is logged and answered with a default
        configuration, so this method does not raise for analysis errors.

        Args:
            subject: The subject to analyze
            openai_client: OpenAI client for LLM calls
            target_topic_count: If provided, forces exactly this many topics in decomposition

        Returns:
            The schema produced by the LLM, or the default configuration
            built by ``_fallback_config`` when the call fails.
        """

        # Build topic count instruction if override provided
        topic_count_instruction = ""
        if target_topic_count is not None:
            topic_count_instruction = f"""
IMPORTANT OVERRIDE: The user has requested exactly {target_topic_count} topics.
You MUST set topic_number to {target_topic_count} and provide exactly {target_topic_count} items in topics_list.
Choose the {target_topic_count} most important/foundational subtopics for this subject.
"""

        system_prompt = f"""You are an educational content analyzer specializing in spaced repetition learning. Analyze the given subject and determine flashcard generation settings that focus on ESSENTIAL concepts.
{topic_count_instruction}

CRITICAL PRINCIPLE: Quality over quantity. Focus on fundamental concepts that unlock understanding, not trivial facts.

Consider:
1. Extract any library/framework names for Context7 search (e.g., "pandas", "react", "tensorflow")
2. IMPORTANT: Extract the specific documentation focus from the subject
- "Basic Pandas Dataframe" → documentation_focus: "dataframe basics, creation, indexing"
- "React hooks tutorial" → documentation_focus: "hooks, useState, useEffect"
- "Docker networking" → documentation_focus: "networking, network drivers, container communication"
3. Identify the scope: narrow (specific feature), medium (several related topics), broad (full overview)
4. Determine content type: concepts (theory/understanding), syntax (code/commands), api (library usage), practical (hands-on skills)
5. TOPIC DECOMPOSITION: Break down the subject into distinct subtopics that together provide comprehensive coverage
6. Recommend cloze cards for syntax/code, basic cards for concepts
7. Choose model based on complexity: gpt-5.2-thinking for complex topics, gpt-5.2-instant for basic/simple, gpt-5.2-auto for mixed scope
- Valid model_choice values: "gpt-5.2-auto", "gpt-5.2-instant", "gpt-5.2-thinking"

TOPIC DECOMPOSITION (topics_list):
You MUST provide a topics_list - a list of distinct subtopics that together cover the subject comprehensively.
- Each topic should be specific and non-overlapping
- Order topics from foundational to advanced (learning progression)
- The number of topics should match topic_number

Examples:
- "React Hooks" → topics_list: ["useState fundamentals", "useEffect and lifecycle", "useRef and useContext", "custom hooks patterns", "performance with useMemo/useCallback", "testing hooks"]
- "Docker basics" → topics_list: ["containers vs VMs", "images and Dockerfile", "container lifecycle", "volumes and persistence", "networking fundamentals", "docker-compose basics"]
- "Machine Learning" → topics_list: ["supervised vs unsupervised", "regression models", "classification models", "model evaluation metrics", "overfitting and regularization", "feature engineering", "cross-validation"]

IMPORTANT - Focus on HIGH-VALUE topics:
- GOOD topics: Core concepts, fundamental principles, mental models, design patterns, key abstractions
- AVOID topics: Trivial commands (like "docker ps"), basic syntax that's easily googled, minor API details

Guidelines for settings (MINIMUM 30 cards total):
- Narrow/specific scope: 4-5 essential topics with 8-10 cards each (32-50 cards)
- Medium scope: 5-7 core topics with 7-9 cards each (35-63 cards)
- Broad scope: 6-8 fundamental topics with 6-8 cards each (36-64 cards)
- "Basic"/"Introduction" keywords: Start with fundamentals, 40-50 cards total
- "Complex" keywords: Deep dive into critical concepts, 45-60 cards

Learning preference suggestions:
- For basics: "Focus on fundamental concepts and mental models that form the foundation"
- For practical: "Emphasize core patterns and principles with real-world applications"
- For theory: "Build deep conceptual understanding with progressive complexity"

Return a JSON object matching the AutoConfigSchema."""

        user_prompt = f"""Analyze this subject for flashcard generation: "{subject}"

Extract:
1. The library name if mentioned
2. The specific documentation focus (what aspects of the library to focus on)
3. Suggested settings for effective learning

Provide a brief rationale for your choices."""

        try:
            config = await structured_agent_call(
                openai_client=openai_client,
                model="gpt-5.2",
                instructions=system_prompt,
                user_input=user_prompt,
                output_type=AutoConfigSchema,
                temperature=0.3,  # Lower temperature for more consistent analysis
            )

            logger.info(
                f"Subject analysis complete: library='{config.library_search_term}', "
                f"topics={config.topic_number}, cards/topic={config.cards_per_topic}"
            )
            return config

        except Exception as e:
            logger.error(f"Failed to analyze subject: {e}")
            # Return sensible defaults on error (still aim for good card count)
            return self._fallback_config(subject, target_topic_count)

    @staticmethod
    def _fallback_config(
        subject: str, target_topic_count: int | None = None
    ) -> AutoConfigSchema:
        """Build the default configuration used when subject analysis fails.

        The defaults are six generic topics derived from ``subject``. When
        ``target_topic_count`` is between 1 and 6 the list is truncated to
        that many topics so the caller's override still holds; larger
        requests keep all six defaults because no further topics can be
        derived without the analysis.
        """
        default_topics = [
            f"{subject} - fundamentals",
            f"{subject} - core concepts",
            f"{subject} - practical applications",
            f"{subject} - common patterns",
            f"{subject} - best practices",
            f"{subject} - advanced topics",
        ]

        def build(topics: list) -> AutoConfigSchema:
            return AutoConfigSchema(
                library_search_term="",
                documentation_focus=None,
                topic_number=len(topics),
                topics_list=topics,
                cards_per_topic=8,
                learning_preferences="Focus on fundamental concepts and core principles with practical examples",
                generate_cloze=False,
                model_choice="gpt-5.2-auto",
                subject_type="concepts",
                scope="medium",
                rationale="Using default settings due to analysis error",
            )

        if target_topic_count is not None:
            if 0 < target_topic_count <= len(default_topics):
                try:
                    return build(default_topics[:target_topic_count])
                except Exception as e:
                    # The schema may reject this count; fall through to the
                    # full default rather than failing the fallback itself.
                    logger.warning(
                        f"Could not build fallback with {target_topic_count} topics: {e}"
                    )
            else:
                logger.warning(
                    f"Requested topic count {target_topic_count} cannot be met by "
                    f"fallback defaults; using {len(default_topics)} topics"
                )

        return build(default_topics)

    async def auto_configure(
        self,
        subject: str,
        openai_client: AsyncOpenAI,
        target_topic_count: int | None = None,
    ) -> Dict[str, Any]:
        """
        Complete auto-configuration pipeline:
        1. Analyze subject with AI
        2. Search Context7 for library if detected
        3. Return complete configuration for UI

        Args:
            subject: The subject to analyze
            openai_client: OpenAI client for LLM calls
            target_topic_count: If provided, forces exactly this many topics

        Returns:
            Dict of UI settings plus an ``analysis_metadata`` entry, or an
            empty dict when ``subject`` is empty or whitespace.
        """

        if not subject or not subject.strip():
            logger.warning("Empty subject provided to auto_configure")
            return {}

        logger.info(f"Starting auto-configuration for subject: '{subject}'")

        # Step 1: Analyze the subject
        config = await self.analyze_subject(
            subject, openai_client, target_topic_count=target_topic_count
        )

        # Step 2: Search Context7 for library if one was detected
        library_id = None
        if config.library_search_term:
            logger.info(
                f"Searching Context7 for library: '{config.library_search_term}'"
            )
            try:
                library_id = await self.context7_client.resolve_library_id(
                    config.library_search_term
                )
                if library_id:
                    logger.info(f"Resolved library to Context7 ID: {library_id}")
                else:
                    logger.warning(
                        f"Could not find library '{config.library_search_term}' in Context7"
                    )
            except Exception as e:
                # A failed lookup only means no library is attached.
                logger.error(f"Context7 search failed: {e}")

        # Step 3: Build complete configuration dict for UI
        ui_config = {
            # Only report a library name when Context7 resolved it.
            "library_name": config.library_search_term if library_id else "",
            "library_topic": config.documentation_focus or "",
            "topic_number": config.topic_number,
            "topics_list": config.topics_list,
            "cards_per_topic": config.cards_per_topic,
            "preference_prompt": config.learning_preferences,
            "generate_cloze_checkbox": config.generate_cloze,
            "model_choice": config.model_choice,
            # Metadata for display
            "analysis_metadata": {
                "subject_type": config.subject_type,
                "scope": config.scope,
                "rationale": config.rationale,
                "library_found": library_id is not None,
                "context7_id": library_id,
            },
        }

        logger.info(
            f"Auto-configuration complete: library={'found' if library_id else 'not found'}, "
            f"topics={config.topic_number}, model={config.model_choice}"
        )

        return ui_config
|
ankigen/card_generator.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Module for core card generation logic
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from typing import List, Dict, Any
|
| 6 |
+
|
| 7 |
+
# Imports from our core modules
|
| 8 |
+
from ankigen.utils import (
|
| 9 |
+
get_logger,
|
| 10 |
+
ResponseCache,
|
| 11 |
+
strip_html_tags,
|
| 12 |
+
)
|
| 13 |
+
from ankigen.llm_interface import OpenAIClientManager
|
| 14 |
+
from ankigen.models import (
|
| 15 |
+
Card,
|
| 16 |
+
) # Import necessary Pydantic models
|
| 17 |
+
|
| 18 |
+
# Import agent system - required
|
| 19 |
+
from ankigen.agents.integration import AgentOrchestrator
|
| 20 |
+
from agents import set_tracing_disabled
|
| 21 |
+
|
| 22 |
+
logger = get_logger()
|
| 23 |
+
|
| 24 |
+
# Disable tracing to prevent metrics persistence issues
|
| 25 |
+
set_tracing_disabled(True)
|
| 26 |
+
|
| 27 |
+
AGENTS_AVAILABLE = True
|
| 28 |
+
logger.info("Agent system loaded successfully")
|
| 29 |
+
|
| 30 |
+
# --- Constants --- (Moved from app.py)
|
| 31 |
+
# Model choices: one (value, label, description) row per selectable model.
AVAILABLE_MODELS = [
    {"value": value, "label": label, "description": description}
    for value, label, description in (
        ("gpt-5.2-auto", "GPT-5.2 Auto", "Adaptive reasoning"),
        ("gpt-5.2-instant", "GPT-5.2 Instant", "Fast, minimal reasoning"),
        ("gpt-5.2-thinking", "GPT-5.2 Thinking", "Higher reasoning effort"),
    )
]

# Generation modes, in the same value/label/description shape.
GENERATION_MODES = [
    {"value": value, "label": label, "description": description}
    for value, label, description in (
        ("subject", "Single Subject", "Generate cards for a specific topic"),
    )
]
|
| 56 |
+
|
| 57 |
+
# --- Core Functions --- (Moved and adapted from app.py)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# Legacy functions removed - all card generation now handled by agent system
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _parse_model_selection(model_selection: str) -> tuple[str, str | None]:
|
| 64 |
+
"""Parse model selection into model name and reasoning effort."""
|
| 65 |
+
if not model_selection:
|
| 66 |
+
return "gpt-5.2", None
|
| 67 |
+
|
| 68 |
+
normalized = model_selection.strip().lower()
|
| 69 |
+
if normalized == "gpt-5.2-auto":
|
| 70 |
+
return "gpt-5.2", None
|
| 71 |
+
if normalized == "gpt-5.2-instant":
|
| 72 |
+
return "gpt-5.2", "none"
|
| 73 |
+
if normalized == "gpt-5.2-thinking":
|
| 74 |
+
return "gpt-5.2", "high"
|
| 75 |
+
|
| 76 |
+
if "gpt-5.2" in normalized:
|
| 77 |
+
if "instant" in normalized:
|
| 78 |
+
return "gpt-5.2", "none"
|
| 79 |
+
if "thinking" in normalized:
|
| 80 |
+
return "gpt-5.2", "high"
|
| 81 |
+
if "auto" in normalized:
|
| 82 |
+
return "gpt-5.2", None
|
| 83 |
+
return "gpt-5.2", None
|
| 84 |
+
|
| 85 |
+
# Fallback for direct model names
|
| 86 |
+
return model_selection, None
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _map_generation_mode_to_subject(generation_mode: str, subject: str) -> str:
|
| 90 |
+
"""Map UI generation mode to agent subject."""
|
| 91 |
+
if generation_mode == "subject":
|
| 92 |
+
return subject if subject else "general"
|
| 93 |
+
elif generation_mode == "path":
|
| 94 |
+
return "curriculum_design"
|
| 95 |
+
elif generation_mode == "text":
|
| 96 |
+
return "content_analysis"
|
| 97 |
+
return "general"
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def _build_generation_context(generation_mode: str, source_text: str) -> Dict[str, Any]:
|
| 101 |
+
"""Build context dict for card generation."""
|
| 102 |
+
context: Dict[str, Any] = {}
|
| 103 |
+
if generation_mode == "text" and source_text:
|
| 104 |
+
context["source_text"] = source_text
|
| 105 |
+
return context
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def _get_token_usage_html(token_tracker) -> str:
|
| 109 |
+
"""Extract token usage and format as HTML."""
|
| 110 |
+
try:
|
| 111 |
+
if hasattr(token_tracker, "get_session_summary"):
|
| 112 |
+
token_usage = token_tracker.get_session_summary()
|
| 113 |
+
elif hasattr(token_tracker, "get_session_usage"):
|
| 114 |
+
token_usage = token_tracker.get_session_usage()
|
| 115 |
+
else:
|
| 116 |
+
raise AttributeError("TokenTracker has no session summary method")
|
| 117 |
+
|
| 118 |
+
return f"<div style='margin-top: 8px;'><b>Token Usage:</b> {token_usage['total_tokens']} tokens</div>"
|
| 119 |
+
except Exception as e:
|
| 120 |
+
logger.error(f"Token usage collection failed: {e}")
|
| 121 |
+
return "<div style='margin-top: 8px;'><b>Token Usage:</b> No usage data</div>"
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _format_cards_to_dataframe(
    agent_cards: List[Card], subject: str
) -> tuple[pd.DataFrame, str]:
    """Turn agent cards into the display DataFrame plus a count message.

    Cards are numbered from 1 under the topic ``subject`` (``"General"``
    when no subject is given). The second element is an HTML snippet
    reporting how many rows the DataFrame holds.
    """
    topic_label = subject if subject else "General"
    rows = format_cards_for_dataframe(
        agent_cards, topic_name=topic_label, start_index=1
    )
    output_df = pd.DataFrame(rows, columns=get_dataframe_columns())
    card_count = len(output_df)
    total_cards_message = f"<div><b>Cards Generated:</b> <span id='total-cards-count'>{card_count}</span></div>"
    return output_df, total_cards_message
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
async def orchestrate_card_generation(
    client_manager: OpenAIClientManager,
    cache: ResponseCache,
    api_key_input: str,
    subject: str,
    generation_mode: str,
    source_text: str,
    url_input: str,
    model_name: str,
    topic_number: int,
    cards_per_topic: int,
    preference_prompt: str,
    generate_cloze: bool,
    use_llm_judge: bool = False,
    library_name: str = None,
    library_topic: str = None,
    topics_list: List[str] = None,
):
    """Run agent-based card generation for the given UI inputs.

    Returns a 3-tuple ``(cards DataFrame, status/count message, token usage
    HTML)``. On any failure, or when the agents return no cards, the
    DataFrame is empty (standard columns only), the message describes the
    problem and the token usage string is empty.

    ``cache``, ``url_input`` and ``use_llm_judge`` are accepted but not used
    in this function.
    """
    logger.info(f"Starting card generation orchestration in {generation_mode} mode")
    logger.debug(
        f"Parameters: mode={generation_mode}, topics={topic_number}, "
        f"cards_per_topic={cards_per_topic}, cloze={generate_cloze}"
    )

    def _empty_result(message: str):
        # Shared shape for every non-success return.
        return pd.DataFrame(columns=get_dataframe_columns()), message, ""

    # NOTE(review): gr.Error is only instantiated below, never raised, so it
    # presumably shows nothing in the UI - confirm whether that is intended.
    if not AGENTS_AVAILABLE:
        logger.error("Agent system is required but not available")
        gr.Error("Agent system is required but not available")
        return _empty_result("Agent system error")

    try:
        from ankigen.agents.token_tracker import get_token_tracker

        token_tracker = get_token_tracker()
        orchestrator = AgentOrchestrator(client_manager)

        resolved_model, effort = _parse_model_selection(model_name)
        logger.info(f"Using {resolved_model} for SubjectExpertAgent")
        await orchestrator.initialize(
            api_key_input,
            {"subject_expert": resolved_model},
            {"subject_expert": effort},
        )

        agent_subject = _map_generation_mode_to_subject(generation_mode, subject)
        context = _build_generation_context(generation_mode, source_text)
        if preference_prompt:
            context["learning_preferences"] = preference_prompt

        agent_cards, _agent_metadata = await orchestrator.generate_cards_with_agents(
            topic=subject if subject else "Mixed Topics",
            subject=agent_subject,
            num_cards=topic_number * cards_per_topic,
            difficulty="intermediate",
            context=context,
            library_name=library_name,
            library_topic=library_topic,
            generate_cloze=generate_cloze,
            topics_list=topics_list,
            cards_per_topic=cards_per_topic,
        )

        token_usage_html = _get_token_usage_html(token_tracker)

        if not agent_cards:
            logger.error("Agent system returned no cards")
            gr.Error("Agent system returned no cards")
            return _empty_result("Agent system returned no cards.")

        output_df, total_cards_message = _format_cards_to_dataframe(
            agent_cards, subject
        )
        logger.info(f"Agent system generated {len(output_df)} cards successfully")
        return output_df, total_cards_message, token_usage_html

    except Exception as e:
        logger.error(f"Agent system failed: {e}")
        gr.Error(f"Agent system error: {str(e)}")
        return _empty_result(f"Agent system error: {str(e)}")
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
# Legacy helper functions removed - all processing now handled by agent system
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
# --- Formatting and Utility Functions --- (Moved and adapted)
|
| 232 |
+
def format_cards_for_dataframe(
    cards: list[Card], topic_name: str, topic_index: int = 0, start_index: int = 1
) -> list:
    """Formats a list of Card objects into a list of dictionaries for DataFrame display.

    Ensures all data is plain text.

    Args:
        cards: Cards to format.
        topic_name: Topic label written to every row.
        topic_index: When greater than 0, row indices are rendered as
            ``"<topic_index>.<n>"``; otherwise just ``"<n>"``.
        start_index: Number given to the first card.

    Returns:
        One dict per card, keyed by the DataFrame column names.
    """

    def _joined_plain_text(value) -> str:
        # List metadata becomes a comma-separated string. Items are coerced
        # with str() so non-string entries (numbers, None) cannot make
        # str.join raise TypeError.
        if isinstance(value, list):
            return strip_html_tags(", ".join(str(item) for item in value))
        return strip_html_tags(str(value))

    formatted_cards = []
    for actual_index, card_obj in enumerate(cards, start=start_index):
        metadata = card_obj.metadata or {}

        formatted_cards.append(
            {
                "Index": (
                    f"{topic_index}.{actual_index}"
                    if topic_index > 0
                    else str(actual_index)
                ),
                "Topic": strip_html_tags(topic_name),  # Ensure topic is also plain
                "Card_Type": strip_html_tags(card_obj.card_type or "basic"),
                # Card text is used as-is; the original notes it is already
                # stripped when the Card object is created.
                "Question": card_obj.front.question or "",
                "Answer": card_obj.back.answer or "",
                "Explanation": card_obj.back.explanation or "",
                "Example": card_obj.back.example or "",
                "Prerequisites": _joined_plain_text(
                    metadata.get("prerequisites", [])
                ),
                "Learning_Outcomes": _joined_plain_text(
                    metadata.get("learning_outcomes", [])
                ),
                "Difficulty": strip_html_tags(str(metadata.get("difficulty", "N/A"))),
                # A present-but-None source_url is treated as empty instead
                # of being passed to strip_html_tags as None.
                "Source_URL": strip_html_tags(metadata.get("source_url") or ""),
            }
        )
    return formatted_cards
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def get_dataframe_columns() -> list[str]:
    """Return the card DataFrame's column names in display order.

    A new list is built on every call.
    """
    columns = (
        "Index",
        "Topic",
        "Card_Type",
        "Question",
        "Answer",
        "Explanation",
        "Example",
        "Prerequisites",
        "Learning_Outcomes",
        "Difficulty",
        "Source_URL",
    )
    return list(columns)
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
def generate_token_usage_html(token_usage=None):
    """Render a token-usage dict as an HTML snippet.

    A non-empty dict shows its ``total_tokens`` value (0 when the key is
    missing); anything else shows "No usage data".
    """
    if not token_usage or not isinstance(token_usage, dict):
        return "<div style='margin-top: 8px;'><b>Token Usage:</b> No usage data</div>"

    total_tokens = token_usage.get("total_tokens", 0)
    return f"<div style='margin-top: 8px;'><b>Token Usage:</b> {total_tokens} tokens</div>"
|
ankigen/cli.py
ADDED
|
@@ -0,0 +1,372 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""CLI interface for AnkiGen - Generate Anki flashcards from the command line"""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
import os
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Optional
|
| 8 |
+
|
| 9 |
+
import click
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from rich.console import Console
|
| 12 |
+
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
|
| 13 |
+
from rich.table import Table
|
| 14 |
+
from rich.panel import Panel
|
| 15 |
+
|
| 16 |
+
from ankigen.agents.token_tracker import get_token_tracker
|
| 17 |
+
from ankigen.auto_config import AutoConfigService
|
| 18 |
+
from ankigen.card_generator import orchestrate_card_generation
|
| 19 |
+
from ankigen.exporters import export_dataframe_to_apkg, export_dataframe_to_csv
|
| 20 |
+
from ankigen.llm_interface import OpenAIClientManager
|
| 21 |
+
from ankigen.utils import ResponseCache, get_logger
|
| 22 |
+
|
| 23 |
+
console = Console()
|
| 24 |
+
logger = get_logger()
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_api_key() -> str:
    """Return the OpenAI API key from ``OPENAI_API_KEY`` or prompt for it.

    The interactive prompt hides the typed input and is used only when the
    environment variable is unset or empty.
    """
    env_key = os.getenv("OPENAI_API_KEY")
    if env_key:
        return env_key

    console.print("[yellow]OpenAI API key not found in environment[/yellow]")
    return click.prompt("Enter your OpenAI API key", hide_input=True)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
async def auto_configure_from_prompt(
    prompt: str,
    api_key: str,
    override_topics: Optional[int] = None,
    override_cards: Optional[int] = None,
    override_model: Optional[str] = None,
) -> dict:
    """Auto-configure generation settings for *prompt* and print them as a table.

    Args:
        prompt: Subject the user wants flashcards for.
        api_key: OpenAI API key used to initialize the client.
        override_topics: Topic count forwarded to ``auto_configure`` as
            ``target_topic_count``.
        override_cards: When not ``None``, replaces ``cards_per_topic`` in the
            returned configuration.
        override_model: When not ``None``, replaces ``model_choice`` in the
            returned configuration.

    Returns:
        The configuration dict produced by ``AutoConfigService.auto_configure``
        with the card/model overrides applied.
    """
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        progress.add_task("Analyzing subject...", total=None)

        # Initialize client
        client_manager = OpenAIClientManager()
        await client_manager.initialize_client(api_key)
        openai_client = client_manager.get_client()

        # Get auto-config (pass topic count override so LLM decomposes correctly)
        auto_config_service = AutoConfigService()
        config = await auto_config_service.auto_configure(
            prompt, openai_client, target_topic_count=override_topics
        )

    # Apply remaining overrides (topics already handled in auto_configure)
    if override_cards is not None:
        config["cards_per_topic"] = override_cards
    if override_model is not None:
        config["model_choice"] = override_model

    # Display configuration
    table = Table(
        title="Auto-Configuration", show_header=True, header_style="bold cyan"
    )
    table.add_column("Setting", style="dim")
    table.add_column("Value", style="green")

    table.add_row("Topics", str(config.get("topic_number", "N/A")))
    table.add_row("Cards per Topic", str(config.get("cards_per_topic", "N/A")))
    table.add_row(
        "Total Cards",
        str(config.get("topic_number", 0) * config.get("cards_per_topic", 0)),
    )
    table.add_row("Model", config.get("model_choice", "N/A"))

    if config.get("library_name"):
        table.add_row("Library", config.get("library_name"))
    if config.get("library_topic"):
        table.add_row("Library Topic", config.get("library_topic"))

    # Display discovered topics: all of them when there are at most four,
    # otherwise the first three plus a count of the rest.
    topics = config.get("topics_list")
    if topics:
        if len(topics) <= 4:
            topics_str = ", ".join(topics)
        else:
            topics_str = ", ".join(topics[:3]) + f", ... (+{len(topics) - 3} more)"
        table.add_row("Subtopics", topics_str)

    focus = config.get("preference_prompt")
    if focus:
        # Only mark the preview as truncated when something was actually cut.
        preview = focus if len(focus) <= 50 else focus[:50] + "..."
        table.add_row("Learning Focus", preview)

    console.print(table)

    return config
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
async def generate_cards_from_config(
    prompt: str,
    config: dict,
    api_key: str,
) -> tuple:
    """Generate cards for *prompt* using an auto-configuration dict.

    Args:
        prompt: Subject passed to the generator as ``subject``.
        config: Configuration dict; missing keys fall back to the defaults
            used below (3 topics, 5 cards per topic, model ``gpt-5.2-auto``).
        api_key: OpenAI API key forwarded as ``api_key_input``.

    Returns:
        The 3-tuple returned by ``orchestrate_card_generation``:
        ``(output_df, total_cards_html, token_usage_html)``.
    """
    client_manager = OpenAIClientManager()
    response_cache = ResponseCache()

    # Resolve the counts once so the progress label and the generation call
    # agree, and a config missing either key uses the defaults instead of
    # raising KeyError while the label is being built.
    topic_number = config.get("topic_number", 3)
    cards_per_topic = config.get("cards_per_topic", 5)

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        console=console,
    ) as progress:
        task = progress.add_task(
            f"Generating {topic_number * cards_per_topic} cards...",
            total=100,
        )

        # Generate cards
        (
            output_df,
            total_cards_html,
            token_usage_html,
        ) = await orchestrate_card_generation(
            client_manager=client_manager,
            cache=response_cache,
            api_key_input=api_key,
            subject=prompt,
            generation_mode="subject",
            source_text="",
            url_input="",
            model_name=config.get("model_choice", "gpt-5.2-auto"),
            topic_number=topic_number,
            cards_per_topic=cards_per_topic,
            preference_prompt=config.get("preference_prompt", ""),
            generate_cloze=config.get("generate_cloze_checkbox", False),
            # Falsy library values (missing, empty string) are passed as None.
            library_name=config.get("library_name") or None,
            library_topic=config.get("library_topic") or None,
            topics_list=config.get("topics_list"),
        )

        # The bar has no intermediate updates; jump to 100% once done.
        progress.update(task, completed=100)

    return output_df, total_cards_html, token_usage_html
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def _swap_export_suffix(output_path: str, wanted: str, other: str) -> str:
    """Return *output_path* ending in *wanted*, replacing a trailing *other* if present."""
    if output_path.endswith(wanted):
        return output_path
    if output_path.endswith(other):
        # Swap only the trailing extension; an occurrence of the other
        # extension elsewhere in the path (e.g. a directory name) is kept.
        return output_path[: -len(other)] + wanted
    return f"{output_path}{wanted}"


def export_cards(
    df: pd.DataFrame,
    output_path: str,
    deck_name: str,
    export_format: str = "apkg",
) -> str:
    """Export the cards in *df* to an ``.apkg`` or ``.csv`` file.

    The output path is normalized so that it ends with the extension that
    matches *export_format*: a trailing extension of the other format is
    swapped, and any other path gets the extension appended.

    Args:
        df: DataFrame of generated cards.
        output_path: Requested output file path.
        deck_name: Deck name passed to the APKG exporter (unused for CSV).
        export_format: ``"apkg"`` selects the APKG exporter; any other value
            is treated as CSV.

    Returns:
        The path returned by the exporter that was used.
    """
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        progress.add_task(f"Exporting to {export_format.upper()}...", total=None)

        if export_format == "apkg":
            # Ensure .apkg extension
            output_path = _swap_export_suffix(output_path, ".apkg", ".csv")
            exported_path = export_dataframe_to_apkg(df, output_path, deck_name)
        else:  # csv
            # Ensure .csv extension
            output_path = _swap_export_suffix(output_path, ".csv", ".apkg")
            exported_path = export_dataframe_to_csv(df, output_path)

    return exported_path
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
@click.command()
@click.option(
    "-p",
    "--prompt",
    required=True,
    help="Subject or topic for flashcard generation (e.g., 'Basic SQL', 'React Hooks')",
)
@click.option(
    "--topics",
    type=int,
    help="Number of topics (auto-detected if not specified)",
)
@click.option(
    "--cards-per-topic",
    type=int,
    help="Number of cards per topic (auto-detected if not specified)",
)
@click.option(
    "--model",
    type=click.Choice(
        ["gpt-5.2-auto", "gpt-5.2-instant", "gpt-5.2-thinking"],
        case_sensitive=False,
    ),
    help="Model to use for generation (auto-selected if not specified)",
)
@click.option(
    "-o",
    "--output",
    default="deck.apkg",
    help="Output file path (default: deck.apkg)",
)
@click.option(
    "--format",
    "export_format",
    type=click.Choice(["apkg", "csv"], case_sensitive=False),
    default="apkg",
    help="Export format (default: apkg)",
)
@click.option(
    "--api-key",
    envvar="OPENAI_API_KEY",
    help="OpenAI API key (or set OPENAI_API_KEY env var)",
)
@click.option(
    "--no-confirm",
    is_flag=True,
    help="Skip confirmation prompt",
)
def main(
    prompt: str,
    topics: Optional[int],
    cards_per_topic: Optional[int],
    model: Optional[str],
    output: str,
    export_format: str,
    api_key: Optional[str],
    no_confirm: bool,
):
    """
    AnkiGen CLI - Generate Anki flashcards from the command line

    Examples:

        # Quick generation with auto-config
        ankigen -p "Basic SQL"

        # With custom settings
        ankigen -p "React Hooks" --topics 5 --cards-per-topic 8 --output hooks.apkg

        # Export to CSV
        ankigen -p "Docker basics" --format csv -o docker.csv
    """
    # NOTE: the docstring above doubles as the click help text, so it is kept
    # verbatim; implementation notes live in comments instead.
    #
    # Flow: print header -> resolve API key -> auto-configure -> optional
    # confirmation -> generate cards -> export -> print summary.
    # Exit codes: 1 on error or when no cards were generated, 130 on Ctrl-C.

    # Print header
    console.print(
        Panel.fit(
            "[bold cyan]AnkiGen CLI[/bold cyan]\n[dim]Generate Anki flashcards with AI[/dim]",
            border_style="cyan",
        )
    )
    console.print()

    # Get API key
    if not api_key:
        api_key = get_api_key()

    # Run async workflow
    async def workflow():
        try:
            # Step 1: Auto-configure
            console.print(f"[bold]Subject:[/bold] {prompt}\n")
            config = await auto_configure_from_prompt(
                prompt=prompt,
                api_key=api_key,
                override_topics=topics,
                override_cards=cards_per_topic,
                override_model=model,
            )

            # Step 2: Confirm (unless --no-confirm)
            if not no_confirm:
                console.print()
                if not click.confirm("Proceed with card generation?", default=True):
                    console.print("[yellow]Cancelled[/yellow]")
                    return

            console.print()

            # Step 3: Generate cards (the two HTML summaries are not used by the CLI)
            df, _, _ = await generate_cards_from_config(
                prompt=prompt,
                config=config,
                api_key=api_key,
            )

            if df.empty:
                console.print("[red]✗[/red] No cards generated")
                sys.exit(1)

            # Step 4: Export
            console.print()
            deck_name = f"AnkiGen - {prompt}"
            exported_path = export_cards(
                df=df,
                output_path=output,
                deck_name=deck_name,
                export_format=export_format,
            )

            # Step 5: Success summary
            console.print()
            file_size = Path(exported_path).stat().st_size / 1024  # KB

            summary = Table.grid(padding=(0, 2))
            summary.add_row("[green]✓[/green] Success!", "")
            summary.add_row("Cards Generated:", f"[bold]{len(df)}[/bold]")
            summary.add_row("Output File:", f"[bold]{exported_path}[/bold]")
            summary.add_row("File Size:", f"{file_size:.1f} KB")

            # Get token usage from tracker
            tracker = get_token_tracker()
            session = tracker.get_session_summary()
            if session["total_tokens"] > 0:
                # Calculate totals across all models
                total_input = sum(u.prompt_tokens for u in tracker.usage_history)
                total_output = sum(u.completion_tokens for u in tracker.usage_history)
                summary.add_row(
                    "Tokens:",
                    f"{total_input:,} in / {total_output:,} out ({session['total_tokens']:,} total)",
                )

            console.print(
                Panel(summary, border_style="green", title="Generation Complete")
            )

        except KeyboardInterrupt:
            console.print("\n[yellow]Cancelled by user[/yellow]")
            sys.exit(130)
        except Exception as e:
            logger.error(f"CLI error: {e}", exc_info=True)
            console.print(f"[red]✗ Error:[/red] {str(e)}")
            sys.exit(1)

    # Run the async workflow. On Python 3.11+ asyncio.run() reacts to Ctrl-C by
    # cancelling the main task and then raising KeyboardInterrupt from run()
    # itself, so the handler inside workflow() is not reached; catch it here
    # as well to keep the friendly message and the 130 exit code.
    try:
        asyncio.run(workflow())
    except KeyboardInterrupt:
        console.print("\n[yellow]Cancelled by user[/yellow]")
        sys.exit(130)
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
# Run the click command when this module is executed as a script.
if __name__ == "__main__":
    main()
|
ankigen/context7.py
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Context7 integration for library documentation"""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
import subprocess
|
| 5 |
+
import json
|
| 6 |
+
from typing import Optional, Dict, Any
|
| 7 |
+
from tenacity import (
|
| 8 |
+
retry,
|
| 9 |
+
stop_after_attempt,
|
| 10 |
+
wait_exponential,
|
| 11 |
+
retry_if_exception_type,
|
| 12 |
+
)
|
| 13 |
+
from ankigen.logging import logger
|
| 14 |
+
from ankigen.exceptions import (
|
| 15 |
+
ValidationError,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
# Upper bound on accepted string length; get_library_docs rejects longer library IDs.
MAX_STRING_LENGTH = 200  # Prevent excessively long inputs
# Seconds to wait for the Context7 subprocess to answer before it is killed.
SUBPROCESS_TIMEOUT = 60.0  # 60 second timeout for Context7 calls
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class Context7Client:
    """Context7 MCP client for fetching library documentation.

    Every tool call launches a fresh ``npx @upstash/context7-mcp`` subprocess
    and exchanges JSON-RPC messages with it over stdin/stdout, so the client
    itself keeps no state between calls.
    """

    # JSON-RPC ids used for the two requests sent to the subprocess.
    _INIT_REQUEST_ID = 0
    _TOOL_REQUEST_ID = 1

    def __init__(self):
        pass  # No state needed - each call creates fresh subprocess

    async def call_context7_tool(
        self, tool_name: str, args: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Call a Context7 tool via direct JSONRPC with retry logic.

        Timeouts and connection errors are retried (up to three attempts);
        any error that remains is logged and reported in the returned dict
        rather than raised.

        Args:
            tool_name: Name of the Context7 tool to invoke.
            args: Arguments passed to the tool.

        Returns:
            ``{"text": ..., "success": True}`` on success, otherwise
            ``{"error": ..., "success": False}``.
        """
        try:
            return await self._call_context7_tool_with_retry(tool_name, args)
        except Exception as e:
            logger.error(f"Error calling Context7 tool {tool_name}: {e}")
            return {"error": str(e), "success": False}

    # The retry decorator sits on this raising helper rather than on the
    # public method: the public method converts every exception into an error
    # dict, which would otherwise hide TimeoutError/ConnectionError from
    # tenacity and make the retry policy a no-op.
    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type((TimeoutError, ConnectionError)),
        reraise=True,
    )
    async def _call_context7_tool_with_retry(
        self, tool_name: str, args: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Run one Context7 tool call in a subprocess; raise on transport errors."""
        # Build the JSONRPC request
        request = {
            "jsonrpc": "2.0",
            "id": self._TOOL_REQUEST_ID,
            "method": "tools/call",
            "params": {"name": tool_name, "arguments": args},
        }

        # Initialization request, sent first
        init_request = {
            "jsonrpc": "2.0",
            "id": self._INIT_REQUEST_ID,
            "method": "initialize",
            "params": {
                "protocolVersion": "2025-06-18",
                "capabilities": {},
                "clientInfo": {"name": "ankigen", "version": "1.0.0"},
            },
        }

        # Call the Context7 server
        process = await asyncio.create_subprocess_exec(
            "npx",
            "@upstash/context7-mcp",
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        # Send both requests, newline-delimited, with timeout protection
        input_data = "\n".join([json.dumps(init_request), json.dumps(request), ""])
        try:
            stdout, _stderr = await asyncio.wait_for(
                process.communicate(input=input_data.encode()),
                timeout=SUBPROCESS_TIMEOUT,
            )
        except asyncio.TimeoutError:
            # Proper process cleanup on timeout
            try:
                await self._kill_process(process)
            except Exception as cleanup_error:
                logger.error(f"Error during process cleanup: {cleanup_error}")
            raise TimeoutError(
                f"Context7 subprocess timed out after {SUBPROCESS_TIMEOUT}s"
            )
        except Exception:
            # Clean up process on any other error
            try:
                await self._kill_process(process)
            except Exception:
                pass  # Best effort cleanup
            raise

        tool_response = self._find_response(stdout.decode(), self._TOOL_REQUEST_ID)
        if tool_response is None:
            return {"error": "Invalid response format", "success": False}
        return self._extract_tool_result(tool_response)

    @staticmethod
    async def _kill_process(process: "asyncio.subprocess.Process") -> None:
        """Kill *process* if it is still running and wait briefly for it to exit."""
        if process.returncode is None:  # Process still running
            process.kill()
            # Wait for process to actually terminate
            await asyncio.wait_for(process.wait(), timeout=5.0)

    @staticmethod
    def _find_response(raw_stdout: str, request_id: int) -> Optional[Dict[str, Any]]:
        """Return the JSON-RPC message in *raw_stdout* whose ``id`` is *request_id*.

        Matching on the id instead of the line position keeps the lookup
        correct when other lines (non-JSON output, other messages) appear on
        stdout around the tool response.
        """
        for line in raw_stdout.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                message = json.loads(line)
            except json.JSONDecodeError:
                continue
            if isinstance(message, dict) and message.get("id") == request_id:
                return message
        return None

    @staticmethod
    def _extract_tool_result(tool_response: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a JSON-RPC tool response into the client's result dict."""
        if "result" in tool_response:
            result = tool_response["result"]
            # Extract content from the result (only the first item is used)
            if "content" in result and result["content"]:
                content_item = result["content"][0]
                if "text" in content_item:
                    return {"text": content_item["text"], "success": True}
                elif "type" in content_item and content_item["type"] == "text":
                    # A text-typed item without a "text" key yields empty text.
                    return {
                        "text": content_item.get("text", ""),
                        "success": True,
                    }
            return {"error": "No content in response", "success": False}
        elif "error" in tool_response:
            return {"error": tool_response["error"], "success": False}

        return {"error": "Invalid response format", "success": False}

    def _parse_library_response(self, text: str) -> list[Dict[str, Any]]:
        """Parse Context7 response text into list of library dicts.

        Args:
            text: Raw text response from Context7

        Returns:
            List of library dicts with keys: title, id, snippets, trust.
            Entries without an ``id`` are dropped; ``title`` is lowercased.
        """
        title_prefix = "- Title:"
        id_prefix = "- Context7-compatible library ID:"
        snippets_prefix = "- Code Snippets:"
        trust_prefix = "- Trust Score:"

        libraries: list[Dict[str, Any]] = []
        current_lib: Dict[str, Any] = {}

        for raw_line in text.split("\n"):
            line = raw_line.strip()

            if line.startswith(title_prefix):
                # A new title starts a new entry; flush the previous one.
                if current_lib.get("id"):
                    libraries.append(current_lib)
                current_lib = {"title": line.removeprefix(title_prefix).strip().lower()}

            elif line.startswith(id_prefix):
                current_lib["id"] = line.removeprefix(id_prefix).strip()

            elif line.startswith(snippets_prefix):
                try:
                    current_lib["snippets"] = int(
                        line.removeprefix(snippets_prefix).strip()
                    )
                except ValueError:
                    pass  # Non-numeric count: leave the field unset

            elif line.startswith(trust_prefix):
                try:
                    current_lib["trust"] = float(
                        line.removeprefix(trust_prefix).strip()
                    )
                except ValueError:
                    pass  # Non-numeric score: leave the field unset

        if current_lib.get("id"):
            libraries.append(current_lib)

        return libraries

    def _score_library(self, lib: Dict[str, Any], search_term: str) -> float:
        """Score a library based on how well it matches the search term.

        Args:
            lib: Library dict with title, id, snippets, trust
            search_term: Lowercase search term

        Returns:
            Score (higher is better match)
        """
        score = 0.0
        lib_title = lib.get("title", "")
        lib_id = lib["id"].lower()

        # Name match tiers, strongest first; only the first matching tier counts.
        if lib_title == search_term:
            score += 10000
        elif lib_id == f"/{search_term}-dev/{search_term}":
            score += 5000
        elif f"/{search_term}/" in lib_id or lib_id.endswith(f"/{search_term}"):
            score += 2000
        elif search_term in lib_title:
            # An exact title match was already handled by the first tier.
            score += 200 if lib_title.startswith(search_term) else 50

        # Bonus for code snippets (indicates main library)
        score += lib.get("snippets", 0) / 10

        # Bonus for trust score (high trust = official/authoritative)
        score += lib.get("trust", 0) * 100

        return score

    def _select_best_library(
        self, libraries: list[Dict[str, Any]], search_term: str
    ) -> Optional[Dict[str, Any]]:
        """Select the best matching library from a list.

        Args:
            libraries: List of library dicts
            search_term: Lowercase search term

        Returns:
            Best matching library dict, or None if no match
        """
        best_lib = None
        best_score = -1.0

        for lib in libraries:
            score = self._score_library(lib, search_term)

            # Only log candidates that mention the search term.
            if search_term in lib.get("title", "") or search_term in lib["id"].lower():
                logger.debug(
                    f"Scoring {lib['id']}: title='{lib.get('title', '')}', "
                    f"snippets={lib.get('snippets', 0)}, trust={lib.get('trust', 0)}, score={score:.2f}"
                )

            # Strict comparison: on a tie the earlier library wins.
            if score > best_score:
                best_score = score
                best_lib = lib

        if best_lib:
            logger.info(
                f"Selected library: {best_lib['id']} (title: {best_lib.get('title', 'unknown')}, "
                f"snippets: {best_lib.get('snippets', 0)}, trust: {best_lib.get('trust', 0)}, "
                f"score: {best_score:.2f})"
            )

        return best_lib

    async def resolve_library_id(self, library_name: str) -> Optional[str]:
        """Resolve a library name to a Context7-compatible ID.

        Returns:
            The id of the best-scoring library, or ``None`` when the tool call
            fails or yields no usable library entry.
        """
        logger.info(f"Resolving library ID for: {library_name}")

        result = await self.call_context7_tool(
            "resolve-library-id", {"libraryName": library_name}
        )

        if not (result and result.get("success") and result.get("text")):
            logger.warning(f"Could not resolve library ID for '{library_name}'")
            return None

        libraries = self._parse_library_response(result["text"])
        if not libraries:
            logger.warning(f"Could not resolve library ID for '{library_name}'")
            return None

        best_lib = self._select_best_library(libraries, library_name.lower())
        if best_lib:
            logger.info(f"Resolved '{library_name}' to ID: {best_lib['id']}")
            return best_lib["id"]

        logger.warning(f"Could not resolve library ID for '{library_name}'")
        return None

    async def get_library_docs(
        self, library_id: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Get documentation for a library.

        Args:
            library_id: Context7 library ID; must start with ``/`` and be at
                most ``MAX_STRING_LENGTH`` characters long.
            topic: Optional topic, forwarded to the tool only when truthy.
            tokens: Value forwarded to the tool as ``tokens``.

        Returns:
            The documentation text, or ``None`` when it could not be fetched.

        Raises:
            ValidationError: If *library_id* fails the format check.
        """
        # Security: Validate library_id (should start with /)
        if (
            not library_id
            or not library_id.startswith("/")
            or len(library_id) > MAX_STRING_LENGTH
        ):
            logger.error(f"Invalid library ID format (security): '{library_id}'")
            raise ValidationError("Invalid library ID format")

        logger.info(
            f"Fetching docs for: {library_id}" + (f" (topic: {topic})" if topic else "")
        )

        args: Dict[str, Any] = {
            "context7CompatibleLibraryID": library_id,
            "tokens": tokens,
        }
        if topic:
            args["topic"] = topic

        result = await self.call_context7_tool("get-library-docs", args)

        if result and result.get("success") and result.get("text"):
            docs = result["text"]
            logger.info(f"Retrieved {len(docs)} characters of documentation")
            return docs

        logger.warning(f"Could not fetch docs for '{library_id}'")
        return None

    async def fetch_library_documentation(
        self, library_name: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Convenience method to resolve and fetch docs in one call."""
        library_id = await self.resolve_library_id(library_name)
        if not library_id:
            return None

        return await self.get_library_docs(library_id, topic, tokens)
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
async def test_context7() -> None:
    """Manual smoke test: resolve the ``react`` library and fetch its hooks docs.

    Progress and results are printed to stdout; nothing is returned.
    """
    client = Context7Client()

    print("Testing Context7 integration...")

    # Step 1: resolve a library name to an ID
    library_id = await client.resolve_library_id("react")
    if not library_id:
        print("✗ Failed to resolve library ID")
        return

    print(f"✓ Resolved 'react' to ID: {library_id}")

    # Step 2: fetch documentation for the resolved ID
    docs = await client.get_library_docs(library_id, topic="hooks", tokens=2000)
    if not docs:
        print("✗ Failed to fetch documentation")
        return

    print(f"✓ Fetched {len(docs)} characters of documentation")
    print(f"Preview: {docs[:300]}...")
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
# Run the Context7 smoke test when this module is executed as a script.
if __name__ == "__main__":
    asyncio.run(test_context7())
|
ankigen/exceptions.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Custom exceptions for AnkiGen application.
|
| 2 |
+
|
| 3 |
+
This module provides a hierarchy of custom exceptions to standardize
|
| 4 |
+
error handling across the codebase.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class AnkigenError(Exception):
    """Root of the AnkiGen exception hierarchy; every custom error derives from it."""
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ValidationError(AnkigenError):
    """Signals that input validation failed."""
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class SecurityError(AnkigenError):
    """Signals a failed security check (SSRF, command injection, etc.)."""
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class APIError(AnkigenError):
    """Common base class for API-related errors."""
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class OpenAIAPIError(APIError):
    """Signals that an OpenAI API call failed."""
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class Context7APIError(APIError):
    """Signals that a Context7 API call failed."""
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class ExportError(AnkigenError):
    """Common base class for export-related errors."""
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class CardGenerationError(AnkigenError):
    """Signals that card generation failed."""
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class ConfigurationError(AnkigenError):
    """Signals that configuration is invalid or missing."""
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def handle_exception(
|
| 63 |
+
exc: Exception,
|
| 64 |
+
logger,
|
| 65 |
+
message: str,
|
| 66 |
+
reraise: bool = True,
|
| 67 |
+
reraise_as: type[Exception] | None = None,
|
| 68 |
+
) -> None:
|
| 69 |
+
"""Standardized exception handler.
|
| 70 |
+
|
| 71 |
+
Args:
|
| 72 |
+
exc: The exception to handle
|
| 73 |
+
logger: Logger instance to use
|
| 74 |
+
message: Error message to log
|
| 75 |
+
reraise: Whether to re-raise the exception
|
| 76 |
+
reraise_as: Optional exception type to wrap and re-raise as
|
| 77 |
+
|
| 78 |
+
Raises:
|
| 79 |
+
The original exception or wrapped exception if reraise is True
|
| 80 |
+
"""
|
| 81 |
+
logger.error(f"{message}: {exc}", exc_info=True)
|
| 82 |
+
|
| 83 |
+
if reraise:
|
| 84 |
+
if reraise_as:
|
| 85 |
+
raise reraise_as(f"{message}: {exc}") from exc
|
| 86 |
+
raise
|
ankigen/exporters.py
ADDED
|
@@ -0,0 +1,943 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Module for CSV and APKG export functions
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import genanki
|
| 6 |
+
import random
|
| 7 |
+
import html
|
| 8 |
+
from typing import List, Dict, Any, Optional
|
| 9 |
+
import csv
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
import os
|
| 12 |
+
|
| 13 |
+
from ankigen.utils import get_logger, strip_html_tags
|
| 14 |
+
|
| 15 |
+
logger = get_logger()
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# --- Helper function for formatting fields ---
|
| 19 |
+
def _format_field_as_string(value: Any) -> str:
|
| 20 |
+
if isinstance(value, list) or isinstance(value, tuple):
|
| 21 |
+
return ", ".join(str(item).strip() for item in value if str(item).strip())
|
| 22 |
+
if pd.isna(value) or value is None:
|
| 23 |
+
return ""
|
| 24 |
+
return str(value).strip()
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _generate_timestamped_filename(
|
| 28 |
+
base_name: str, extension: str, include_timestamp: bool = True
|
| 29 |
+
) -> str:
|
| 30 |
+
"""Generate a filename with optional timestamp.
|
| 31 |
+
|
| 32 |
+
Args:
|
| 33 |
+
base_name: The base name for the file (without extension)
|
| 34 |
+
extension: File extension (e.g., 'csv', 'apkg')
|
| 35 |
+
include_timestamp: Whether to include timestamp in filename
|
| 36 |
+
|
| 37 |
+
Returns:
|
| 38 |
+
Generated filename with extension
|
| 39 |
+
"""
|
| 40 |
+
if include_timestamp:
|
| 41 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 42 |
+
return f"{base_name}_{timestamp}.{extension}"
|
| 43 |
+
return f"{base_name}.{extension}"
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _ensure_output_directory(filepath: str) -> None:
|
| 47 |
+
"""Ensure the output directory exists for the given filepath.
|
| 48 |
+
|
| 49 |
+
Args:
|
| 50 |
+
filepath: Full path to the file
|
| 51 |
+
|
| 52 |
+
Creates the directory if it doesn't exist.
|
| 53 |
+
"""
|
| 54 |
+
output_dir = os.path.dirname(filepath)
|
| 55 |
+
if output_dir and not os.path.exists(output_dir):
|
| 56 |
+
os.makedirs(output_dir)
|
| 57 |
+
logger.info(f"Created output directory: {output_dir}")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _validate_non_empty_data(data: Any, data_type: str) -> None:
|
| 61 |
+
"""Validate that data is not empty.
|
| 62 |
+
|
| 63 |
+
Args:
|
| 64 |
+
data: The data to validate (list, DataFrame, etc.)
|
| 65 |
+
data_type: Description of data type for error messages
|
| 66 |
+
|
| 67 |
+
Raises:
|
| 68 |
+
ValueError: If data is empty or None
|
| 69 |
+
"""
|
| 70 |
+
if data is None:
|
| 71 |
+
raise ValueError(f"No {data_type} provided to export.")
|
| 72 |
+
if isinstance(data, list) and not data:
|
| 73 |
+
raise ValueError(f"No {data_type} provided to export.")
|
| 74 |
+
if isinstance(data, pd.DataFrame) and data.empty:
|
| 75 |
+
raise ValueError(f"No {data_type} available to export.")
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# --- Constants for APKG Generation (Subtask 10) ---
ANKI_BASIC_MODEL_NAME = "AnkiGen Basic"
ANKI_CLOZE_MODEL_NAME = "AnkiGen Cloze"

# Model IDs are fixed rather than drawn from `random` at import time.
# A randomly drawn ID changes on every process start, so decks exported by
# different runs would carry different IDs for the same note type. genanki's
# guidance is to generate an ID once (random.randrange(1 << 30, 1 << 31)) and
# hard-code it. Change these only when a note type's fields or templates
# change incompatibly.
DEFAULT_BASIC_MODEL_ID = 1482937561
DEFAULT_CLOZE_MODEL_ID = 1739205846
|
| 86 |
+
|
| 87 |
+
# --- Shared CSS with dark mode support ---
|
| 88 |
+
CARD_CSS = """
|
| 89 |
+
/* CSS Variables - Light Mode (default) */
|
| 90 |
+
.card {
|
| 91 |
+
--bg-card: #ffffff;
|
| 92 |
+
--bg-answer: #f0f9ff;
|
| 93 |
+
--bg-explanation: #f0fdf4;
|
| 94 |
+
--bg-example: #fefce8;
|
| 95 |
+
--bg-back-extra: #eef2ff;
|
| 96 |
+
--bg-prereq: #f8fafc;
|
| 97 |
+
--bg-code: #2d2d2d;
|
| 98 |
+
|
| 99 |
+
--text-primary: #1a1a1a;
|
| 100 |
+
--text-secondary: #4b5563;
|
| 101 |
+
--text-muted: #666666;
|
| 102 |
+
--text-heading: #1f2937;
|
| 103 |
+
--text-code: #f8f8f2;
|
| 104 |
+
|
| 105 |
+
--accent-blue: #2563eb;
|
| 106 |
+
--accent-blue-light: #60a5fa;
|
| 107 |
+
--accent-green: #4ade80;
|
| 108 |
+
--accent-yellow: #facc15;
|
| 109 |
+
--accent-indigo: #818cf8;
|
| 110 |
+
--accent-red: #ef4444;
|
| 111 |
+
|
| 112 |
+
--border-light: #e5e7eb;
|
| 113 |
+
--border-dashed: #cbd5e1;
|
| 114 |
+
|
| 115 |
+
--shadow: rgba(0, 0, 0, 0.05);
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
/* Dark Mode Overrides */
|
| 119 |
+
.nightMode .card,
|
| 120 |
+
.night_mode .card {
|
| 121 |
+
--bg-card: #1e1e1e;
|
| 122 |
+
--bg-answer: #1e293b;
|
| 123 |
+
--bg-explanation: #14291a;
|
| 124 |
+
--bg-example: #292518;
|
| 125 |
+
--bg-back-extra: #1e1b2e;
|
| 126 |
+
--bg-prereq: #262626;
|
| 127 |
+
--bg-code: #0d0d0d;
|
| 128 |
+
|
| 129 |
+
--text-primary: #e4e4e7;
|
| 130 |
+
--text-secondary: #a1a1aa;
|
| 131 |
+
--text-muted: #9ca3af;
|
| 132 |
+
--text-heading: #f4f4f5;
|
| 133 |
+
--text-code: #f8f8f2;
|
| 134 |
+
|
| 135 |
+
--accent-blue: #60a5fa;
|
| 136 |
+
--accent-blue-light: #93c5fd;
|
| 137 |
+
--accent-green: #4ade80;
|
| 138 |
+
--accent-yellow: #fde047;
|
| 139 |
+
--accent-indigo: #a5b4fc;
|
| 140 |
+
--accent-red: #f87171;
|
| 141 |
+
|
| 142 |
+
--border-light: #3f3f46;
|
| 143 |
+
--border-dashed: #52525b;
|
| 144 |
+
|
| 145 |
+
--shadow: rgba(0, 0, 0, 0.3);
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
/* Base styles */
|
| 149 |
+
.card {
|
| 150 |
+
font-family: 'Inter', system-ui, -apple-system, sans-serif;
|
| 151 |
+
font-size: 16px;
|
| 152 |
+
line-height: 1.6;
|
| 153 |
+
color: var(--text-primary);
|
| 154 |
+
max-width: 800px;
|
| 155 |
+
margin: 0 auto;
|
| 156 |
+
padding: 20px;
|
| 157 |
+
background: var(--bg-card);
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
@media (max-width: 768px) {
|
| 161 |
+
.card {
|
| 162 |
+
font-size: 14px;
|
| 163 |
+
padding: 15px;
|
| 164 |
+
}
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
/* Question side */
|
| 168 |
+
.question-side {
|
| 169 |
+
position: relative;
|
| 170 |
+
min-height: 200px;
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
.difficulty-indicator {
|
| 174 |
+
position: absolute;
|
| 175 |
+
top: 10px;
|
| 176 |
+
right: 10px;
|
| 177 |
+
width: 10px;
|
| 178 |
+
height: 10px;
|
| 179 |
+
border-radius: 50%;
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
.difficulty-indicator.beginner { background: var(--accent-green); }
|
| 183 |
+
.difficulty-indicator.intermediate { background: var(--accent-yellow); }
|
| 184 |
+
.difficulty-indicator.advanced { background: var(--accent-red); }
|
| 185 |
+
|
| 186 |
+
.question {
|
| 187 |
+
font-size: 1.3em;
|
| 188 |
+
font-weight: 600;
|
| 189 |
+
color: var(--accent-blue);
|
| 190 |
+
margin-bottom: 1.5em;
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
.prerequisites {
|
| 194 |
+
margin-top: 1em;
|
| 195 |
+
font-size: 0.9em;
|
| 196 |
+
color: var(--text-muted);
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
.prerequisites-toggle {
|
| 200 |
+
color: var(--accent-blue);
|
| 201 |
+
cursor: pointer;
|
| 202 |
+
text-decoration: underline;
|
| 203 |
+
}
|
| 204 |
+
|
| 205 |
+
.prerequisites-content {
|
| 206 |
+
display: none;
|
| 207 |
+
margin-top: 0.5em;
|
| 208 |
+
padding: 0.5em;
|
| 209 |
+
background: var(--bg-prereq);
|
| 210 |
+
border-radius: 4px;
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
.prerequisites.show .prerequisites-content {
|
| 214 |
+
display: block;
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
/* Answer side sections */
|
| 218 |
+
.answer-section,
|
| 219 |
+
.explanation-section,
|
| 220 |
+
.example-section,
|
| 221 |
+
.back-extra-section {
|
| 222 |
+
margin: 1.5em 0;
|
| 223 |
+
padding: 1.2em;
|
| 224 |
+
border-radius: 8px;
|
| 225 |
+
box-shadow: 0 2px 4px var(--shadow);
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
.answer-section {
|
| 229 |
+
background: var(--bg-answer);
|
| 230 |
+
border-left: 4px solid var(--accent-blue);
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
.back-extra-section {
|
| 234 |
+
background: var(--bg-back-extra);
|
| 235 |
+
border-left: 4px solid var(--accent-indigo);
|
| 236 |
+
}
|
| 237 |
+
|
| 238 |
+
.explanation-section {
|
| 239 |
+
background: var(--bg-explanation);
|
| 240 |
+
border-left: 4px solid var(--accent-green);
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
.example-section {
|
| 244 |
+
background: var(--bg-example);
|
| 245 |
+
border-left: 4px solid var(--accent-yellow);
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
.example-section pre {
|
| 249 |
+
background-color: var(--bg-code);
|
| 250 |
+
color: var(--text-code);
|
| 251 |
+
padding: 1em;
|
| 252 |
+
border-radius: 0.3em;
|
| 253 |
+
overflow-x: auto;
|
| 254 |
+
font-family: 'Consolas', 'Monaco', 'Menlo', monospace;
|
| 255 |
+
font-size: 0.9em;
|
| 256 |
+
line-height: 1.4;
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
.example-section code {
|
| 260 |
+
font-family: 'Consolas', 'Monaco', 'Menlo', monospace;
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
.metadata-section {
|
| 264 |
+
margin-top: 2em;
|
| 265 |
+
padding-top: 1em;
|
| 266 |
+
border-top: 1px solid var(--border-light);
|
| 267 |
+
font-size: 0.9em;
|
| 268 |
+
color: var(--text-secondary);
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
.metadata-section h3 {
|
| 272 |
+
font-size: 1em;
|
| 273 |
+
color: var(--text-heading);
|
| 274 |
+
margin-bottom: 0.5em;
|
| 275 |
+
}
|
| 276 |
+
|
| 277 |
+
.metadata-section > div {
|
| 278 |
+
margin-bottom: 0.8em;
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
.source-url a {
|
| 282 |
+
color: var(--accent-blue);
|
| 283 |
+
text-decoration: none;
|
| 284 |
+
}
|
| 285 |
+
.source-url a:hover {
|
| 286 |
+
text-decoration: underline;
|
| 287 |
+
}
|
| 288 |
+
|
| 289 |
+
/* Cloze deletion styles */
|
| 290 |
+
.cloze {
|
| 291 |
+
font-weight: bold;
|
| 292 |
+
color: var(--accent-blue);
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
/* General utility */
|
| 296 |
+
hr {
|
| 297 |
+
border: none;
|
| 298 |
+
border-top: 1px dashed var(--border-dashed);
|
| 299 |
+
margin: 1.5em 0;
|
| 300 |
+
}
|
| 301 |
+
|
| 302 |
+
/* Rich text field styling */
|
| 303 |
+
.field ul, .field ol {
|
| 304 |
+
margin-left: 1.5em;
|
| 305 |
+
padding-left: 0.5em;
|
| 306 |
+
}
|
| 307 |
+
.field li {
|
| 308 |
+
margin-bottom: 0.3em;
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
/* Responsive design */
|
| 312 |
+
@media (max-width: 640px) {
|
| 313 |
+
.answer-section,
|
| 314 |
+
.explanation-section,
|
| 315 |
+
.example-section,
|
| 316 |
+
.back-extra-section {
|
| 317 |
+
padding: 1em;
|
| 318 |
+
margin: 1em 0;
|
| 319 |
+
}
|
| 320 |
+
}
|
| 321 |
+
|
| 322 |
+
/* Animations */
|
| 323 |
+
@keyframes fadeIn {
|
| 324 |
+
from { opacity: 0; }
|
| 325 |
+
to { opacity: 1; }
|
| 326 |
+
}
|
| 327 |
+
|
| 328 |
+
.card {
|
| 329 |
+
animation: fadeIn 0.3s ease-in-out;
|
| 330 |
+
}
|
| 331 |
+
"""
|
| 332 |
+
|
| 333 |
+
# --- Full Model Definitions ---
|
| 334 |
+
|
| 335 |
+
BASIC_MODEL = genanki.Model(
|
| 336 |
+
DEFAULT_BASIC_MODEL_ID, # Use the generated ID
|
| 337 |
+
ANKI_BASIC_MODEL_NAME, # Use the constant name
|
| 338 |
+
fields=[
|
| 339 |
+
{"name": "Question"},
|
| 340 |
+
{"name": "Answer"},
|
| 341 |
+
{"name": "Explanation"},
|
| 342 |
+
{"name": "Example"},
|
| 343 |
+
{"name": "Prerequisites"},
|
| 344 |
+
{"name": "Learning_Outcomes"},
|
| 345 |
+
{"name": "Difficulty"},
|
| 346 |
+
{"name": "SourceURL"}, # Added for consistency if used by template
|
| 347 |
+
{"name": "TagsStr"}, # Added for consistency if used by template
|
| 348 |
+
],
|
| 349 |
+
templates=[
|
| 350 |
+
{
|
| 351 |
+
"name": "Card 1",
|
| 352 |
+
"qfmt": """
|
| 353 |
+
<div class=\"card question-side\">
|
| 354 |
+
<div class=\"difficulty-indicator {{Difficulty}}\"></div>
|
| 355 |
+
<div class=\"content\">
|
| 356 |
+
<div class=\"question\">{{Question}}</div>
|
| 357 |
+
<div class=\"prerequisites\" onclick=\"event.stopPropagation();\">
|
| 358 |
+
<div class=\"prerequisites-toggle\">Show Prerequisites</div>
|
| 359 |
+
<div class=\"prerequisites-content\">{{Prerequisites}}</div>
|
| 360 |
+
</div>
|
| 361 |
+
</div>
|
| 362 |
+
</div>
|
| 363 |
+
<script>
|
| 364 |
+
document.querySelector('.prerequisites-toggle').addEventListener('click', function(e) {
|
| 365 |
+
e.stopPropagation();
|
| 366 |
+
this.parentElement.classList.toggle('show');
|
| 367 |
+
});
|
| 368 |
+
</script>
|
| 369 |
+
""",
|
| 370 |
+
"afmt": """
|
| 371 |
+
<div class=\"card answer-side\">
|
| 372 |
+
<div class=\"content\">
|
| 373 |
+
<div class=\"question-section\">
|
| 374 |
+
<div class=\"question\">{{Question}}</div>
|
| 375 |
+
<div class=\"prerequisites\">
|
| 376 |
+
<strong>Prerequisites:</strong> {{Prerequisites}}
|
| 377 |
+
</div>
|
| 378 |
+
</div>
|
| 379 |
+
<hr>
|
| 380 |
+
|
| 381 |
+
<div class=\"answer-section\">
|
| 382 |
+
<h3>Answer</h3>
|
| 383 |
+
<div class=\"answer\">{{Answer}}</div>
|
| 384 |
+
</div>
|
| 385 |
+
|
| 386 |
+
<div class=\"explanation-section\">
|
| 387 |
+
<h3>Explanation</h3>
|
| 388 |
+
<div class=\"explanation-text\">{{Explanation}}</div>
|
| 389 |
+
</div>
|
| 390 |
+
|
| 391 |
+
<div class=\"example-section\">
|
| 392 |
+
<h3>Example</h3>
|
| 393 |
+
<div class=\"example-text\">{{Example}}</div>
|
| 394 |
+
<!-- Example field might contain pre/code or plain text -->
|
| 395 |
+
<!-- Handled by how HTML is put into the Example field -->
|
| 396 |
+
</div>
|
| 397 |
+
|
| 398 |
+
<div class=\"metadata-section\">
|
| 399 |
+
<div class=\"learning-outcomes\">
|
| 400 |
+
<h3>Learning Outcomes</h3>
|
| 401 |
+
<div>{{Learning_Outcomes}}</div>
|
| 402 |
+
</div>
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
<div class=\"difficulty\">
|
| 406 |
+
<h3>Difficulty Level</h3>
|
| 407 |
+
<div>{{Difficulty}}</div>
|
| 408 |
+
</div>
|
| 409 |
+
{{#SourceURL}}<div class=\"source-url\"><small>Source: <a href=\"{{SourceURL}}\">{{SourceURL}}</a></small></div>{{/SourceURL}}
|
| 410 |
+
</div>
|
| 411 |
+
</div>
|
| 412 |
+
</div>
|
| 413 |
+
""",
|
| 414 |
+
}
|
| 415 |
+
],
|
| 416 |
+
css=CARD_CSS,
|
| 417 |
+
)
|
| 418 |
+
|
| 419 |
+
CLOZE_MODEL = genanki.Model(
|
| 420 |
+
DEFAULT_CLOZE_MODEL_ID, # Use the generated ID
|
| 421 |
+
ANKI_CLOZE_MODEL_NAME, # Use the constant name
|
| 422 |
+
fields=[
|
| 423 |
+
{"name": "Text"},
|
| 424 |
+
{"name": "Back Extra"},
|
| 425 |
+
{"name": "Explanation"},
|
| 426 |
+
{"name": "Example"},
|
| 427 |
+
{"name": "Prerequisites"},
|
| 428 |
+
{"name": "Learning_Outcomes"},
|
| 429 |
+
{"name": "Difficulty"},
|
| 430 |
+
{"name": "SourceURL"},
|
| 431 |
+
{"name": "TagsStr"},
|
| 432 |
+
],
|
| 433 |
+
templates=[
|
| 434 |
+
{
|
| 435 |
+
"name": "Cloze Card",
|
| 436 |
+
"qfmt": """
|
| 437 |
+
<div class=\"card question-side\">
|
| 438 |
+
<div class=\"difficulty-indicator {{Difficulty}}\"></div>
|
| 439 |
+
<div class=\"content\">
|
| 440 |
+
<div class=\"question\">{{cloze:Text}}</div>
|
| 441 |
+
<div class=\"prerequisites\" onclick=\"event.stopPropagation();\">
|
| 442 |
+
<div class=\"prerequisites-toggle\">Show Prerequisites</div>
|
| 443 |
+
<div class=\"prerequisites-content\">{{Prerequisites}}</div>
|
| 444 |
+
</div>
|
| 445 |
+
</div>
|
| 446 |
+
</div>
|
| 447 |
+
<script>
|
| 448 |
+
document.querySelector('.prerequisites-toggle').addEventListener('click', function(e) {
|
| 449 |
+
e.stopPropagation();
|
| 450 |
+
this.parentElement.classList.toggle('show');
|
| 451 |
+
});
|
| 452 |
+
</script>
|
| 453 |
+
""",
|
| 454 |
+
"afmt": """
|
| 455 |
+
<div class=\"card answer-side\">
|
| 456 |
+
<div class=\"content\">
|
| 457 |
+
<div class=\"question-section\">
|
| 458 |
+
<div class=\"question\">{{cloze:Text}}</div>
|
| 459 |
+
<div class=\"prerequisites\">
|
| 460 |
+
<strong>Prerequisites:</strong> {{Prerequisites}}
|
| 461 |
+
</div>
|
| 462 |
+
</div>
|
| 463 |
+
<hr>
|
| 464 |
+
|
| 465 |
+
{{#Back Extra}}
|
| 466 |
+
<div class=\"back-extra-section\">
|
| 467 |
+
<h3>Additional Information</h3>
|
| 468 |
+
<div class=\"back-extra-text\">{{Back Extra}}</div>
|
| 469 |
+
</div>
|
| 470 |
+
{{/Back Extra}}
|
| 471 |
+
|
| 472 |
+
<div class=\"explanation-section\">
|
| 473 |
+
<h3>Explanation</h3>
|
| 474 |
+
<div class=\"explanation-text\">{{Explanation}}</div>
|
| 475 |
+
</div>
|
| 476 |
+
|
| 477 |
+
<div class=\"example-section\">
|
| 478 |
+
<h3>Example</h3>
|
| 479 |
+
<div class=\"example-text\">{{Example}}</div>
|
| 480 |
+
</div>
|
| 481 |
+
|
| 482 |
+
<div class=\"metadata-section\">
|
| 483 |
+
<div class=\"learning-outcomes\">
|
| 484 |
+
<h3>Learning Outcomes</h3>
|
| 485 |
+
<div>{{Learning_Outcomes}}</div>
|
| 486 |
+
</div>
|
| 487 |
+
|
| 488 |
+
|
| 489 |
+
<div class=\"difficulty\">
|
| 490 |
+
<h3>Difficulty Level</h3>
|
| 491 |
+
<div>{{Difficulty}}</div>
|
| 492 |
+
</div>
|
| 493 |
+
{{#SourceURL}}<div class=\"source-url\"><small>Source: <a href=\"{{SourceURL}}\">{{SourceURL}}</a></small></div>{{/SourceURL}}
|
| 494 |
+
</div>
|
| 495 |
+
</div>
|
| 496 |
+
</div>
|
| 497 |
+
""",
|
| 498 |
+
}
|
| 499 |
+
],
|
| 500 |
+
css=CARD_CSS,
|
| 501 |
+
model_type=1, # Cloze model type
|
| 502 |
+
)
|
| 503 |
+
|
| 504 |
+
|
| 505 |
+
# --- Helper functions for APKG (Subtask 10) ---
|
| 506 |
+
def _get_or_create_model(
    model_id: int,
    name: str,
    fields: List[Dict[str, str]],
    templates: List[Dict[str, str]],
) -> genanki.Model:
    """Construct a ``genanki.Model`` from the given ID, name, fields and templates.

    A new model object is built on every call; no CSS is attached.
    """
    model_options = {"fields": fields, "templates": templates}
    return genanki.Model(model_id, name, **model_options)
|
| 513 |
+
|
| 514 |
+
|
| 515 |
+
# --- New CSV Exporter for List of Dictionaries ---
|
| 516 |
+
|
| 517 |
+
|
| 518 |
+
def export_cards_to_csv(
    cards: List[Dict[str, Any]], filename: Optional[str] = None
) -> str:
    """Export a list of card dictionaries to a CSV file.

    The CSV has the columns ``front``, ``back``, ``tags`` and ``note_type``.
    A card that lacks ``front`` or ``back`` is logged and skipped; it does
    not abort the export.

    Args:
        cards: Card dictionaries. Each should contain ``'front'`` and
            ``'back'``; ``'tags'`` (default ``""``) and ``'note_type'``
            (default ``"Basic"``) are optional. Other keys are ignored.
        filename: Optional target path. If falsy, a timestamped
            ``ankigen_cards_<timestamp>.csv`` name is generated.

    Returns:
        The path of the written CSV file.

    Raises:
        ValueError: If ``cards`` is ``None`` or an empty list.
        IOError: If the file cannot be written (logged, then re-raised).
    """
    _validate_non_empty_data(cards, "cards")

    if not filename:
        filename = _generate_timestamped_filename("ankigen_cards", "csv")
        logger.info(f"No filename provided, generated: {filename}")

    _ensure_output_directory(filename)

    # Column order of the CSV.
    fieldnames = ["front", "back", "tags", "note_type"]

    try:
        logger.info(f"Attempting to export {len(cards)} cards to {filename}")
        with open(filename, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(
                csvfile, fieldnames=fieldnames, extrasaction="ignore"
            )
            writer.writeheader()
            for i, card in enumerate(cards):
                if "front" not in card or "back" not in card:
                    # Skip directly instead of raising a KeyError only to
                    # catch it again. The KeyError object is built solely so
                    # the log line keeps its established text.
                    problem = KeyError(
                        f"Card at index {i} is missing 'front' or 'back' key."
                    )
                    logger.error(
                        f"Skipping card due to KeyError: {problem}. Card data: {card}"
                    )
                    continue

                writer.writerow(
                    {
                        "front": card["front"],
                        "back": card["back"],
                        "tags": card.get("tags", ""),
                        "note_type": card.get("note_type", "Basic"),
                    }
                )
        logger.info(f"Successfully exported cards to {filename}")
        return filename
    except IOError as e_io:
        logger.error(f"IOError during CSV export to {filename}: {e_io}", exc_info=True)
        raise
    except Exception as e_general:
        logger.error(
            f"Unexpected error during CSV export to {filename}: {e_general}",
            exc_info=True,
        )
        raise
|
| 590 |
+
|
| 591 |
+
|
| 592 |
+
def export_cards_to_apkg(
    cards: List[Dict[str, Any]],
    filename: Optional[str] = None,
    deck_name: str = "Ankigen Generated Cards",
) -> str:
    """Export a list of card dictionaries to an Anki ``.apkg`` file.

    Args:
        cards: Card dictionaries. The keys read are ``'Question'``,
            ``'Answer'``, ``'Explanation'``, ``'Example'``,
            ``'Prerequisites'``, ``'Learning_Outcomes'``, ``'Difficulty'``,
            ``'SourceURL'`` and ``'TagsStr'`` (each defaulting to ``""``),
            plus ``'note_type'`` (default ``"Basic"``) and
            ``'tags_for_note_object'`` (default ``[]``). A card whose
            ``'Question'`` is empty is logged and skipped.
        filename: Target path. If falsy, a timestamped
            ``ankigen_deck_<timestamp>.apkg`` name is generated; otherwise
            ``.apkg`` is appended when the name does not already end with it.
        deck_name: Name of the generated deck.

    Returns:
        The path of the written ``.apkg`` file.

    Raises:
        ValueError: If ``cards`` is ``None`` or an empty list.
        gr.Error: If no card could be turned into a note.
        IOError: If the package cannot be written.
    """
    # Validate before anything touches ``cards``: logging len(cards) first
    # would turn a missing list into a TypeError instead of the ValueError.
    _validate_non_empty_data(cards, "cards")
    logger.info(f"Starting APKG export for {len(cards)} cards to deck '{deck_name}'.")

    if not filename:
        filename = _generate_timestamped_filename("ankigen_deck", "apkg")
    elif not filename.lower().endswith(".apkg"):
        filename += ".apkg"

    _ensure_output_directory(filename)

    deck_id = random.randrange(1 << 30, 1 << 31)
    anki_deck = genanki.Deck(deck_id, deck_name)

    # Same order as the fields declared on BASIC_MODEL and CLOZE_MODEL; for
    # cloze notes "Question" fills "Text" and "Answer" fills "Back Extra".
    field_keys = (
        "Question",
        "Answer",
        "Explanation",
        "Example",
        "Prerequisites",
        "Learning_Outcomes",
        "Difficulty",
        "SourceURL",
        "TagsStr",
    )

    notes_added_count = 0
    for card_dict in cards:
        note_type = card_dict.get("note_type", "Basic")
        tags_for_note_object = card_dict.get("tags_for_note_object", [])

        # Security: HTML-escape every field so card content cannot inject
        # markup or script when viewed in Anki. None becomes "" and other
        # non-string values are stringified first, so one odd value cannot
        # crash the whole export inside html.escape.
        note_fields = []
        for key in field_keys:
            raw_value = card_dict.get(key, "")
            note_fields.append("" if raw_value is None else html.escape(str(raw_value)))
        question = note_fields[0]

        if not question:
            logger.error(
                f"SKIPPING CARD DUE TO EMPTY 'Question' (front/text) field. Card data: {card_dict}"
            )
            continue

        try:
            is_cloze = note_type.lower() == "cloze"
            note = genanki.Note(
                model=CLOZE_MODEL if is_cloze else BASIC_MODEL,
                fields=note_fields,
                tags=tags_for_note_object,
            )
            anki_deck.add_note(note)
            notes_added_count += 1
        except Exception as e:
            # Best effort: one bad card is logged and skipped, not fatal.
            logger.error(
                f"Failed to create genanki.Note for card: {card_dict}. Error: {e}",
                exc_info=True,
            )
            logger.warning(f"Skipping card due to error: Question='{question[:50]}...'")

    if notes_added_count == 0:
        logger.error(
            "No valid notes could be created from the provided cards. APKG generation aborted."
        )
        raise gr.Error("Failed to create any valid Anki notes from the input.")

    logger.info(
        f"Added {notes_added_count} notes to deck '{deck_name}'. Proceeding to package."
    )

    package = genanki.Package(anki_deck)
    try:
        package.write_to_file(filename)
        logger.info(f"Successfully exported Anki deck to {filename}")
    except Exception as e:
        logger.error(f"Failed to write .apkg file to {filename}: {e}", exc_info=True)
        # Chain the original error so its traceback is kept.
        raise IOError(f"Could not write .apkg file: {e}") from e

    return filename
|
| 719 |
+
|
| 720 |
+
|
| 721 |
+
def export_cards_from_crawled_content(
    cards: List[Dict[str, Any]],
    output_path: Optional[str] = None,
    export_format: str = "csv",
    deck_name: str = "Ankigen Generated Cards",
) -> str:
    """Dispatch a list of card dicts to the CSV or APKG exporter.

    Args:
        cards: Card dictionaries to export. Must be non-empty.
        output_path: Destination path, forwarded to the selected exporter as
            its ``filename`` argument. ``None`` is forwarded unchanged.
        export_format: ``"csv"`` or ``"apkg"``; matched case-insensitively.
        deck_name: Deck name, only forwarded to the APKG exporter.

    Returns:
        Whatever the selected exporter returns (the exported file path).

    Raises:
        ValueError: If ``cards`` is empty or ``export_format`` is unsupported.
    """
    # Guard: nothing to export is treated as a caller error.
    if not cards:
        logger.warning("No cards provided to export_cards_from_crawled_content.")
        raise ValueError("No cards provided to export.")

    logger.info(
        f"Exporting {len(cards)} cards to format '{export_format}' with deck name '{deck_name}'."
    )

    requested = export_format.lower()

    if requested == "csv":
        return export_cards_to_csv(cards, filename=output_path)

    if requested == "apkg":
        return export_cards_to_apkg(cards, filename=output_path, deck_name=deck_name)

    # Anything else: log and raise with the same message, echoing the
    # caller's original (non-lowercased) spelling.
    supported_formats = ["csv", "apkg"]
    problem = (
        f"Unsupported export format: {export_format}. Supported formats: {supported_formats}"
    )
    logger.error(problem)
    raise ValueError(problem)
|
| 764 |
+
|
| 765 |
+
|
| 766 |
+
# --- New DataFrame CSV Exporter (Subtask 11) ---
|
| 767 |
+
def export_dataframe_to_csv(
    data: Optional[pd.DataFrame],
    filename_suggestion: Optional[str] = "ankigen_cards.csv",
) -> Optional[str]:
    """Export a DataFrame to a timestamped CSV file, designed for Gradio download.

    Args:
        data: The DataFrame to export.
        filename_suggestion: Optional hint for the base filename (e.g. the
            subject). A trailing ``.csv`` is dropped, spaces become ``_`` and
            path separators become ``-``; only the first 50 characters are used.

    Returns:
        The path of the written CSV file, or ``None`` if writing failed (the
        failure is logged and shown to the user as a Gradio warning).

    Raises:
        gr.Error: If the validation helper rejects ``data`` (no card data).
    """
    logger.info(
        f"Attempting to export DataFrame to CSV. Suggested filename: {filename_suggestion}"
    )

    # Validation using helper
    try:
        _validate_non_empty_data(data, "card data")
    except ValueError as exc:
        logger.warning(
            "No data provided to export_dataframe_to_csv. Skipping CSV export."
        )
        raise gr.Error("No card data available") from exc

    try:
        base_name_from_suggestion = "ankigen_cards"  # Default base part

        # Sanitize and use the suggestion (e.g., subject name) if provided
        if filename_suggestion and isinstance(filename_suggestion, str):
            processed_suggestion = filename_suggestion.removesuffix(".csv")
            safe_suggestion = (
                processed_suggestion.replace(" ", "_")
                .replace("/", "-")
                .replace("\\", "-")
            )
            if safe_suggestion:
                base_name_from_suggestion = f"ankigen_{safe_suggestion[:50]}"

        final_filename = _generate_timestamped_filename(
            base_name_from_suggestion, "csv"
        )
        _ensure_output_directory(final_filename)

        data.to_csv(final_filename, index=False)
        logger.info(f"Successfully exported DataFrame to CSV: {final_filename}")
        gr.Info(f"CSV ready for download: {os.path.basename(final_filename)}")
        return final_filename
    except Exception as e:
        logger.error(f"Error exporting DataFrame to CSV: {e}", exc_info=True)
        # gr.Error is an exception class: building it without `raise` shows
        # nothing to the user. gr.Warning displays the message while letting
        # this function keep its "return None on failure" contract.
        gr.Warning(f"Error exporting DataFrame to CSV: {e}")
        return None
|
| 825 |
+
|
| 826 |
+
|
| 827 |
+
# --- New DataFrame to APKG Exporter (for Main Generator Tab) ---
|
| 828 |
+
def export_dataframe_to_apkg(
    df: pd.DataFrame,
    output_path: Optional[str],
    deck_name: str,
) -> str:
    """Turn each DataFrame row into a card dict and export them as an .apkg.

    Rows that fail to convert are logged and skipped; the remaining cards are
    handed to ``export_cards_to_apkg``.

    Args:
        df: Card table; columns are read with ``row.get`` so missing ones
            fall back to an empty string (``"Basic"`` for ``Card_Type``).
        output_path: Forwarded as ``filename`` to ``export_cards_to_apkg``.
        deck_name: Forwarded to ``export_cards_to_apkg``.

    Returns:
        The value returned by ``export_cards_to_apkg``.

    Raises:
        ValueError: If no row could be converted. The validation helper may
            also raise for unusable input.
    """
    _validate_non_empty_data(df, "cards in DataFrame")

    logger.info(
        f"Starting APKG export for DataFrame with {len(df)} rows to deck '{deck_name}'. Output: {output_path}"
    )

    def _row_to_card(record) -> Dict[str, Any]:
        """Build the card dict for a single DataFrame row."""

        def text(column, default=""):
            return _format_field_as_string(record.get(column, default))

        card_type = text("Card_Type", "Basic") or "Basic"
        topic_text = text("Topic")
        difficulty_text = text("Difficulty")
        # The tag uses the difficulty with HTML stripped; the field keeps the raw value.
        difficulty_for_tag = strip_html_tags(difficulty_text)

        note_tags = []
        if topic_text:
            note_tags.append(topic_text.replace(" ", "_").replace(",", "_"))
        if difficulty_for_tag:
            note_tags.append(difficulty_for_tag.replace(" ", "_"))

        return {
            "note_type": card_type,
            "tags_for_note_object": note_tags,
            "TagsStr": " ".join(note_tags),
            "Question": text("Question"),
            "Answer": text("Answer"),
            "Explanation": text("Explanation"),
            "Example": text("Example"),
            "Prerequisites": text("Prerequisites"),
            "Learning_Outcomes": text("Learning_Outcomes"),
            "Difficulty": difficulty_text,
            "SourceURL": text("Source_URL"),
        }

    cards_for_apkg: List[Dict[str, Any]] = []
    for _, row in df.iterrows():
        try:
            cards_for_apkg.append(_row_to_card(row))
        except Exception as e:
            # Best effort: one bad row must not abort the whole export.
            logger.error(
                f"Error processing DataFrame row for APKG: {row}. Error: {e}",
                exc_info=True,
            )

    if not cards_for_apkg:
        logger.error("No cards could be processed from DataFrame for APKG export.")
        raise ValueError("No processable cards found in DataFrame for APKG export.")

    return export_cards_to_apkg(
        cards_for_apkg, filename=output_path, deck_name=deck_name
    )
|
| 890 |
+
|
| 891 |
+
|
| 892 |
+
# --- Compatibility Exports for Tests and Legacy Code ---
|
| 893 |
+
# These aliases ensure that tests expecting these names will find them.
|
| 894 |
+
|
| 895 |
+
# Export functions under expected names
|
| 896 |
+
export_csv = (
    export_dataframe_to_csv  # Alias: `export_csv` is the same function object as export_dataframe_to_csv
)
|
| 899 |
+
|
| 900 |
+
|
| 901 |
+
# MODIFIED: export_deck is now a wrapper to provide a default deck_name
|
| 902 |
+
def export_deck(
    df: pd.DataFrame,
    output_path: Optional[str] = None,
    deck_name: str = "Ankigen Generated Cards",
) -> str:
    """Export a DataFrame to APKG, supplying a default deck name.

    Thin wrapper around ``export_dataframe_to_apkg``. ``output_path`` is
    passed through untouched, including ``None``.

    Raises:
        gr.Error: If ``df`` is ``None`` or empty.
    """
    has_rows = df is not None and not df.empty
    if not has_rows:
        logger.warning("export_deck called with None or empty DataFrame.")
        # Error type and message are what the tests expect.
        raise gr.Error("No card data available")

    return export_dataframe_to_apkg(df, output_path=output_path, deck_name=deck_name)
|
| 926 |
+
|
| 927 |
+
|
| 928 |
+
# Shorter alias names bound to the same function objects as the *_to_* exporters.
export_dataframe_csv = export_dataframe_to_csv
export_dataframe_apkg = export_dataframe_to_apkg

# Names exported by `from <this module> import *`.
__all__ = [
    "BASIC_MODEL",
    "CLOZE_MODEL",
    "export_csv",
    "export_deck",
    "export_dataframe_csv",
    "export_dataframe_apkg",
    "export_cards_to_csv",
    "export_cards_to_apkg",
    "export_cards_from_crawled_content",
    "export_dataframe_to_csv",
    "export_dataframe_to_apkg",
]
|
ankigen/llm_interface.py
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Module for OpenAI client management and API call logic
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
import time
|
| 5 |
+
from typing import Optional, TypeVar
|
| 6 |
+
|
| 7 |
+
from agents import Agent, ModelSettings, Runner, set_default_openai_client
|
| 8 |
+
from openai import (
|
| 9 |
+
APIConnectionError,
|
| 10 |
+
APIStatusError,
|
| 11 |
+
AsyncOpenAI,
|
| 12 |
+
OpenAIError,
|
| 13 |
+
RateLimitError,
|
| 14 |
+
)
|
| 15 |
+
from pydantic import BaseModel
|
| 16 |
+
|
| 17 |
+
from ankigen.logging import logger
|
| 18 |
+
from ankigen.utils import ResponseCache
|
| 19 |
+
|
| 20 |
+
T = TypeVar("T", bound=BaseModel)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class OpenAIClientManager:
    """Manages the AsyncOpenAI client instance and its lifecycle.

    Usable as a sync or async context manager; leaving the context closes
    the client and drops the reference.
    """

    def __init__(self):
        self._client: Optional[AsyncOpenAI] = None
        self._api_key: Optional[str] = None

    async def initialize_client(self, api_key: str):
        """Create the AsyncOpenAI client for ``api_key``.

        Raises:
            ValueError: If the key is empty or does not start with ``sk-``.
            OpenAIError: Re-raised unchanged if the SDK rejects construction.
            RuntimeError: For any other construction failure (original error
                is chained as ``__cause__``).
        """
        if not api_key or not api_key.startswith("sk-"):
            logger.error("Invalid OpenAI API key provided for client initialization.")
            raise ValueError("Invalid OpenAI API key format.")
        self._api_key = api_key
        try:
            self._client = AsyncOpenAI(api_key=self._api_key)
            logger.info("AsyncOpenAI client initialized successfully.")
        except OpenAIError as e:  # Catch specific OpenAI errors
            logger.error(f"Failed to initialize AsyncOpenAI client: {e}", exc_info=True)
            self._client = None  # Ensure client is None on failure
            raise  # Re-raise the OpenAIError to be caught by UI
        except Exception as e:  # Catch any other unexpected errors
            logger.error(
                f"An unexpected error occurred during AsyncOpenAI client initialization: {e}",
                exc_info=True,
            )
            self._client = None
            raise RuntimeError(
                "Unexpected error initializing AsyncOpenAI client."
            ) from e

    def get_client(self) -> AsyncOpenAI:
        """Return the initialized client.

        Raises:
            RuntimeError: If the client was never initialized, failed to
                initialize, or has been closed.
        """
        if self._client is None:
            logger.error(
                "AsyncOpenAI client accessed before initialization or after a failed initialization."
            )
            raise RuntimeError(
                "AsyncOpenAI client is not initialized. Please provide a valid API key."
            )
        return self._client

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - cleanup resources."""
        self.close()
        return False

    async def __aenter__(self):
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - cleanup resources."""
        await self.aclose()
        return False

    def close(self) -> None:
        """Close the client from synchronous code (best effort).

        The async client's ``close()`` returns a coroutine; calling it without
        running it closes nothing. If no event loop is running the coroutine
        is driven to completion here; otherwise it is scheduled on the running
        loop. Errors are logged, never raised.
        """
        client, self._client = self._client, None
        if client is None:
            return
        try:
            close_fn = getattr(client, "close", None)
            if close_fn is not None:
                pending = close_fn()
                if asyncio.iscoroutine(pending):
                    try:
                        loop = asyncio.get_running_loop()
                    except RuntimeError:
                        # No loop in this thread: run the coroutine ourselves.
                        asyncio.run(pending)
                    else:
                        # Keep a reference so the task is not garbage-collected
                        # before it finishes.
                        self._close_task = loop.create_task(pending)
            logger.debug("OpenAI client closed")
        except Exception as e:
            logger.warning(f"Error closing OpenAI client: {e}")

    async def aclose(self) -> None:
        """Close the client from async code, awaiting the shutdown.

        Prefers an ``aclose`` method when the client has one and falls back to
        ``close``; the result is awaited when it is a coroutine. Errors are
        logged, never raised.
        """
        client, self._client = self._client, None
        if client is None:
            return
        try:
            close_fn = getattr(client, "aclose", None) or getattr(client, "close", None)
            if close_fn is not None:
                pending = close_fn()
                if asyncio.iscoroutine(pending):
                    await pending
            logger.debug("OpenAI client closed (async)")
        except Exception as e:
            logger.warning(f"Error closing OpenAI client: {e}")
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# --- Agents SDK Utility ---
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
async def structured_agent_call(
    openai_client: AsyncOpenAI,
    model: str,
    instructions: str,
    user_input: str,
    output_type: type[T],
    cache: Optional[ResponseCache] = None,
    cache_key: Optional[str] = None,
    temperature: float = 0.7,
    timeout: float = 120.0,
    retry_attempts: int = 3,
) -> T:
    """Run one single-turn agent call that yields a structured (Pydantic) result.

    Lightweight wrapper for simple structured-output requests; not meant for
    multi-agent workflows.

    Args:
        openai_client: AsyncOpenAI client, installed as the agents SDK default.
        model: Model name (e.g., "gpt-5.2", "gpt-5.2-chat-latest").
        instructions: System instructions for the agent.
        user_input: User prompt/input.
        output_type: Pydantic model class describing the expected output.
        cache: Optional ResponseCache; only used together with ``cache_key``.
        cache_key: Key for cache lookups and stores.
        temperature: Model temperature (default 0.7).
        timeout: Per-attempt timeout in seconds (default 120).
        retry_attempts: Total number of attempts (default 3).

    Returns:
        The agent's final output (or the cached value, re-validated into
        ``output_type`` when it was stored as a dict).

    Raises:
        asyncio.TimeoutError: If the last attempt times out.
        Exception: Whatever the last attempt raised.
        RuntimeError: If the loop finishes without a result (e.g. zero attempts).
    """

    def _backoff_seconds(attempt_index: int) -> int:
        # Exponential backoff: 4s, 8s, 16s, ...
        return 4 * (2**attempt_index)

    # 1. Serve from cache when possible.
    if cache and cache_key:
        hit = cache.get(cache_key, model)
        if hit is not None:
            logger.info(f"Using cached response for model {model}")
            # Cached entries are stored as plain dicts; rebuild the model.
            return output_type.model_validate(hit) if isinstance(hit, dict) else hit

    # 2. Point the agents SDK at the supplied client.
    set_default_openai_client(openai_client, use_for_tracing=False)

    # 3. Model settings; GPT-5.x (except chat-latest) additionally gets a
    #    reasoning setting with effort "none".
    settings_kwargs: dict = {"temperature": temperature}
    wants_reasoning = model.startswith("gpt-5") and "chat-latest" not in model
    if wants_reasoning:
        from openai.types.shared import Reasoning

        settings_kwargs["reasoning"] = Reasoning(effort="none")

    # 4. Agent with structured output.
    agent = Agent(
        name="structured_output_agent",
        instructions=instructions,
        model=model,
        model_settings=ModelSettings(**settings_kwargs),
        output_type=output_type,
    )

    # 5. Attempt loop: every failure except the last one backs off and retries.
    last_error: Optional[Exception] = None
    for attempt in range(retry_attempts):
        is_last_attempt = attempt >= retry_attempts - 1
        try:
            run_result = await asyncio.wait_for(
                Runner.run(agent, user_input),
                timeout=timeout,
            )
            output = run_result.final_output

            # Store successful results, as a dict when possible so they serialize.
            if cache and cache_key and output is not None:
                payload = output.model_dump() if isinstance(output, BaseModel) else output
                cache.set(cache_key, model, payload)

            logger.debug(f"Successfully received response from model {model}")
            return output

        except asyncio.TimeoutError as e:
            last_error = e
            if is_last_attempt:
                logger.error(f"Agent timed out after {retry_attempts} attempts")
                raise
            wait_time = _backoff_seconds(attempt)
            logger.warning(
                f"Agent timed out (attempt {attempt + 1}/{retry_attempts}), "
                f"retrying in {wait_time}s..."
            )
            await asyncio.sleep(wait_time)
        except Exception as e:
            last_error = e
            if is_last_attempt:
                logger.error(f"Agent failed after {retry_attempts} attempts: {e}")
                raise
            wait_time = _backoff_seconds(attempt)
            logger.warning(
                f"Agent failed (attempt {attempt + 1}/{retry_attempts}): {e}, "
                f"retrying in {wait_time}s..."
            )
            await asyncio.sleep(wait_time)

    raise RuntimeError(f"Retry loop exited without result: {last_error}")
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
# Generic schema for arbitrary JSON structured outputs
|
| 229 |
+
class GenericJsonOutput(BaseModel):
    """Generic container for JSON output - allows any structure.

    Declares no fields of its own; the ``extra = "allow"`` setting keeps
    whatever keys are supplied instead of rejecting or dropping them.
    """

    model_config = {"extra": "allow"}  # Allow arbitrary fields
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
async def structured_output_completion(
    openai_client: AsyncOpenAI,
    model: str,
    response_format: dict,  # Legacy parameter - kept for API compatibility
    system_prompt: str,
    user_prompt: str,
    cache: ResponseCache,
) -> Optional[dict]:
    """Dict-returning wrapper around ``structured_agent_call``.

    ``response_format`` is accepted but ignored. The system prompt gets a
    "respond as JSON" sentence appended unless it already contains one, and
    the result is converted back to a plain dict. For typed outputs call
    ``structured_agent_call()`` directly.

    Raises:
        Exception: Any error from the underlying call is logged and re-raised.
    """
    # The cache key is built from the prompts as given, before any
    # instruction is appended or whitespace stripped.
    cache_key = f"{system_prompt}:{user_prompt}"

    if "JSON object matching the specified schema" in system_prompt:
        effective_system_prompt = system_prompt
    else:
        effective_system_prompt = f"{system_prompt}\nProvide your response as a JSON object matching the specified schema."

    try:
        outcome = await structured_agent_call(
            openai_client=openai_client,
            model=model,
            instructions=effective_system_prompt.strip(),
            user_input=user_prompt.strip(),
            output_type=GenericJsonOutput,
            cache=cache,
            cache_key=cache_key,
            temperature=0.7,
        )
        # Hand back a dict for backward compatibility.
        return outcome.model_dump() if isinstance(outcome, BaseModel) else outcome
    except Exception as e:
        logger.error(
            f"structured_output_completion failed for model {model}: {e}",
            exc_info=True,
        )
        raise  # Re-raise unexpected errors
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
# Specific OpenAI exceptions to retry on
|
| 282 |
+
RETRYABLE_OPENAI_ERRORS = (
    APIConnectionError,
    RateLimitError,
    APIStatusError,  # NOTE(review): in openai-python this appears to be the base class for every non-2xx response (4xx as well as 5xx, including RateLimitError) — confirm that retrying client errors is intended
)
|
| 287 |
+
|
| 288 |
+
# --- New OpenAIRateLimiter Class (Subtask 9.2) ---
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
class OpenAIRateLimiter:
    """Manages token usage to proactively stay within (estimated) OpenAI rate limits.

    Uses a fixed window: tokens are counted from ``current_window_start_time``
    and the counter is reset once ``window_seconds`` have elapsed.
    """

    def __init__(self, tokens_per_minute: int = 60000, window_seconds: float = 60.0):
        """
        Args:
            tokens_per_minute: Token budget per window.
            window_seconds: Window length in seconds (default 60, i.e. the
                budget is per minute).
        """
        self.tokens_per_minute_limit: int = tokens_per_minute
        self.window_seconds: float = window_seconds
        self.tokens_used_current_window: int = 0
        self.current_window_start_time: float = time.monotonic()

    async def wait_if_needed(self, estimated_tokens_for_request: int):
        """Block until the request fits in the current window, then record it.

        The budget is re-checked after every sleep. Several coroutines can be
        waiting at once; unconditionally resetting the counter after waking
        would erase tokens another waiter has just recorded.

        A request larger than the whole budget is let through as soon as the
        window is empty, since waiting longer can never make it fit.
        """
        while True:
            current_time = time.monotonic()

            # Start a fresh window once the current one has expired.
            if current_time - self.current_window_start_time >= self.window_seconds:
                self.current_window_start_time = current_time
                self.tokens_used_current_window = 0
                logger.debug("OpenAIRateLimiter: Window reset.")

            fits = (
                self.tokens_used_current_window + estimated_tokens_for_request
                <= self.tokens_per_minute_limit
            )
            if fits or self.tokens_used_current_window == 0:
                break

            time_to_wait = (
                self.current_window_start_time + self.window_seconds
            ) - current_time
            logger.info(
                f"OpenAIRateLimiter: Approaching token limit. Waiting for {time_to_wait:.2f} seconds to reset window."
            )
            await asyncio.sleep(max(time_to_wait, 0.0))

        # Safe to proceed: account for this request.
        self.tokens_used_current_window += estimated_tokens_for_request
        logger.debug(
            f"OpenAIRateLimiter: Tokens used in current window: {self.tokens_used_current_window}/{self.tokens_per_minute_limit}"
        )
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
# Global instance of the rate limiter
|
| 335 |
+
# This assumes a single rate limit bucket for all calls from this application instance.
|
| 336 |
+
# More sophisticated scenarios might need per-model or per-key limiters.
|
| 337 |
+
openai_rate_limiter = OpenAIRateLimiter() # Using default 60k TPM for now
|
ankigen/logging.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def setup_logger(name="ankigen", log_level=logging.INFO):
    """Set up and return a logger with console and (when possible) file handlers.

    Safe to call repeatedly for the same name: handlers from an earlier call
    are removed and closed first, so handlers do not pile up and the old log
    file handle is released.

    Args:
        name: Logger name; also the prefix of the log file name.
        log_level: Level applied to the logger.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(name)
    logger.setLevel(log_level)

    # Drop and close this logger's own handlers. (hasHandlers() would also
    # report handlers of ancestor loggers, which are not ours to touch.)
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(lineno)d - %(message)s"
    )

    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File handler: logs live in ~/.ankigen/logs/, one file per day
    # (e.g., ankigen_20231027.log).
    try:
        log_dir = os.path.join(os.path.expanduser("~"), ".ankigen", "logs")
        os.makedirs(log_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d")
        log_file = os.path.join(log_dir, f"{name}_{timestamp}.log")

        file_handler = logging.FileHandler(log_file, encoding="utf-8")
    except OSError as exc:
        # A read-only or missing home directory must not prevent the
        # application from starting; keep console logging only.
        logger.warning("File logging disabled: %s", exc)
    else:
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# Create a default logger instance for easy import and use.
|
| 46 |
+
# Projects can also create their own named loggers using setup_logger(name="my_module_logger")
|
| 47 |
+
logger = setup_logger()
|
ankigen/models.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
|
| 4 |
+
# Module for Pydantic data models
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class Step(BaseModel):
    """Pydantic model with two required string fields."""

    explanation: str
    output: str
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class Subtopics(BaseModel):
    """A list of ``Step`` objects plus a list of result strings (both required)."""

    steps: List[Step]
    result: List[str]
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class Topics(BaseModel):
    """Wrapper holding a required list of ``Subtopics``."""

    result: List[Subtopics]
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class CardFront(BaseModel):
    """Front side of a card; ``question`` is optional and defaults to None."""

    question: Optional[str] = None
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class CardBack(BaseModel):
    """Back side of a card.

    ``answer`` may be omitted (defaults to None); ``explanation`` and
    ``example`` are required strings.
    """

    answer: Optional[str] = None
    explanation: str
    example: str
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class Card(BaseModel):
    """A card made of a ``CardFront``, a ``CardBack`` and optional metadata."""

    front: CardFront
    back: CardBack
    # Free-form dict; its schema is not defined in this model.
    metadata: Optional[dict] = None
    card_type: str = "basic"  # Add card_type, default to basic
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class CardList(BaseModel):
    """A topic string together with its list of ``Card`` objects."""

    topic: str
    cards: List[Card]
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class ConceptBreakdown(BaseModel):
    """A main concept with its prerequisites, learning outcomes and difficulty."""

    main_concept: str
    prerequisites: List[str]
    learning_outcomes: List[str]
    # Plain str, so the values below are a convention, not enforced by the type.
    difficulty_level: str  # "beginner", "intermediate", "advanced"
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class CardGeneration(BaseModel):
    """A ``Card`` bundled with the concept, thought process and verification steps."""

    concept: str
    thought_process: str
    verification_steps: List[str]
    card: Card
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class LearningSequence(BaseModel):
    """Top-level container: a topic, its concept breakdowns, generated cards,
    and two lists of strings (study order and review recommendations)."""

    topic: str
    concepts: List[ConceptBreakdown]
    cards: List[CardGeneration]
    suggested_study_order: List[str]
    review_recommendations: List[str]
|
ankigen/ui_logic.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Module for functions that build or manage UI sections/logic
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from typing import List
|
| 6 |
+
|
| 7 |
+
from ankigen.utils import get_logger
|
| 8 |
+
from ankigen.models import Card
|
| 9 |
+
|
| 10 |
+
logger = get_logger()
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def update_mode_visibility(mode: str, current_subject: str):
    """Build the Gradio updates that reset the UI for a generation mode.

    Only the 'subject' mode is supported at present, so ``mode`` is accepted
    but not consulted; the parameter is kept for future extensibility.

    Args:
        mode: Selected generation mode (currently unused).
        current_subject: Text to put back into the subject textbox.

    Returns:
        A 5-tuple of ``gr.update`` results, in this order: subject group
        (visible), cards output (visible), subject textbox (value set to
        ``current_subject``), output DataFrame (empty, standard columns),
        total-cards HTML (counter reset to 0 and hidden).
    """
    # An empty frame with the standard columns clears the output table.
    empty_output_df = pd.DataFrame(
        columns=[
            "Index",
            "Topic",
            "Card_Type",
            "Question",
            "Answer",
            "Explanation",
            "Example",
            "Prerequisites",
            "Learning_Outcomes",
            "Difficulty",
        ]
    )
    reset_total_html = "<div><b>Total Cards Generated:</b> <span id='total-cards-count'>0</span></div>"

    subject_group_update = gr.update(visible=True)  # subject_mode (Group) - always visible
    cards_output_update = gr.update(visible=True)  # cards_output - always visible
    subject_text_update = gr.update(value=current_subject)  # subject textbox value
    output_df_update = gr.update(value=empty_output_df)  # output DataFrame
    total_cards_update = gr.update(value=reset_total_html, visible=False)  # total_cards_html

    return (
        subject_group_update,
        cards_output_update,
        subject_text_update,
        output_df_update,
        total_cards_update,
    )
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# --- Card Preview and Editing Utilities ---
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def cards_to_dataframe(cards: List[Card]) -> pd.DataFrame:
    """Flatten Card objects into a DataFrame for display in the UI.

    Each card becomes one row. ``ID`` is the card's 1-based position in
    ``cards``; topic, tags and source URL are read from ``card.metadata``
    when it is present and non-empty, otherwise defaults are used.

    Args:
        cards: Cards to convert.

    Returns:
        A DataFrame with the fixed column order ID, Topic, Front, Back,
        Tags, Card Type, Explanation, Example, Source_URL (also when
        ``cards`` is empty).
    """
    # Declared explicitly so the frame has the same shape even with no rows.
    column_order = [
        "ID",
        "Topic",
        "Front",
        "Back",
        "Tags",
        "Card Type",
        "Explanation",
        "Example",
        "Source_URL",
    ]

    def _as_row(display_id, card):
        # Treat missing/empty metadata as an empty mapping so every lookup
        # below falls through to its default.
        meta = card.metadata if card.metadata else {}
        tag_values = meta.get("tags", [])
        return {
            "ID": display_id,  # 1-indexed ID for display
            "Topic": meta.get("topic", "N/A"),
            "Front": card.front.question,
            "Back": card.back.answer,
            "Tags": ", ".join(tag_values) if tag_values else "",
            "Card Type": card.card_type or "Basic",
            "Explanation": card.back.explanation or "",
            "Example": card.back.example or "",
            "Source_URL": meta.get("source_url", ""),
        }

    rows = [_as_row(position, card) for position, card in enumerate(cards, start=1)]
    return pd.DataFrame(rows, columns=column_order)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def _cell_text(row: pd.Series, column: str, fallback):
    """Return ``row[column]`` as text, or *fallback* if the cell is absent or null."""
    value = row.get(column)
    # A missing column yields None; empty cells arrive as None/NaN/NA.
    # Stringifying those would write the literal text "None"/"nan" into a card.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return fallback
    return str(value)


def dataframe_to_cards(df: pd.DataFrame, original_cards: List[Card]) -> List[Card]:
    """Apply edits made in a DataFrame back onto a list of Card objects.

    The DataFrame's ``ID`` column is taken to be the 1-based index into
    ``original_cards``. For every row whose ID resolves to a card, a copy of
    that card is produced with the ``Front``, ``Back``, ``Explanation``,
    ``Example``, ``Tags``, ``Topic`` and ``Card Type`` cells applied. A cell
    that is missing or null keeps the card's existing value.

    Args:
        df: Edited table; rows are processed in DataFrame order.
        original_cards: Cards the table was built from. They are not mutated.

    Returns:
        The updated cards, one per usable row. Rows with an unreadable or
        out-of-range ID are logged and skipped. If applying a row's edits
        fails, the error is logged and the unmodified original card is used
        for that row. An empty DataFrame yields an empty list.
    """
    updated_cards: List[Card] = []
    if df.empty:
        return updated_cards

    for index, row in df.iterrows():
        # Resolve the target card first, so a bad ID can never make the
        # error path fall back to an index left over from a previous row.
        try:
            card_id = int(row["ID"])  # DataFrame ID is 1-indexed
        except (KeyError, TypeError, ValueError) as e:
            logger.error(
                f"Error processing row {index} from DataFrame: {row}. Error: {e}"
            )
            continue

        original_card_index = card_id - 1
        if not 0 <= original_card_index < len(original_cards):
            logger.warning(
                f"Card ID {card_id} from DataFrame is out of bounds for original_cards list."
            )
            continue

        card_to_update = original_cards[original_card_index]
        try:
            new_front = card_to_update.front.copy(
                update={
                    "question": _cell_text(row, "Front", card_to_update.front.question)
                }
            )
            new_back = card_to_update.back.copy(
                update={
                    "answer": _cell_text(row, "Back", card_to_update.back.answer),
                    "explanation": _cell_text(
                        row, "Explanation", card_to_update.back.explanation
                    ),
                    "example": _cell_text(row, "Example", card_to_update.back.example),
                }
            )

            existing_tags = (
                card_to_update.metadata.get("tags", [])
                if card_to_update.metadata
                else []
            )
            tags_str = _cell_text(row, "Tags", ",".join(existing_tags))
            new_tags = [t.strip() for t in tags_str.split(",") if t.strip()]

            # Copy so the original card's metadata dict is left untouched.
            new_metadata = (
                card_to_update.metadata.copy() if card_to_update.metadata else {}
            )
            new_metadata["tags"] = new_tags
            new_metadata["topic"] = _cell_text(
                row, "Topic", new_metadata.get("topic", "N/A")
            )

            updated_card = card_to_update.copy(
                update={
                    "front": new_front,
                    "back": new_back,
                    "card_type": _cell_text(
                        row, "Card Type", card_to_update.card_type or "Basic"
                    ),
                    "metadata": new_metadata,
                }
            )
        except (ValueError, KeyError, AttributeError, TypeError) as e:
            logger.error(
                f"Error processing row {index} from DataFrame: {row}. Error: {e}"
            )
            # Best effort: keep the card this row refers to, unedited.
            updated_cards.append(card_to_update)
            continue

        updated_cards.append(updated_card)

    return updated_cards
|
ankigen/utils.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Module for utility functions (logging, caching, web fetching)
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
from logging.handlers import RotatingFileHandler
|
| 5 |
+
import sys
|
| 6 |
+
import hashlib
|
| 7 |
+
import requests
|
| 8 |
+
from bs4 import BeautifulSoup
|
| 9 |
+
from typing import Any, Optional
|
| 10 |
+
import time
|
| 11 |
+
|
| 12 |
+
# --- Logging Setup ---
|
| 13 |
+
_logger_instance: Optional[logging.Logger] = None


def setup_logging() -> logging.Logger:
    """Configure the shared "ankigen" logger once and return it.

    The logger is set to DEBUG and given two handlers: a rotating file
    handler on ``ankigen.log`` (1 MiB per file, 5 backups, DEBUG and up,
    detailed format) and a stdout handler (INFO and up, short format).
    Later calls return the already-configured logger unchanged.

    If the log file cannot be opened, file logging is skipped and a warning
    is emitted on the console handler instead of raising, so that importing
    the package does not fail in a read-only working directory.

    Returns:
        The configured ``logging.Logger`` named "ankigen".
    """
    global _logger_instance
    if _logger_instance is not None:
        return _logger_instance

    logger = logging.getLogger("ankigen")
    logger.setLevel(logging.DEBUG)  # Handlers do their own level filtering

    # Remove handlers left by any earlier configuration of this logger, and
    # close them so an old log file descriptor is not leaked.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    detailed_formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(lineno)d - %(message)s"
    )
    simple_formatter = logging.Formatter("%(levelname)s: %(message)s")

    file_error: Optional[OSError] = None
    try:
        file_handler: Optional[RotatingFileHandler] = RotatingFileHandler(
            "ankigen.log", maxBytes=1024 * 1024, backupCount=5
        )
    except OSError as exc:
        # The handler opens the file immediately; this fails if the working
        # directory is not writable.
        file_handler = None
        file_error = exc

    if file_handler is not None:
        file_handler.setLevel(logging.DEBUG)  # File gets everything from DEBUG up
        file_handler.setFormatter(detailed_formatter)
        logger.addHandler(file_handler)

    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)  # Console gets INFO and above
    console_handler.setFormatter(simple_formatter)
    logger.addHandler(console_handler)

    if file_error is not None:
        logger.warning(f"File logging disabled, could not open ankigen.log: {file_error}")

    _logger_instance = logger
    return logger
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def get_logger() -> logging.Logger:
    """Return the shared logger, configuring it on first use.

    When no logger has been stored in ``_logger_instance`` yet, this calls
    ``setup_logging()`` and returns its result; otherwise the stored
    instance is returned as-is.
    """
    existing = _logger_instance
    return setup_logging() if existing is None else existing
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# Initialize logger when module is loaded
|
| 59 |
+
logger = get_logger()
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# --- Caching ---
|
| 63 |
+
class ResponseCache:
    """Bounded in-memory LRU cache for API responses keyed by (model, prompt).

    Entries live in a single dict whose insertion order doubles as the
    recency order: the first key is the least recently used, the last key the
    most recently used. Reading or re-writing an entry moves it to the end,
    so lookups, updates and evictions avoid scanning a separate list.

    Attributes:
        maxsize: Maximum number of entries kept. With ``maxsize <= 0``
            nothing is stored.
        hits: Number of ``get`` calls that found an entry.
        misses: Number of ``get`` calls that did not.
    """

    def __init__(self, maxsize: int = 128):
        self.maxsize = maxsize
        self._cache = {}  # {key: response}, ordered oldest -> newest use
        self.hits = 0
        self.misses = 0

    @property
    def _access_order(self) -> list:
        """Snapshot of the cache keys from least to most recently used."""
        return list(self._cache)

    def get(self, prompt: str, model: str) -> Optional[Any]:
        """Return the cached response for (prompt, model), or None on a miss.

        A hit marks the entry as most recently used. A stored value of None
        cannot be told apart from a miss by the caller.
        """
        cache_key = self._create_key(prompt, model)

        try:
            # Pop and re-insert to move the entry to the "most recent" end.
            response = self._cache.pop(cache_key)
        except KeyError:
            self.misses += 1
            logger.debug(
                f"Cache MISS: {cache_key[:16]}... (hits={self.hits}, misses={self.misses})"
            )
            return None

        self._cache[cache_key] = response
        self.hits += 1
        logger.debug(
            f"Cache HIT: {cache_key[:16]}... (hits={self.hits}, misses={self.misses})"
        )
        return response

    def set(self, prompt: str, model: str, response: Any):
        """Store a response, evicting least recently used entries when full."""
        if self.maxsize <= 0:
            # No capacity: evicting from an empty cache used to raise IndexError.
            return

        cache_key = self._create_key(prompt, model)

        # Drop any existing entry first so the new value lands at the
        # "most recent" end and does not count toward the size check.
        self._cache.pop(cache_key, None)

        while len(self._cache) >= self.maxsize:
            evicted_key = next(iter(self._cache))  # oldest entry
            del self._cache[evicted_key]
            logger.debug(
                f"Cache EVICT: {evicted_key[:16]}... (size={len(self._cache)})"
            )

        self._cache[cache_key] = response
        logger.debug(f"Cache SET: {cache_key[:16]}... (size={len(self._cache)})")

    def clear(self) -> None:
        """Clear all cache entries and reset the hit/miss counters."""
        self._cache.clear()
        self.hits = 0
        self.misses = 0
        logger.debug("Cache CLEARED")

    def _create_key(self, prompt: str, model: str) -> str:
        """Create the cache key: hex MD5 digest of ``"{model}:{prompt}"``."""
        # Hashing keeps keys a fixed, small size regardless of prompt length.
        # MD5 is used only as a fingerprint here, not for security.
        return hashlib.md5(f"{model}:{prompt}".encode("utf-8")).hexdigest()
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# --- Web Content Fetching ---
|
| 127 |
+
def fetch_webpage_text(url: str) -> str:
    """Fetch a URL and return its main text content, one non-empty line per row.

    The page is downloaded with a browser-like User-Agent and a 15 second
    timeout, then parsed with lxml, falling back to Python's built-in
    ``html.parser`` when lxml is not available. ``<script>`` and ``<style>``
    elements are removed. Text is taken from ``<main>``, else ``<article>``,
    else ``<body>``. Each line is stripped and blank lines are dropped.

    Args:
        url: Address of the page to fetch.

    Returns:
        The cleaned text, or an empty string when no text could be extracted.

    Raises:
        ConnectionError: The HTTP request failed (any ``requests``
            ``RequestException``, including a non-2xx status).
        RuntimeError: The HTML could not be parsed, or another unexpected
            error occurred while processing the page.
        ValueError: Propagated unchanged if raised during processing.
    """
    # Imported here so the block stays self-contained; bs4 is already a
    # module-level dependency of this file.
    from bs4 import FeatureNotFound

    logger_util = get_logger()  # Use the logger from this module
    try:
        logger_util.info(f"Fetching content from URL: {url}")
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        logger_util.debug(f"Parsing HTML content for {url}")
        try:
            soup = BeautifulSoup(response.text, "lxml")
        except (ImportError, FeatureNotFound):
            # BeautifulSoup reports an unavailable parser as FeatureNotFound
            # (a ValueError), not ImportError, so both must be caught for
            # the fallback to take effect.
            logger_util.warning("lxml not found, using html.parser instead.")
            soup = BeautifulSoup(response.text, "html.parser")
        except Exception as e:  # Catch other BeautifulSoup init errors
            logger_util.error(
                f"BeautifulSoup initialization failed for {url}: {e}", exc_info=True
            )
            raise RuntimeError(f"Failed to parse HTML content for {url}.") from e

        for script_or_style in soup(["script", "style"]):
            script_or_style.extract()

        main_content = soup.find("main")
        if not main_content:
            main_content = soup.find("article")

        if main_content:
            text = main_content.get_text()
            logger_util.debug(f"Extracted text from <{main_content.name}> tag.")
        else:
            body = soup.find("body")
            if body:
                text = body.get_text()
                logger_util.debug("Extracted text from <body> tag (fallback).")
            else:
                text = ""
                logger_util.warning(f"Could not find <body> tag in {url}")

        # Simple text cleaning: strip every line and drop the empty ones.
        lines = (line.strip() for line in text.splitlines())
        cleaned_text = "\n".join(line for line in lines if line)

        if not cleaned_text:
            logger_util.warning(f"Could not extract meaningful text from {url}")
            return ""

        logger_util.info(
            f"Successfully extracted text from {url} (Length: {len(cleaned_text)} chars)"
        )
        return cleaned_text

    except requests.exceptions.RequestException as e:
        logger_util.error(f"Network error fetching URL {url}: {e}", exc_info=True)
        raise ConnectionError(f"Could not fetch URL: {e}") from e
    except (ValueError, ConnectionError, RuntimeError) as e:
        # These types are part of the function's contract; log and re-raise
        # them unchanged, with the original traceback.
        logger_util.error(f"Error processing URL {url}: {e}", exc_info=True)
        raise
    except Exception as e:
        logger_util.error(f"Error processing URL {url}: {e}", exc_info=True)
        raise RuntimeError(
            f"An unexpected error occurred while processing the URL: {e}"
        ) from e
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
# --- New Synchronous RateLimiter Class ---
|
| 196 |
+
class RateLimiter:
    """A simple synchronous rate limiter.

    Enforces a minimum interval between consecutive ``wait()`` returns by
    sleeping the calling thread. No lock is used, so an instance is meant to
    be driven from a single thread.

    Attributes:
        min_interval_seconds: Minimum spacing between requests, i.e.
            ``1 / requests_per_second``.
        last_request_timestamp: ``time.monotonic()`` reading taken when the
            last ``wait()`` returned; ``-inf`` until the first call.
    """

    def __init__(self, requests_per_second: float):
        """Create a limiter allowing at most ``requests_per_second`` calls.

        Raises:
            ValueError: If ``requests_per_second`` is zero or negative.
        """
        if requests_per_second <= 0:
            raise ValueError("Requests per second must be positive.")
        self.min_interval_seconds: float = 1.0 / requests_per_second
        # The monotonic clock's reference point is undefined, so a fixed
        # starting value such as 0.0 cannot be compared with it. -inf means
        # "no request made yet" and guarantees the first wait() never blocks.
        self.last_request_timestamp: float = float("-inf")

    def wait(self) -> None:
        """Block until it's safe to make the next request."""
        # Monotonic clock: unaffected by system clock adjustments.
        elapsed = time.monotonic() - self.last_request_timestamp
        remaining = self.min_interval_seconds - elapsed
        if remaining > 0:
            time.sleep(remaining)

        self.last_request_timestamp = time.monotonic()
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
# --- Existing Utility Functions (if any) ---
|
| 220 |
+
# def some_other_util_function():
|
| 221 |
+
# pass
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def strip_html_tags(text: str) -> str:
    """Return ``text`` with HTML markup removed and outer whitespace trimmed.

    Strings are parsed with BeautifulSoup's ``html.parser`` (no regular
    expressions) and reduced to their text content. Any non-string argument
    is not parsed; it is converted with ``str()`` and returned as-is.
    """
    if isinstance(text, str):
        # Let a real HTML parser find the tags instead of pattern matching.
        return BeautifulSoup(text, "html.parser").get_text().strip()

    # Non-string input: coerce to a string and skip parsing.
    return str(text)
|
app.py
CHANGED
|
@@ -7,23 +7,23 @@ from datetime import datetime
|
|
| 7 |
import gradio as gr
|
| 8 |
import pandas as pd
|
| 9 |
|
| 10 |
-
from
|
| 11 |
AVAILABLE_MODELS,
|
| 12 |
orchestrate_card_generation,
|
| 13 |
) # GENERATION_MODES is internal to card_generator
|
| 14 |
-
from
|
| 15 |
export_dataframe_to_apkg,
|
| 16 |
export_dataframe_to_csv,
|
| 17 |
) # Anki models (BASIC_MODEL, CLOZE_MODEL) are internal to exporters
|
| 18 |
-
from
|
| 19 |
OpenAIClientManager,
|
| 20 |
) # structured_output_completion is internal to core modules
|
| 21 |
-
from
|
| 22 |
-
from
|
| 23 |
ResponseCache,
|
| 24 |
get_logger,
|
| 25 |
) # fetch_webpage_text is used by card_generator
|
| 26 |
-
from
|
| 27 |
|
| 28 |
# --- Initialization ---
|
| 29 |
logger = get_logger()
|
|
@@ -341,7 +341,7 @@ def create_ankigen_interface(theme=None, css=None, js=None):
|
|
| 341 |
label="Download Deck", visible=False
|
| 342 |
)
|
| 343 |
|
| 344 |
-
# --- Event Handlers --- (Updated to use functions from
|
| 345 |
generation_mode.change(
|
| 346 |
fn=update_mode_visibility,
|
| 347 |
inputs=[
|
|
|
|
| 7 |
import gradio as gr
|
| 8 |
import pandas as pd
|
| 9 |
|
| 10 |
+
from ankigen.card_generator import (
|
| 11 |
AVAILABLE_MODELS,
|
| 12 |
orchestrate_card_generation,
|
| 13 |
) # GENERATION_MODES is internal to card_generator
|
| 14 |
+
from ankigen.exporters import (
|
| 15 |
export_dataframe_to_apkg,
|
| 16 |
export_dataframe_to_csv,
|
| 17 |
) # Anki models (BASIC_MODEL, CLOZE_MODEL) are internal to exporters
|
| 18 |
+
from ankigen.llm_interface import (
|
| 19 |
OpenAIClientManager,
|
| 20 |
) # structured_output_completion is internal to core modules
|
| 21 |
+
from ankigen.ui_logic import update_mode_visibility
|
| 22 |
+
from ankigen.utils import (
|
| 23 |
ResponseCache,
|
| 24 |
get_logger,
|
| 25 |
) # fetch_webpage_text is used by card_generator
|
| 26 |
+
from ankigen.auto_config import AutoConfigService
|
| 27 |
|
| 28 |
# --- Initialization ---
|
| 29 |
logger = get_logger()
|
|
|
|
| 341 |
label="Download Deck", visible=False
|
| 342 |
)
|
| 343 |
|
| 344 |
+
# --- Event Handlers --- (Updated to use functions from ankigen)
|
| 345 |
generation_mode.change(
|
| 346 |
fn=update_mode_visibility,
|
| 347 |
inputs=[
|
pyproject.toml
CHANGED
|
@@ -49,10 +49,13 @@ cli = [
|
|
| 49 |
]
|
| 50 |
|
| 51 |
[project.scripts]
|
| 52 |
-
ankigen = "
|
| 53 |
|
| 54 |
[tool.setuptools]
|
| 55 |
py-modules = ["app"]
|
| 56 |
|
|
|
|
|
|
|
|
|
|
| 57 |
[tool.pytest.ini_options]
|
| 58 |
anyio_backend = "asyncio"
|
|
|
|
| 49 |
]
|
| 50 |
|
| 51 |
[project.scripts]
|
| 52 |
+
ankigen = "ankigen.cli:main"
|
| 53 |
|
| 54 |
[tool.setuptools]
|
| 55 |
py-modules = ["app"]
|
| 56 |
|
| 57 |
+
[tool.setuptools.packages.find]
|
| 58 |
+
include = ["ankigen*"]
|
| 59 |
+
|
| 60 |
[tool.pytest.ini_options]
|
| 61 |
anyio_backend = "asyncio"
|