Spaces:

brickfrog
/

ankigen

Build error

App Files Files Community

brickfrog committed on Mar 12

Commit

f5605ad

verified ·

1 Parent(s): f403842

Upload folder using huggingface_hub

Browse files

Files changed (26) hide show

README.md +6 -6
ankigen/__init__.py +1 -0
ankigen/agents/.env.example +199 -0
ankigen/agents/__init__.py +12 -0
ankigen/agents/base.py +257 -0
ankigen/agents/config.py +260 -0
ankigen/agents/generators.py +405 -0
ankigen/agents/integration.py +287 -0
ankigen/agents/performance.py +475 -0
ankigen/agents/schemas.py +189 -0
ankigen/agents/security.py +397 -0
ankigen/agents/templates/generators.j2 +16 -0
ankigen/agents/token_tracker.py +221 -0
ankigen/auto_config.py +211 -0
ankigen/card_generator.py +311 -0
ankigen/cli.py +372 -0
ankigen/context7.py +339 -0
ankigen/exceptions.py +86 -0
ankigen/exporters.py +943 -0
ankigen/llm_interface.py +337 -0
ankigen/logging.py +47 -0
ankigen/models.py +62 -0
ankigen/ui_logic.py +168 -0
ankigen/utils.py +231 -0
app.py +7 -7
pyproject.toml +4 -1

README.md CHANGED Viewed

@@ -56,21 +56,21 @@ Generate flashcards directly from your terminal with intelligent auto-configurat
 ```bash
 # Quick generation (auto-detects best settings)
-uv run python -m ankigen_core.cli -p "Basic SQL"
 # Custom settings
-uv run python -m ankigen_core.cli -p "React Hooks" \
   --topics 5 \
   --cards-per-topic 8 \
   --output hooks.apkg
 # Export to CSV
-uv run python -m ankigen_core.cli -p "Docker basics" \
   --format csv \
   -o docker.csv
 # Skip confirmation prompt
-uv run python -m ankigen_core.cli -p "Python Lists" --no-confirm
 ```
 **CLI Options:**
@@ -100,7 +100,7 @@ uv run python -m ankigen_core.cli -p "Python Lists" --no-confirm
 ## Project Structure
 - `app.py`: Main Gradio web application
-- `ankigen_core/`: Core logic modules
   - `cli.py`: Command-line interface
   - `agents/`: Agent system implementation
   - `card_generator.py`: Card generation orchestration
@@ -123,7 +123,7 @@ uv run python -m ankigen_core.cli -p "Python Lists" --no-confirm
 3. Run with coverage:
    ```bash
-   uv run pytest --cov=ankigen_core tests/
    ```
 ## License

 ```bash
 # Quick generation (auto-detects best settings)
+uv run python -m ankigen.cli -p "Basic SQL"
 # Custom settings
+uv run python -m ankigen.cli -p "React Hooks" \
   --topics 5 \
   --cards-per-topic 8 \
   --output hooks.apkg
 # Export to CSV
+uv run python -m ankigen.cli -p "Docker basics" \
   --format csv \
   -o docker.csv
 # Skip confirmation prompt
+uv run python -m ankigen.cli -p "Python Lists" --no-confirm
 ```
 **CLI Options:**
 ## Project Structure
 - `app.py`: Main Gradio web application
+- `ankigen/`: Core logic modules
   - `cli.py`: Command-line interface
   - `agents/`: Agent system implementation
   - `card_generator.py`: Card generation orchestration
 3. Run with coverage:
    ```bash
+   uv run pytest --cov=ankigen tests/
    ```
 ## License

ankigen/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # This file marks ankigen as a Python package

ankigen/agents/.env.example ADDED Viewed

	@@ -0,0 +1,199 @@

+# AnkiGen Agent System Configuration
+# Copy this file to .env and modify as needed
+# =====================================
+# AGENT OPERATING MODE
+# =====================================
+# Main operating mode: legacy, agent_only, hybrid, a_b_test
+ANKIGEN_AGENT_MODE=hybrid
+# A/B testing configuration (only used when mode=a_b_test)
+ANKIGEN_AB_TEST_RATIO=0.5
+ANKIGEN_AB_TEST_USER_HASH=
+# =====================================
+# GENERATION AGENTS
+# =====================================
+# Subject Expert Agent - domain-specific card generation
+ANKIGEN_ENABLE_SUBJECT_EXPERT=true
+# Pedagogical Agent - educational effectiveness review
+ANKIGEN_ENABLE_PEDAGOGICAL_AGENT=false
+# Content Structuring Agent - formatting and organization
+ANKIGEN_ENABLE_CONTENT_STRUCTURING=false
+# Generation Coordinator - orchestrates multi-agent workflows
+ANKIGEN_ENABLE_GENERATION_COORDINATOR=false
+# =====================================
+# JUDGE AGENTS
+# =====================================
+# Content Accuracy Judge - fact-checking and accuracy
+ANKIGEN_ENABLE_CONTENT_JUDGE=true
+# Pedagogical Judge - educational effectiveness
+ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE=false
+# Clarity Judge - communication and readability
+ANKIGEN_ENABLE_CLARITY_JUDGE=false
+# Technical Judge - code and technical content
+ANKIGEN_ENABLE_TECHNICAL_JUDGE=false
+# Completeness Judge - quality standards and completeness
+ANKIGEN_ENABLE_COMPLETENESS_JUDGE=false
+# Judge Coordinator - orchestrates multi-judge workflows
+ANKIGEN_ENABLE_JUDGE_COORDINATOR=false
+# =====================================
+# ENHANCEMENT AGENTS
+# =====================================
+# Revision Agent - improves rejected cards
+ANKIGEN_ENABLE_REVISION_AGENT=false
+# Enhancement Agent - enriches content and metadata
+ANKIGEN_ENABLE_ENHANCEMENT_AGENT=false
+# =====================================
+# WORKFLOW FEATURES
+# =====================================
+# Multi-agent generation workflows
+ANKIGEN_ENABLE_MULTI_AGENT_GEN=false
+# Parallel judge execution
+ANKIGEN_ENABLE_PARALLEL_JUDGING=true
+# Agent handoff capabilities
+ANKIGEN_ENABLE_AGENT_HANDOFFS=false
+# Agent tracing and debugging
+ANKIGEN_ENABLE_AGENT_TRACING=true
+# =====================================
+# PERFORMANCE SETTINGS
+# =====================================
+# Agent execution timeout (seconds)
+ANKIGEN_AGENT_TIMEOUT=30.0
+# Maximum retry attempts for failed agents
+ANKIGEN_MAX_AGENT_RETRIES=3
+# Enable response caching for efficiency
+ANKIGEN_ENABLE_AGENT_CACHING=true
+# =====================================
+# QUALITY CONTROL
+# =====================================
+# Minimum judge consensus for card approval (0.0-1.0)
+ANKIGEN_MIN_JUDGE_CONSENSUS=0.6
+# Maximum revision iterations for rejected cards
+ANKIGEN_MAX_REVISION_ITERATIONS=3
+# =====================================
+# PRESET CONFIGURATIONS
+# =====================================
+# Uncomment one of these preset configurations:
+# MINIMAL SETUP - Single subject expert + content judge
+# ANKIGEN_AGENT_MODE=hybrid
+# ANKIGEN_ENABLE_SUBJECT_EXPERT=true
+# ANKIGEN_ENABLE_CONTENT_JUDGE=true
+# ANKIGEN_ENABLE_AGENT_TRACING=true
+# QUALITY FOCUSED - Full judge pipeline
+# ANKIGEN_AGENT_MODE=hybrid
+# ANKIGEN_ENABLE_SUBJECT_EXPERT=true
+# ANKIGEN_ENABLE_CONTENT_JUDGE=true
+# ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE=true
+# ANKIGEN_ENABLE_CLARITY_JUDGE=true
+# ANKIGEN_ENABLE_COMPLETENESS_JUDGE=true
+# ANKIGEN_ENABLE_JUDGE_COORDINATOR=true
+# ANKIGEN_ENABLE_PARALLEL_JUDGING=true
+# ANKIGEN_MIN_JUDGE_CONSENSUS=0.7
+# FULL PIPELINE - All agents enabled
+# ANKIGEN_AGENT_MODE=agent_only
+# ANKIGEN_ENABLE_SUBJECT_EXPERT=true
+# ANKIGEN_ENABLE_PEDAGOGICAL_AGENT=true
+# ANKIGEN_ENABLE_CONTENT_STRUCTURING=true
+# ANKIGEN_ENABLE_GENERATION_COORDINATOR=true
+# ANKIGEN_ENABLE_CONTENT_JUDGE=true
+# ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE=true
+# ANKIGEN_ENABLE_CLARITY_JUDGE=true
+# ANKIGEN_ENABLE_TECHNICAL_JUDGE=true
+# ANKIGEN_ENABLE_COMPLETENESS_JUDGE=true
+# ANKIGEN_ENABLE_JUDGE_COORDINATOR=true
+# ANKIGEN_ENABLE_REVISION_AGENT=true
+# ANKIGEN_ENABLE_ENHANCEMENT_AGENT=true
+# ANKIGEN_ENABLE_PARALLEL_JUDGING=true
+# ANKIGEN_ENABLE_AGENT_HANDOFFS=true
+# A/B TESTING SETUP - Compare agents vs legacy
+# ANKIGEN_AGENT_MODE=a_b_test
+# ANKIGEN_AB_TEST_RATIO=0.5
+# ANKIGEN_ENABLE_SUBJECT_EXPERT=true
+# ANKIGEN_ENABLE_CONTENT_JUDGE=true
+# ANKIGEN_ENABLE_AGENT_TRACING=true
+# =====================================
+# MONITORING & DEBUGGING
+# =====================================
+# Agent metrics persistence directory
+# ANKIGEN_METRICS_DIR=metrics/agents
+# Agent configuration directory
+# ANKIGEN_CONFIG_DIR=config/agents
+# Enable detailed debug logging
+# ANKIGEN_DEBUG_MODE=false
+# =====================================
+# COST OPTIMIZATION
+# =====================================
+# Model preferences for different agent types
+# ANKIGEN_GENERATION_MODEL=gpt-4o
+# ANKIGEN_JUDGE_MODEL=gpt-4o-mini
+# ANKIGEN_CRITICAL_JUDGE_MODEL=gpt-4o
+# Token usage limits per request
+# ANKIGEN_MAX_INPUT_TOKENS=4000
+# ANKIGEN_MAX_OUTPUT_TOKENS=2000
+# =====================================
+# NOTES
+# =====================================
+# Performance Impact:
+# - Each enabled agent adds processing time and cost
+# - Parallel judging reduces latency but increases concurrent API calls
+# - Caching significantly improves performance for similar requests
+# Quality vs Speed:
+# - More judges = better quality but slower generation
+# - Agent coordination adds overhead but improves consistency
+# - Enhancement agents provide best quality but highest cost
+# Recommended Starting Configuration:
+# 1. Start with hybrid mode + subject expert + content judge
+# 2. Enable A/B testing to compare with legacy system
+# 3. Gradually add more agents based on quality needs
+# 4. Monitor metrics and adjust consensus thresholds
+# Cost Considerations:
+# - Subject Expert: ~2-3x cost of legacy (higher quality)
+# - Judge Pipeline: ~1.5-2x additional cost (significant quality improvement)
+# - Enhancement Pipeline: ~1.2-1.5x additional cost (marginal improvement)
+# - Full pipeline: ~4-6x cost of legacy (maximum quality)

ankigen/agents/__init__.py ADDED Viewed

	@@ -0,0 +1,12 @@

+# Agent system for AnkiGen agentic workflows
+from .base import BaseAgentWrapper, AgentConfig
+from .generators import SubjectExpertAgent
+from .config import AgentConfigManager
+__all__ = [
+    "BaseAgentWrapper",
+    "AgentConfig",
+    "SubjectExpertAgent",
+    "AgentConfigManager",
+]

ankigen/agents/base.py ADDED Viewed

	@@ -0,0 +1,257 @@

+# Base agent wrapper and configuration classes
+from typing import Dict, Any, Optional, List
+from dataclasses import dataclass
+from pydantic import BaseModel
+import asyncio
+import json
+from openai import AsyncOpenAI
+from agents import Agent, Runner, ModelSettings
+from ankigen.logging import logger
+from .token_tracker import track_usage_from_agents_sdk
+def parse_agent_json_response(response: Any) -> Dict[str, Any]:
+    """Parse agent response, handling markdown code blocks if present"""
+    if isinstance(response, str):
+        # Strip markdown code blocks
+        response = response.strip()
+        if response.startswith("```json"):
+            response = response[7:]  # Remove ```json
+        if response.startswith("```"):
+            response = response[3:]  # Remove ```
+        if response.endswith("```"):
+            response = response[:-3]  # Remove trailing ```
+        response = response.strip()
+        return json.loads(response)
+    else:
+        return response
+@dataclass
+class AgentConfig:
+    """Configuration for individual agents"""
+    name: str
+    instructions: str
+    model: str = "gpt-5.2"
+    reasoning_effort: Optional[str] = None
+    temperature: float = 0.7
+    max_tokens: Optional[int] = None
+    timeout: float = 30.0
+    retry_attempts: int = 3
+    enable_tracing: bool = True
+    custom_prompts: Optional[Dict[str, str]] = None
+    output_type: Optional[type] = None  # For structured outputs
+    def __post_init__(self):
+        if self.custom_prompts is None:
+            self.custom_prompts = {}
+class BaseAgentWrapper:
+    """Base wrapper for OpenAI Agents SDK integration"""
+    def __init__(self, config: AgentConfig, openai_client: AsyncOpenAI):
+        self.config = config
+        self.openai_client = openai_client
+        self.agent = None
+        self.runner = None
+    async def initialize(self):
+        """Initialize the OpenAI agent with structured output support"""
+        try:
+            # Set the default OpenAI client for the agents SDK
+            from agents import set_default_openai_client
+            set_default_openai_client(self.openai_client, use_for_tracing=False)
+            # Create model settings with temperature and optional reasoning effort
+            model_settings_kwargs = {"temperature": self.config.temperature}
+            effort = self.config.reasoning_effort
+            if effort in ("auto", "", None):
+                effort = None
+            # GPT-5.x (not chat-latest) supports reasoning_effort
+            if (
+                effort
+                and self.config.model.startswith("gpt-5")
+                and "chat-latest" not in self.config.model
+            ):
+                from openai.types.shared import Reasoning
+                model_settings_kwargs["reasoning"] = Reasoning(effort=effort)
+            model_settings = ModelSettings(**model_settings_kwargs)
+            # Use clean instructions without JSON formatting hacks
+            clean_instructions = self.config.instructions
+            # Create agent with structured output if output_type is provided
+            if self.config.output_type:
+                self.agent = Agent(
+                    name=self.config.name,
+                    instructions=clean_instructions,
+                    model=self.config.model,
+                    model_settings=model_settings,
+                    output_type=self.config.output_type,
+                )
+                logger.info(
+                    f"Initialized agent with structured output: {self.config.name} -> {self.config.output_type}"
+                )
+            else:
+                self.agent = Agent(
+                    name=self.config.name,
+                    instructions=clean_instructions,
+                    model=self.config.model,
+                    model_settings=model_settings,
+                )
+                logger.info(
+                    f"Initialized agent (no structured output): {self.config.name}"
+                )
+        except Exception as e:
+            logger.error(f"Failed to initialize agent {self.config.name}: {e}")
+            raise
+    def _enhance_input_with_context(
+        self, user_input: str, context: Optional[Dict[str, Any]]
+    ) -> str:
+        """Add context to user input if provided."""
+        if context is None:
+            return user_input
+        context_str = "\n".join([f"{k}: {v}" for k, v in context.items()])
+        return f"{user_input}\n\nContext:\n{context_str}"
+    async def _execute_with_retry(self, enhanced_input: str) -> Any:
+        """Execute agent with retry logic on timeout."""
+        for attempt in range(self.config.retry_attempts):
+            try:
+                result = await asyncio.wait_for(
+                    Runner.run(
+                        starting_agent=self.agent,
+                        input=enhanced_input,
+                    ),
+                    timeout=self.config.timeout,
+                )
+                return result
+            except asyncio.TimeoutError:
+                if attempt < self.config.retry_attempts - 1:
+                    logger.warning(
+                        f"Agent {self.config.name} timed out "
+                        f"(attempt {attempt + 1}/{self.config.retry_attempts}), retrying..."
+                    )
+                    continue
+                logger.error(
+                    f"Agent {self.config.name} timed out after {self.config.retry_attempts} attempts"
+                )
+                raise
+        raise RuntimeError("Retry loop exited without result")
+    def _extract_and_track_usage(self, result: Any) -> Dict[str, Any]:
+        """Extract usage info from result and track it."""
+        total_usage = {
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "total_tokens": 0,
+            "requests": 0,
+        }
+        if hasattr(result, "raw_responses") and result.raw_responses:
+            for response in result.raw_responses:
+                if hasattr(response, "usage") and response.usage:
+                    total_usage["input_tokens"] += response.usage.input_tokens
+                    total_usage["output_tokens"] += response.usage.output_tokens
+                    total_usage["total_tokens"] += response.usage.total_tokens
+                    total_usage["requests"] += response.usage.requests
+            track_usage_from_agents_sdk(total_usage, self.config.model)
+            logger.info(f"Agent usage: {total_usage}")
+        return total_usage
+    def _extract_output(self, result: Any) -> Any:
+        """Extract final output from agent result."""
+        if not (hasattr(result, "new_items") and result.new_items):
+            return str(result)
+        from agents.items import ItemHelpers
+        text_output = ItemHelpers.text_message_outputs(result.new_items)
+        if self.config.output_type and self.config.output_type is not str:
+            logger.info(
+                f"Structured output: {type(text_output)} -> {self.config.output_type}"
+            )
+        return text_output
+    async def execute(
+        self, user_input: str, context: Optional[Dict[str, Any]] = None
+    ) -> tuple[Any, Dict[str, Any]]:
+        """Execute the agent with user input and optional context."""
+        if not self.agent:
+            await self.initialize()
+        if self.agent is None:
+            raise ValueError("Agent not initialized")
+        enhanced_input = self._enhance_input_with_context(user_input, context)
+        logger.info(f"Executing agent: {self.config.name}")
+        logger.info(f"Input: {enhanced_input[:200]}...")
+        import time
+        start_time = time.time()
+        try:
+            result = await self._execute_with_retry(enhanced_input)
+            execution_time = time.time() - start_time
+            logger.info(f"Agent {self.config.name} executed in {execution_time:.2f}s")
+            total_usage = self._extract_and_track_usage(result)
+            output = self._extract_output(result)
+            return output, total_usage
+        except asyncio.TimeoutError:
+            logger.error(
+                f"Agent {self.config.name} timed out after {self.config.timeout}s"
+            )
+            raise
+        except Exception as e:
+            logger.error(f"Agent {self.config.name} execution failed: {e}")
+            raise
+    async def handoff_to(
+        self, target_agent: "BaseAgentWrapper", context: Dict[str, Any]
+    ) -> Any:
+        """Hand off execution to another agent with context"""
+        logger.info(
+            f"Handing off from {self.config.name} to {target_agent.config.name}"
+        )
+        # Prepare handoff context
+        handoff_context = {
+            "from_agent": self.config.name,
+            "handoff_reason": context.get("reason", "Standard workflow handoff"),
+            **context,
+        }
+        # Execute the target agent
+        return await target_agent.execute(
+            context.get("user_input", "Continue processing"), handoff_context
+        )
+class AgentResponse(BaseModel):
+    """Standard response format for agents.
+
+    Carries a success flag, the response data, the name of the agent, and
+    optional metadata and error messages.
+    """
+    success: bool
+    data: Any
+    agent_name: str
+    # NOTE(review): mutable defaults are declared inline; this presumably relies
+    # on pydantic copying field defaults per instance - confirm.
+    metadata: Dict[str, Any] = {}
+    errors: List[str] = []

ankigen/agents/config.py ADDED Viewed

	@@ -0,0 +1,260 @@

+# Agent configuration management system
+import json
+from typing import Dict, Any, Optional, List
+from pathlib import Path
+from dataclasses import dataclass, asdict
+from jinja2 import Environment, FileSystemLoader
+from ankigen.logging import logger
+from .base import AgentConfig
+@dataclass
+class AgentPromptTemplate:
+    """Template for agent prompts with variables"""
+    system_prompt: str
+    user_prompt_template: str
+    variables: Optional[Dict[str, str]] = None
+    def __post_init__(self):
+        if self.variables is None:
+            self.variables = {}
+    def render_system_prompt(self, **kwargs) -> str:
+        """Render system prompt with provided variables"""
+        try:
+            variables = self.variables or {}
+            return self.system_prompt.format(**{**variables, **kwargs})
+        except KeyError as e:
+            logger.error(f"Missing variable in system prompt template: {e}")
+            return self.system_prompt
+    def render_user_prompt(self, **kwargs) -> str:
+        """Render user prompt template with provided variables"""
+        try:
+            variables = self.variables or {}
+            return self.user_prompt_template.format(**{**variables, **kwargs})
+        except KeyError as e:
+            logger.error(f"Missing variable in user prompt template: {e}")
+            return self.user_prompt_template
+class AgentConfigManager:
+    """Manages agent configurations using Jinja templates and runtime updates"""
+    def __init__(
+        self,
+        model_overrides: Optional[Dict[str, str]] = None,
+        template_vars: Optional[Dict[str, Any]] = None,
+    ):
+        self.model_overrides = model_overrides or {}
+        self.template_vars = template_vars or {}
+        self.configs: Dict[str, AgentConfig] = {}
+        self.prompt_templates: Dict[str, AgentPromptTemplate] = {}
+        template_dir = Path(__file__).parent / "templates"
+        self.jinja_env = Environment(loader=FileSystemLoader(template_dir))
+        self._load_default_configs()
+    def update_models(self, model_overrides: Dict[str, str]):
+        """Update model selections and regenerate configs"""
+        self.model_overrides = model_overrides
+        self._load_default_configs()
+        logger.info(f"Updated model overrides: {model_overrides}")
+    def update_template_vars(self, template_vars: Dict[str, Any]):
+        logger.info(
+            "Template vars are no longer used in the simplified agent pipeline."
+        )
+    def _load_default_configs(self):
+        """Load all default configurations from Jinja templates"""
+        try:
+            self._load_configs_from_template("generators.j2")
+            self.prompt_templates.clear()
+            logger.info(
+                f"Loaded {len(self.configs)} agent configurations from Jinja templates"
+            )
+        except Exception as e:
+            logger.error(f"Failed to load agent configurations from templates: {e}")
+    def _get_model_for_agent(self, agent_name: str, default_model: str) -> str:
+        """Get model for agent, using override if available"""
+        return self.model_overrides.get(agent_name, default_model)
+    def _load_configs_from_template(self, template_name: str):
+        """Load agent configurations from a Jinja template"""
+        try:
+            template = self.jinja_env.get_template(template_name)
+            # Default models for each agent type
+            default_models = {
+                "subject_expert_model": "gpt-5.2",
+            }
+            # Simple mapping: agent_name -> agent_name_model
+            model_vars = {}
+            for agent_name, model in self.model_overrides.items():
+                model_vars[f"{agent_name}_model"] = model
+            # Merge all template variables with defaults
+            render_vars = {**default_models, **self.template_vars, **model_vars}
+            logger.info(f"Rendering template {template_name} with vars: {render_vars}")
+            rendered_json = template.render(**render_vars)
+            config_data = json.loads(rendered_json)
+            # Create AgentConfig objects from the rendered data
+            for agent_name, agent_data in config_data.items():
+                config = AgentConfig(
+                    name=agent_data.get("name", agent_name),
+                    instructions=agent_data.get("instructions", ""),
+                    model=agent_data.get("model", "gpt-5.2"),
+                    reasoning_effort=agent_data.get("reasoning_effort"),
+                    temperature=agent_data.get("temperature", 0.7),
+                    max_tokens=agent_data.get("max_tokens"),
+                    timeout=agent_data.get("timeout", 30.0),
+                    retry_attempts=agent_data.get("retry_attempts", 3),
+                    enable_tracing=agent_data.get("enable_tracing", True),
+                    custom_prompts=agent_data.get("custom_prompts", {}),
+                )
+                self.configs[agent_name] = config
+                logger.info(f"Loaded config for {agent_name}: model={config.model}")
+        except Exception as e:
+            logger.error(f"Failed to load configs from template {template_name}: {e}")
+    def get_agent_config(self, agent_name: str) -> Optional[AgentConfig]:
+        """Get configuration for a specific agent"""
+        return self.configs.get(agent_name)
+    def get_config(self, agent_name: str) -> Optional[AgentConfig]:
+        """Alias for get_agent_config for compatibility"""
+        return self.get_agent_config(agent_name)
+    def get_prompt_template(self, template_name: str) -> Optional[AgentPromptTemplate]:
+        """Get a prompt template by name"""
+        return self.prompt_templates.get(template_name)
+    def update_agent_config(self, agent_name: str, **kwargs):
+        """Update an agent's configuration at runtime"""
+        if agent_name in self.configs:
+            config = self.configs[agent_name]
+            for key, value in kwargs.items():
+                if hasattr(config, key):
+                    setattr(config, key, value)
+                    logger.info(f"Updated {agent_name} config: {key} = {value}")
+    def update_config(
+        self, agent_name: str, updates: Dict[str, Any]
+    ) -> Optional[AgentConfig]:
+        """Update agent configuration with a dictionary of updates"""
+        if agent_name not in self.configs:
+            return None
+        config = self.configs[agent_name]
+        for key, value in updates.items():
+            if hasattr(config, key):
+                setattr(config, key, value)
+        return config
+    def list_configs(self) -> List[str]:
+        """List all agent configuration names"""
+        return list(self.configs.keys())
+    def list_prompt_templates(self) -> List[str]:
+        """List all prompt template names"""
+        return list(self.prompt_templates.keys())
+    def load_config_from_dict(self, config_dict: Dict[str, Any]):
+        """Load configuration from a dictionary"""
+        # Load agent configs
+        if "agents" in config_dict:
+            for agent_name, agent_data in config_dict["agents"].items():
+                config = AgentConfig(
+                    name=agent_name,
+                    instructions=agent_data.get("instructions", ""),
+                    model=agent_data.get("model", "gpt-5.2"),
+                    reasoning_effort=agent_data.get("reasoning_effort"),
+                    temperature=agent_data.get("temperature", 0.7),
+                    max_tokens=agent_data.get("max_tokens"),
+                    timeout=agent_data.get("timeout", 30.0),
+                    retry_attempts=agent_data.get("retry_attempts", 3),
+                    enable_tracing=agent_data.get("enable_tracing", True),
+                    custom_prompts=agent_data.get("custom_prompts", {}),
+                )
+                self.configs[agent_name] = config
+        # Load prompt templates
+        if "prompt_templates" in config_dict:
+            for template_name, template_data in config_dict["prompt_templates"].items():
+                template = AgentPromptTemplate(
+                    system_prompt=template_data.get("system_prompt", ""),
+                    user_prompt_template=template_data.get("user_prompt_template", ""),
+                    variables=template_data.get("variables", {}),
+                )
+                self.prompt_templates[template_name] = template
+    def _validate_config(self, config_data: Dict[str, Any]) -> bool:
+        """Validate agent configuration data"""
+        # Check required fields
+        if "name" not in config_data or "instructions" not in config_data:
+            return False
+        # Check temperature range
+        temperature = config_data.get("temperature", 0.7)
+        if not 0.0 <= temperature <= 2.0:
+            return False
+        # Check timeout is positive
+        timeout = config_data.get("timeout", 30.0)
+        if timeout <= 0:
+            return False
+        return True
+    def save_config_to_file(self, filename: str, agents: List[str] = None):
+        """Save current configurations to a file"""
+        # Prepare data structure
+        data = {"agents": {}, "prompt_templates": {}}
+        # Add agent configs
+        agents_to_save = agents if agents else list(self.configs.keys())
+        for agent_name in agents_to_save:
+            if agent_name in self.configs:
+                config = self.configs[agent_name]
+                data["agents"][agent_name] = asdict(config)
+        # Add prompt templates
+        for template_name, template in self.prompt_templates.items():
+            data["prompt_templates"][template_name] = asdict(template)
+        try:
+            with open(filename, "w") as f:
+                json.dump(data, f, indent=2)
+            logger.info(f"Saved agent configurations to {filename}")
+        except Exception as e:
+            logger.error(f"Failed to save agent config to {filename}: {e}")
+# Global config manager instance
+_global_config_manager: Optional[AgentConfigManager] = None
+def get_config_manager(
+    model_overrides: Optional[Dict[str, str]] = None,
+    template_vars: Optional[Dict[str, Any]] = None,
+) -> AgentConfigManager:
+    """Get the global agent configuration manager"""
+    global _global_config_manager
+    if _global_config_manager is None:
+        _global_config_manager = AgentConfigManager(model_overrides, template_vars)
+    else:
+        if model_overrides:
+            _global_config_manager.update_models(model_overrides)
+        if template_vars:
+            _global_config_manager.update_template_vars(template_vars)
+    return _global_config_manager

ankigen/agents/generators.py ADDED Viewed

	@@ -0,0 +1,405 @@

+# Specialized generator agents for card generation
+import json
+from typing import List, Dict, Any, Optional, Tuple
+from openai import AsyncOpenAI
+from ankigen.logging import logger
+from ankigen.models import Card, CardFront, CardBack
+from .base import BaseAgentWrapper, AgentConfig
+from .config import get_config_manager
+from .schemas import CardsGenerationSchema
+def card_dict_to_card(
+    card_data: Dict[str, Any],
+    default_topic: str,
+    default_subject: str,
+) -> Card:
+    """Convert a dictionary representation of a card into a Card object."""
+    if not isinstance(card_data, dict):
+        raise ValueError("Card payload must be a dictionary")
+    front_data = card_data.get("front")
+    back_data = card_data.get("back")
+    if not isinstance(front_data, dict) or "question" not in front_data:
+        raise ValueError("Card front must include a question field")
+    if not isinstance(back_data, dict) or "answer" not in back_data:
+        raise ValueError("Card back must include an answer field")
+    metadata = card_data.get("metadata", {}) or {}
+    if not isinstance(metadata, dict):
+        metadata = {}
+    subject = metadata.get("subject") or default_subject or "general"
+    topic = metadata.get("topic") or default_topic or "General Concepts"
+    card = Card(
+        card_type=str(card_data.get("card_type", "basic")),
+        front=CardFront(question=str(front_data.get("question", ""))),
+        back=CardBack(
+            answer=str(back_data.get("answer", "")),
+            explanation=str(back_data.get("explanation", "")),
+            example=str(back_data.get("example", "")),
+        ),
+        metadata=metadata,
+    )
+    if card.metadata is not None:
+        card.metadata.setdefault("subject", subject)
+        card.metadata.setdefault("topic", topic)
+    return card
class SubjectExpertAgent(BaseAgentWrapper):
    """Subject matter expert agent for domain-specific card generation.

    Uses the "subject_expert" agent configuration with CardsGenerationSchema
    as the structured output type, and generates cards in batches so large
    requests are split across several agent calls.
    """

    def __init__(self, openai_client: AsyncOpenAI, subject: str = "general"):
        """Create the agent for ``subject``.

        Raises:
            ValueError: If no "subject_expert" configuration is registered.
        """
        import copy  # function-scope import; only needed here

        config_manager = get_config_manager()
        registered_config = config_manager.get_agent_config("subject_expert")
        if not registered_config:
            raise ValueError(
                "subject_expert configuration not found - agent system not properly initialized"
            )
        # Work on a private shallow copy: if the manager hands out its stored
        # config object, editing it in place would append the subject guidance
        # again for every agent instance created afterwards.
        base_config = copy.copy(registered_config)
        base_config.output_type = CardsGenerationSchema
        if subject != "general" and base_config.custom_prompts:
            subject_prompt = base_config.custom_prompts.get(subject.lower(), "")
            if subject_prompt:
                base_config.instructions += (
                    f"\n\nSubject-specific guidance: {subject_prompt}"
                )
        super().__init__(base_config, openai_client)
        self.subject = subject

    def _build_batch_prompt(
        self,
        topic: str,
        cards_in_batch: int,
        batch_num: int,
        context: Optional[Dict[str, Any]],
        previous_topics: List[str],
    ) -> str:
        """Build user input prompt for a batch of cards.

        ``previous_topics`` holds key terms from earlier batches; only the
        most recent 20 are included to keep the prompt bounded.
        """
        user_input = f"Generate {cards_in_batch} flashcards for the topic: {topic}"
        if context and context.get("generate_cloze"):
            user_input += (
                "\n\nIMPORTANT: Generate a mix of card types including cloze cards. "
                "For code examples, syntax, and fill-in-the-blank concepts, use cloze cards "
                "(card_type='cloze'). Aim for roughly 50% cloze cards when dealing with technical/programming content."
            )
        if context:
            learning_preferences = context.get("learning_preferences")
            if learning_preferences:
                user_input += f"\n\nLearning focus: {learning_preferences}"
            user_input += f"\n\nAdditional context: {context}"
        if previous_topics:
            topics_summary = ", ".join(previous_topics[-20:])
            user_input += f"\n\nAvoid creating cards about these already covered topics: {topics_summary}"
        if batch_num > 1:
            user_input += f"\n\nThis is batch {batch_num} of cards. Ensure these cards cover different aspects of the topic."
        return user_input

    def _extract_topics_for_dedup(self, batch_cards: List[Card]) -> List[str]:
        """Extract key terms from card questions for deduplication.

        For each card, the first three lowercase words longer than three
        characters are joined into one entry.
        """
        topics = []
        for card in batch_cards:
            if hasattr(card, "front") and card.front and card.front.question:
                question_words = card.front.question.lower().split()
                key_terms = [word for word in question_words if len(word) > 3][:3]
                if key_terms:
                    topics.append(" ".join(key_terms))
        return topics

    def _accumulate_usage(
        self, total_usage: Dict[str, int], batch_usage: Optional[Dict[str, Any]]
    ) -> None:
        """Accumulate batch usage into total usage (in place)."""
        if batch_usage:
            for key in total_usage:
                # A key may be present but null; count that as zero rather
                # than failing on int + None.
                total_usage[key] += batch_usage.get(key) or 0

    async def generate_cards(
        self, topic: str, num_cards: int = 5, context: Optional[Dict[str, Any]] = None
    ) -> List[Card]:
        """Generate flashcards for a given topic with automatic batching.

        Requests at most 10 cards per agent call and keeps calling until
        ``num_cards`` have been parsed or a batch yields no cards.

        Raises:
            Exception: Any failure from the agent call or response parsing is
                logged and re-raised.
        """
        batch_size = 10
        all_cards: List[Card] = []
        total_usage: Dict[str, int] = {
            "total_tokens": 0,
            "input_tokens": 0,
            "output_tokens": 0,
        }
        previous_topics: List[str] = []
        cards_remaining = num_cards
        batch_num = 1
        batches_run = 0  # batches actually executed, for accurate reporting
        num_batches = ((num_cards - 1) // batch_size) + 1
        logger.info(
            f"Generating {num_cards} cards for '{topic}' using {num_batches} batches"
        )
        try:
            while cards_remaining > 0:
                cards_in_batch = min(batch_size, cards_remaining)
                logger.info(f"Generating batch {batch_num}: {cards_in_batch} cards")
                if not self.agent:
                    await self.initialize()
                user_input = self._build_batch_prompt(
                    topic, cards_in_batch, batch_num, context, previous_topics
                )
                response, usage = await self.execute(user_input, context)
                batches_run += 1
                self._accumulate_usage(total_usage, usage)
                batch_cards = self._parse_cards_response(response, topic)
                all_cards.extend(batch_cards)
                previous_topics.extend(self._extract_topics_for_dedup(batch_cards))
                cards_remaining -= len(batch_cards)
                logger.info(
                    f"Batch {batch_num} generated {len(batch_cards)} cards. {cards_remaining} remaining."
                )
                if len(batch_cards) == 0:
                    # Avoid looping forever when the agent produces nothing.
                    logger.warning(f"No cards generated in batch {batch_num}, stopping")
                    break
                batch_num += 1
            if total_usage.get("total_tokens", 0) > 0:
                logger.info(
                    f"Total usage: {total_usage['total_tokens']} tokens "
                    f"(Input: {total_usage['input_tokens']}, Output: {total_usage['output_tokens']})"
                )
            logger.info(
                f"Generated {len(all_cards)} cards across {batches_run} batches for '{topic}'"
            )
            return all_cards
        except Exception as e:
            logger.error(f"Card generation failed: {e}")
            raise

    def _build_generation_prompt(
        self,
        topic: str,
        num_cards: int,
        difficulty: str,
        prerequisites: List[str],
        context: Dict[str, Any],
    ) -> str:
        """Build the generation prompt, including the expected JSON structure.

        When ``context`` carries ``source_text``, its first 2000 characters
        are appended as source material.
        """
        prerequisites_str = ", ".join(prerequisites) if prerequisites else "None"
        prompt = f"""Generate {num_cards} high-quality flashcards for the topic: {topic}
Subject: {self.subject}
Difficulty Level: {difficulty}
Prerequisites: {prerequisites_str}
Requirements:
- Focus on {self.subject} concepts and terminology
- Ensure technical accuracy and depth appropriate for {difficulty} level
- Include practical applications and real-world examples
- Test understanding, not just memorization
- Use clear, unambiguous questions
Return your response as a JSON object with this structure:
{{
    "cards": [
        {{
            "card_type": "basic",
            "front": {{
                "question": "Clear, specific question"
            }},
            "back": {{
                "answer": "Concise, accurate answer",
                "explanation": "Detailed explanation with reasoning",
                "example": "Practical example or application"
            }},
            "metadata": {{
                "difficulty": "{difficulty}",
                "prerequisites": {json.dumps(prerequisites)},
                "topic": "{topic}",
                "subject": "{self.subject}",
                "learning_outcomes": ["outcome1", "outcome2"],
                "common_misconceptions": ["misconception1"]
            }}
        }}
    ]
}}"""
        # Tolerate a missing context instead of failing on None.get.
        if context and context.get("source_text"):
            prompt += f"\n\nBase the cards on this source material:\n{context['source_text'][:2000]}..."
        return prompt

    def _parse_cards_response(self, response: Any, topic: str) -> List[Card]:
        """Parse the agent response into Card objects.

        Accepts an object with a ``cards`` attribute, a dict with a ``cards``
        key, or a JSON string (optionally wrapped in a markdown code fence).
        Individual cards that fail to convert are logged and skipped.

        Raises:
            ValueError: If the response is not valid JSON, lacks ``cards``, or
                has an unsupported type.
        """
        try:
            # Handle structured output from CardsGenerationSchema
            if hasattr(response, "cards"):
                # Response is already a CardsGenerationSchema object
                logger.info(f"✅ STRUCTURED OUTPUT RECEIVED: {type(response)}")
                card_data_list = response.cards
            elif isinstance(response, dict) and "cards" in response:
                # Response is a dict with cards
                card_data_list = response["cards"]
            elif isinstance(response, str):
                # Fallback: Clean up the response - remove markdown code blocks if present
                response = response.strip()
                if response.startswith("```json"):
                    response = response[7:]  # Remove ```json
                if response.startswith("```"):
                    response = response[3:]  # Remove ```
                if response.endswith("```"):
                    response = response[:-3]  # Remove trailing ```
                response = response.strip()
                data = json.loads(response)
                if "cards" not in data:
                    raise ValueError("Response missing 'cards' field")
                card_data_list = data["cards"]
            else:
                raise ValueError(f"Unexpected response format: {type(response)}")

            cards = []
            for i, card_data in enumerate(card_data_list):
                try:
                    if hasattr(card_data, "model_dump"):
                        # pydantic v2 models: .dict() is deprecated there.
                        payload = card_data.model_dump()
                    elif hasattr(card_data, "dict"):
                        payload = card_data.dict()
                    elif isinstance(card_data, dict):
                        payload = card_data
                    else:
                        logger.warning(
                            f"Skipping card {i}: unsupported payload type {type(card_data)}"
                        )
                        continue
                    card = card_dict_to_card(payload, topic, self.subject)
                    cards.append(card)
                except Exception as e:
                    # One bad card should not discard the rest of the batch.
                    logger.warning(f"Failed to parse card {i}: {e}")
                    continue
            logger.info(f"✅ PARSED {len(cards)} CARDS FROM STRUCTURED OUTPUT")
            return cards
        except json.JSONDecodeError as e:
            logger.error(f"💥 JSON DECODE ERROR: {e}")
            logger.error("💥 RAW RESPONSE THAT FAILED TO PARSE:")
            logger.error("---FAILED RESPONSE START---")
            logger.error(f"{response}")
            logger.error("---FAILED RESPONSE END---")
            logger.error(f"💥 RESPONSE TYPE: {type(response)}")
            if isinstance(response, str):
                logger.error(f"💥 RESPONSE LENGTH: {len(response)}")
                logger.error(f"💥 FIRST 200 CHARS: {repr(response[:200])}")
                logger.error(f"💥 LAST 200 CHARS: {repr(response[-200:])}")
            # Chain the decode error so the original position info survives.
            raise ValueError(f"Invalid JSON response from agent: {e}") from e
        except Exception as e:
            logger.error(f"💥 GENERAL PARSING ERROR: {e}")
            logger.error(f"💥 RESPONSE THAT CAUSED ERROR: {response}")
            raise
class QualityReviewAgent(BaseAgentWrapper):
    """Single-pass quality review agent for lightweight validation and fixes."""

    def __init__(self, openai_client: AsyncOpenAI, model: str):
        """Configure the reviewer agent to run on ``model``."""
        config = AgentConfig(
            name="quality_reviewer",
            instructions=(
                "You are a meticulous flashcard reviewer. Review each card for factual accuracy, clarity,"
                " atomic scope, and answer quality. When needed, revise the card while keeping it concise and"
                " faithful to the original intent. Always respond with a JSON object containing:"
                ' {"approved": bool, "reason": string, "revised_card": object or null}.'
                " The revised card must follow the input schema with fields card_type, front.question,"
                " back.answer/explanation/example, and metadata."
            ),
            model=model,
            temperature=0.2,
            timeout=45.0,
            retry_attempts=2,
            enable_tracing=False,
        )
        super().__init__(config, openai_client)

    async def review_card(self, card: Card) -> Tuple[Optional[Card], bool, str]:
        """Review a card and optionally return a revised version.

        The review is best-effort: if the agent call fails or its answer
        cannot be interpreted, the original card is returned as approved.

        Returns:
            ``(card, approved, reason)`` where ``card`` is the revised card
            when the reviewer supplied a usable one, otherwise the original.
        """
        card_payload = {
            "card_type": card.card_type,
            "front": {"question": card.front.question if card.front else ""},
            "back": {
                "answer": card.back.answer if card.back else "",
                "explanation": card.back.explanation if card.back else "",
                "example": card.back.example if card.back else "",
            },
            "metadata": card.metadata or {},
        }
        user_input = (
            "Review the following flashcard. Approve it if it is accurate, clear, and atomic."
            " If improvements are needed, provide a revised_card with the corrections applied.\n\n"
            "Flashcard JSON:\n"
            f"{json.dumps(card_payload, ensure_ascii=False)}\n\n"
            "Respond with JSON matching this schema:\n"
            '{\n  "approved": true | false,\n  "reason": "short explanation",\n'
            '  "revised_card": { ... } | null\n}'
        )
        try:
            response, _ = await self.execute(user_input)
        except Exception as e:
            logger.error(f"Quality review failed to execute: {e}")
            return card, True, "Review failed; keeping original card"

        try:
            parsed = json.loads(response) if isinstance(response, str) else response
        except Exception as e:
            logger.warning(f"Failed to parse review response as JSON: {e}")
            return card, True, "Reviewer returned invalid JSON; keeping original"

        # Valid JSON is not necessarily an object (e.g. a list or a bare
        # string); without this guard the .get calls below would raise and
        # defeat the keep-the-original fallback.
        if not isinstance(parsed, dict):
            logger.warning(
                f"Review response is not a JSON object: {type(parsed).__name__}"
            )
            return card, True, "Reviewer returned invalid JSON; keeping original"

        approved = bool(parsed.get("approved", True))
        # A null reason must not become the literal text "None".
        raw_reason = parsed.get("reason")
        reason = "" if raw_reason is None else str(raw_reason)
        revised_payload = parsed.get("revised_card")
        revised_card: Optional[Card] = None
        if isinstance(revised_payload, dict):
            try:
                metadata = revised_payload.get("metadata", {}) or {}
                # Prefer the reviewer's subject/topic, then the original
                # card's, then the generic defaults.
                revised_subject = metadata.get("subject") or (card.metadata or {}).get(
                    "subject",
                    "general",
                )
                revised_topic = metadata.get("topic") or (card.metadata or {}).get(
                    "topic",
                    "General Concepts",
                )
                revised_card = card_dict_to_card(
                    revised_payload, revised_topic, revised_subject
                )
            except Exception as e:
                logger.warning(f"Failed to build revised card from review payload: {e}")
                revised_card = None
        return revised_card or card, approved, reason

ankigen/agents/integration.py ADDED Viewed

	@@ -0,0 +1,287 @@

+# Main integration module for AnkiGen agent system
+from typing import List, Dict, Any, Tuple, Optional
+from datetime import datetime
+from ankigen.logging import logger
+from ankigen.models import Card
+from ankigen.llm_interface import OpenAIClientManager
+from ankigen.context7 import Context7Client
+from .generators import SubjectExpertAgent
class AgentOrchestrator:
    """Main orchestrator for the AnkiGen agent system.

    Owns the OpenAI client obtained from the client manager and a lazily
    created SubjectExpertAgent that is rebuilt whenever the subject changes.
    """

    def __init__(self, client_manager: OpenAIClientManager):
        self.client_manager = client_manager
        self.openai_client = None  # set by initialize()
        self.subject_expert = None  # created on first generation

    async def initialize(
        self,
        api_key: str,
        model_overrides: Optional[Dict[str, str]] = None,
        reasoning_overrides: Optional[Dict[str, Optional[str]]] = None,
    ):
        """Initialize the agent system.

        Args:
            api_key: Key passed to the client manager.
            model_overrides: Optional mapping handed to the config manager's
                ``update_models``.
            reasoning_overrides: Optional mapping of agent name to reasoning
                effort, applied through ``update_agent_config``.

        Raises:
            Exception: Any initialization failure is logged and re-raised.
        """
        try:
            # Initialize OpenAI client
            await self.client_manager.initialize_client(api_key)
            self.openai_client = self.client_manager.get_client()

            if model_overrides or reasoning_overrides:
                # Imported lazily, and only when an override needs it.
                from ankigen.agents.config import get_config_manager

                config_manager = get_config_manager()
                if model_overrides:
                    config_manager.update_models(model_overrides)
                    logger.info(f"Applied model overrides: {model_overrides}")
                if reasoning_overrides:
                    for agent_name, effort in reasoning_overrides.items():
                        config_manager.update_agent_config(
                            agent_name, reasoning_effort=effort
                        )
                    logger.info(f"Applied reasoning overrides: {reasoning_overrides}")
            logger.info("Agent system initialized successfully (simplified pipeline)")
        except Exception as e:
            logger.error(f"Failed to initialize agent system: {e}")
            raise

    async def generate_cards_with_agents(
        self,
        topic: str,
        subject: str = "general",
        num_cards: int = 5,
        difficulty: str = "intermediate",
        context: Optional[Dict[str, Any]] = None,
        library_name: Optional[str] = None,
        library_topic: Optional[str] = None,
        generate_cloze: bool = False,
        topics_list: Optional[List[str]] = None,
        cards_per_topic: int = 8,
    ) -> Tuple[List[Card], Dict[str, Any]]:
        """Generate cards using the agent system.

        If topics_list is provided, generates cards for each subtopic separately
        to ensure comprehensive coverage. Otherwise falls back to single-topic mode.

        Returns:
            ``(cards, metadata)`` where metadata summarizes the run (timing,
            counts, topic/subject/difficulty, library and multi-topic info).

        Raises:
            ValueError: If ``initialize`` has not been called.
            Exception: Any generation failure is logged and re-raised.
        """
        start_time = datetime.now()
        try:
            if not self.openai_client:
                raise ValueError("Agent system not initialized")

            # Enhance context with library documentation if requested. Work on
            # a copy so the caller's dict is never modified by this call.
            enhanced_context = dict(context) if context else {}
            library_docs = None
            if library_name:
                library_docs = await self._fetch_library_docs(
                    library_name, library_topic, num_cards
                )
                if library_docs:
                    enhanced_context["library_documentation"] = library_docs
                    enhanced_context["library_name"] = library_name

            # Generate cards - either per-topic or single-topic mode
            multi_topic_mode = topics_list is not None and len(topics_list) > 0
            if multi_topic_mode:
                logger.info(
                    f"Starting multi-topic generation: {len(topics_list)} topics, "
                    f"{cards_per_topic} cards each for '{topic}'"
                )
                cards = await self._generate_cards_per_topic(
                    main_subject=topic,
                    subject=subject,
                    topics_list=topics_list,
                    cards_per_topic=cards_per_topic,
                    difficulty=difficulty,
                    context=enhanced_context,
                    generate_cloze=generate_cloze,
                )
            else:
                # Fallback to single-topic mode
                logger.info(f"Starting single-topic generation: {topic} ({subject})")
                cards = await self._generation_phase(
                    topic=topic,
                    subject=subject,
                    num_cards=num_cards,
                    difficulty=difficulty,
                    context=enhanced_context,
                    generate_cloze=generate_cloze,
                )

            # Collect metadata
            metadata = {
                "generation_method": "agent_system",
                "generation_time": (datetime.now() - start_time).total_seconds(),
                "cards_generated": len(cards),
                "topic": topic,
                "subject": subject,
                "difficulty": difficulty,
                "library_name": library_name if library_name else None,
                "library_docs_used": bool(library_docs),
                "topics_list": topics_list,
                "multi_topic_mode": multi_topic_mode,
            }
            logger.info(
                f"Agent-based generation complete: {len(cards)} cards generated"
            )
            return cards, metadata
        except Exception as e:
            logger.error(f"Agent-based generation failed: {e}")
            raise

    async def _fetch_library_docs(
        self, library_name: str, library_topic: Optional[str], num_cards: int
    ) -> Optional[str]:
        """Fetch library documentation from Context7.

        Best-effort: returns None (after logging) when nothing could be
        fetched or the fetch raised.
        """
        logger.info(f"Fetching library documentation for: {library_name}")
        try:
            context7_client = Context7Client()

            # Dynamic token allocation based on card generation needs
            base_tokens = 8000
            if num_cards > 40:
                token_limit = 12000
            elif num_cards > 20:
                token_limit = 10000
            else:
                token_limit = base_tokens
            if library_topic:
                # A focused topic gets 80% of the budget.
                token_limit = int(token_limit * 0.8)

            logger.info(
                f"Fetching {token_limit} tokens of documentation"
                + (f" for topic: {library_topic}" if library_topic else "")
            )
            library_docs = await context7_client.fetch_library_documentation(
                library_name, topic=library_topic, tokens=token_limit
            )
            if library_docs:
                logger.info(
                    f"Added {len(library_docs)} chars of {library_name} documentation to context"
                )
                return library_docs

            logger.warning(
                f"Could not fetch documentation for library: {library_name}"
            )
            return None
        except Exception as e:
            logger.error(f"Error fetching library documentation: {e}")
            return None

    async def _generate_cards_per_topic(
        self,
        main_subject: str,
        subject: str,
        topics_list: List[str],
        cards_per_topic: int,
        difficulty: str,
        context: Dict[str, Any],
        generate_cloze: bool,
    ) -> List[Card]:
        """Generate cards for each topic in the topics_list, sequentially."""
        all_cards: List[Card] = []
        total_topics = len(topics_list)
        for topic_num, subtopic in enumerate(topics_list, start=1):
            logger.info(
                f"Generating topic {topic_num}/{total_topics}: {subtopic} "
                f"({cards_per_topic} cards)"
            )
            # Add topic context (a fresh dict per subtopic)
            topic_context = {
                **context,
                "main_subject": main_subject,
                "topic_index": topic_num,
                "total_topics": total_topics,
                "current_subtopic": subtopic,
            }
            cards = await self._generation_phase(
                topic=subtopic,
                subject=subject,
                num_cards=cards_per_topic,
                difficulty=difficulty,
                context=topic_context,
                generate_cloze=generate_cloze,
            )
            all_cards.extend(cards)
            logger.info(
                f"Topic {topic_num}/{total_topics} complete: {len(cards)} cards. "
                f"Total: {len(all_cards)}"
            )
        return all_cards

    async def _generation_phase(
        self,
        topic: str,
        subject: str,
        num_cards: int,
        difficulty: str,
        context: Optional[Dict[str, Any]] = None,
        generate_cloze: bool = False,
    ) -> List[Card]:
        """Execute the card generation phase with the subject expert agent."""
        if not self.subject_expert or self.subject_expert.subject != subject:
            self.subject_expert = SubjectExpertAgent(self.openai_client, subject)

        # Add difficulty and cloze preference to a copy of the context so the
        # dict passed in by the caller is left untouched.
        generation_context = {
            **(context or {}),
            "difficulty": difficulty,
            "generate_cloze": generate_cloze,
        }
        cards = await self.subject_expert.generate_cards(
            topic=topic, num_cards=num_cards, context=generation_context
        )
        logger.info(f"Generation phase complete: {len(cards)} cards generated")
        return cards

    def get_performance_metrics(self) -> Dict[str, Any]:
        """Get performance metrics for the agent system"""
        # Basic performance info only
        return {
            "agents_enabled": True,
        }
async def integrate_with_existing_workflow(
    client_manager: OpenAIClientManager, api_key: str, **generation_params
) -> Tuple[List[Card], Dict[str, Any]]:
    """Run one agent-based generation for the existing AnkiGen workflow.

    Builds a fresh AgentOrchestrator, initializes it with ``api_key`` and
    forwards ``generation_params`` to ``generate_cards_with_agents``.
    """
    # The agent pipeline is unconditional: there is no non-agent branch here.
    agent_system = AgentOrchestrator(client_manager)
    await agent_system.initialize(api_key)
    generated_cards, run_metadata = await agent_system.generate_cards_with_agents(
        **generation_params
    )
    return generated_cards, run_metadata

ankigen/agents/performance.py ADDED Viewed

	@@ -0,0 +1,475 @@

+# Performance optimizations for agent system
# `Tuple` is added to the typing import for BatchProcessor._detach_batch.
import asyncio
import hashlib
import json
import time
from dataclasses import dataclass, field
from functools import wraps, lru_cache
from typing import Dict, Any, List, Optional, Callable, Tuple, TypeVar, Generic

from ankigen.logging import logger
from ankigen.models import Card
+T = TypeVar("T")
@dataclass
class CacheConfig:
    """Settings that control caching of agent responses."""

    # Master switch for caching.
    enable_caching: bool = True
    # Entry lifetime in seconds.
    cache_ttl: int = 3600
    # Entry count at which the cache starts evicting.
    max_cache_size: int = 1000
    # Either "memory" or "file".
    cache_backend: str = "memory"
    # Directory for the "file" backend; defaulted in __post_init__.
    cache_directory: Optional[str] = None

    def __post_init__(self):
        # Only the file backend needs a directory, and an explicit one wins.
        if self.cache_backend != "file" or self.cache_directory:
            return
        self.cache_directory = "cache/agents"
@dataclass
class PerformanceConfig:
    """Tunable switches and limits for the agent performance helpers."""

    # --- batching ---
    enable_batch_processing: bool = True
    max_batch_size: int = 10
    # Seconds a partially filled batch waits before it is flushed.
    batch_timeout: float = 2.0

    # --- parallelism ---
    enable_parallel_execution: bool = True
    max_concurrent_requests: int = 5

    # --- deduplication and caching ---
    enable_request_deduplication: bool = True
    enable_response_caching: bool = True
    # Each instance gets its own CacheConfig built with default values.
    cache_config: CacheConfig = field(default_factory=CacheConfig)
@dataclass
class CacheEntry(Generic[T]):
    """A cached value together with its bookkeeping metadata."""

    value: T
    # Timestamp supplied by the creator; compared against time.time().
    created_at: float
    access_count: int = 0
    last_accessed: float = field(default_factory=time.time)
    cache_key: str = ""

    def is_expired(self, ttl: int) -> bool:
        """Return True once the entry is strictly older than ``ttl`` seconds."""
        age = time.time() - self.created_at
        return age > ttl

    def touch(self):
        """Record one more access and refresh the last-access timestamp."""
        self.last_accessed = time.time()
        self.access_count += 1
class MemoryCache(Generic[T]):
    """In-memory cache with TTL expiry and LRU eviction.

    All mutating operations are serialized through an asyncio.Lock.
    ``_access_order`` lists keys from least to most recently used.
    """

    def __init__(self, config: CacheConfig):
        self.config = config
        self._cache: Dict[str, CacheEntry[T]] = {}
        self._access_order: List[str] = []
        self._lock = asyncio.Lock()

    async def get(self, key: str) -> Optional[T]:
        """Get value from cache.

        Returns None when the key is absent or its entry has outlived
        ``config.cache_ttl`` (the expired entry is removed).
        """
        async with self._lock:
            entry = self._cache.get(key)
            if not entry:
                return None
            if entry.is_expired(self.config.cache_ttl):
                await self._remove(key)
                return None
            entry.touch()
            self._update_access_order(key)
            logger.debug(f"Cache hit for key: {key[:20]}...")
            return entry.value

    async def set(self, key: str, value: T) -> None:
        """Set value in cache, evicting old entries first when full."""
        async with self._lock:
            # Overwriting an existing key does not grow the cache, so only a
            # genuinely new key can trigger eviction.
            if (
                key not in self._cache
                and len(self._cache) >= self.config.max_cache_size
            ):
                await self._evict_lru()
            entry = CacheEntry(value=value, created_at=time.time(), cache_key=key)
            self._cache[key] = entry
            self._update_access_order(key)
            logger.debug(f"Cache set for key: {key[:20]}...")

    async def remove(self, key: str) -> bool:
        """Remove entry from cache; return True if it existed."""
        async with self._lock:
            return await self._remove(key)

    async def clear(self) -> None:
        """Clear all cache entries"""
        async with self._lock:
            self._cache.clear()
            self._access_order.clear()
            logger.info("Cache cleared")

    async def _remove(self, key: str) -> bool:
        """Internal remove method; the caller must already hold the lock."""
        if key in self._cache:
            del self._cache[key]
            if key in self._access_order:
                self._access_order.remove(key)
            return True
        return False

    async def _evict_lru(self) -> None:
        """Evict the least recently used quarter of the entries (at least one)."""
        if not self._access_order:
            return
        # Remove oldest entries. Integer division alone yields 0 for fewer
        # than four entries, which would let a small cache grow without
        # bound, so always evict at least one.
        evict_count = max(1, len(self._access_order) // 4)
        to_remove = self._access_order[:evict_count]
        for key in to_remove:
            await self._remove(key)
        logger.debug(f"Evicted {len(to_remove)} cache entries")

    def _update_access_order(self, key: str) -> None:
        """Move ``key`` to the most-recently-used end of the LRU order."""
        if key in self._access_order:
            self._access_order.remove(key)
        self._access_order.append(key)

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics.

        NOTE: "hit_rate" is computed as total recorded accesses divided by
        the number of entries (an average per entry), not hits over lookups.
        """
        total_accesses = sum(entry.access_count for entry in self._cache.values())
        return {
            "entries": len(self._cache),
            "max_size": self.config.max_cache_size,
            "total_accesses": total_accesses,
            "hit_rate": total_accesses / max(1, len(self._cache)),
        }
class BatchProcessor:
    """Batch processor for agent requests.

    Requests sharing a ``batch_key`` are collected until the batch reaches
    ``config.max_batch_size`` or ``config.batch_timeout`` seconds pass, then
    handed to the processor function in a single call. Every queued request
    owns a future that is resolved from that one call, so no request is
    processed twice and nobody waits while holding the lock.
    """

    def __init__(self, config: PerformanceConfig):
        self.config = config
        self._batches: Dict[str, List[Dict[str, Any]]] = {}
        self._batch_timers: Dict[str, asyncio.Task] = {}
        # Futures parallel to self._batches: one per queued request.
        self._batch_waiters: Dict[str, List[asyncio.Future]] = {}
        self._lock = asyncio.Lock()

    async def add_request(
        self, batch_key: str, request_data: Dict[str, Any], processor_func: Callable
    ) -> Any:
        """Add request to batch for processing and wait for its result.

        Args:
            batch_key: Requests with the same key are batched together.
            request_data: Payload passed to ``processor_func`` inside a list.
            processor_func: Async callable that takes a list of requests.

        Returns:
            With batching disabled, whatever ``processor_func([request_data])``
            returns. With batching enabled, a one-element list holding this
            request's result when the processor returned one result per
            request, otherwise the batch's whole result list.

        Raises:
            Exception: Whatever ``processor_func`` raised for the batch.
        """
        if not self.config.enable_batch_processing:
            # Process immediately if batching is disabled
            return await processor_func([request_data])

        waiter = asyncio.get_running_loop().create_future()
        ready = None
        async with self._lock:
            # Initialize batch if needed
            if batch_key not in self._batches:
                self._batches[batch_key] = []
                self._batch_waiters[batch_key] = []
                self._start_batch_timer(batch_key, processor_func)
            # Add request to batch
            self._batches[batch_key].append(request_data)
            self._batch_waiters.setdefault(batch_key, []).append(waiter)
            # Detach a full batch while still holding the lock ...
            if len(self._batches[batch_key]) >= self.config.max_batch_size:
                ready = self._detach_batch(batch_key)

        # ... but run it after releasing the lock: asyncio.Lock is not
        # reentrant, and holding it here would block the timer and every
        # other caller for as long as the processor runs.
        if ready is not None:
            batch, waiters = ready
            try:
                await self._run_batch(batch_key, batch, waiters, processor_func)
            except Exception:
                # The failure is already stored on every waiter, including
                # ours; it is raised by the await below.
                pass
        return await waiter

    def _start_batch_timer(self, batch_key: str, processor_func: Callable) -> None:
        """Start the timer that flushes a partially filled batch."""

        async def timer():
            await asyncio.sleep(self.config.batch_timeout)
            try:
                await self._process_batch(batch_key, processor_func)
            except Exception:
                # Already logged and delivered to the waiting requests.
                pass

        self._batch_timers[batch_key] = asyncio.create_task(timer())

    def _detach_batch(
        self, batch_key: str
    ) -> Tuple[List[Dict[str, Any]], List[asyncio.Future]]:
        """Remove and return a batch with its waiters; the lock must be held."""
        batch = self._batches.pop(batch_key, [])
        waiters = self._batch_waiters.pop(batch_key, [])
        timer_task = self._batch_timers.pop(batch_key, None)
        # When the timer itself is flushing, cancelling it would abort the
        # processing it is about to start at the next await.
        if timer_task is not None and timer_task is not asyncio.current_task():
            timer_task.cancel()
        return batch, waiters

    async def _process_batch(
        self, batch_key: str, processor_func: Callable
    ) -> List[Any]:
        """Process accumulated batch and return its results as a list.

        Acquires the lock itself, so it must not be called with the lock
        already held.

        Raises:
            Exception: Whatever ``processor_func`` raised (after logging).
        """
        async with self._lock:
            batch, waiters = self._detach_batch(batch_key)
        return await self._run_batch(batch_key, batch, waiters, processor_func)

    async def _run_batch(
        self,
        batch_key: str,
        batch: List[Dict[str, Any]],
        waiters: List[asyncio.Future],
        processor_func: Callable,
    ) -> List[Any]:
        """Run the processor on a detached batch and resolve its waiters."""
        if not batch:
            return []
        logger.debug(f"Processing batch {batch_key} with {len(batch)} requests")
        try:
            # Process the batch
            results = await processor_func(batch)
        except asyncio.CancelledError:
            # Do not leave the other requests of this batch waiting forever.
            for pending in waiters:
                if not pending.done():
                    pending.cancel()
            raise
        except Exception as e:
            logger.error(f"Batch processing failed for {batch_key}: {e}")
            for pending in waiters:
                if not pending.done():
                    pending.set_exception(e)
            raise

        results = results if isinstance(results, list) else [results]
        # With one result per request each caller gets its own slice;
        # otherwise the mapping is unknown and everyone gets the full list.
        one_per_request = len(results) == len(batch)
        for index, pending in enumerate(waiters):
            if pending.done():  # e.g. the caller was cancelled meanwhile
                continue
            pending.set_result(
                results[index : index + 1] if one_per_request else results
            )
        return results
+class RequestDeduplicator:
+    """Deduplicates identical agent requests"""
+    def __init__(self):
+        self._pending_requests: Dict[str, asyncio.Future] = {}
+        self._lock = asyncio.Lock()
+    @lru_cache(maxsize=1000)
+    def _generate_request_hash(self, request_data: str) -> str:
+        """Generate hash for request deduplication"""
+        return hashlib.md5(request_data.encode()).hexdigest()
+    async def deduplicate_request(
+        self, request_data: Dict[str, Any], processor_func: Callable
+    ) -> Any:
+        """Deduplicate and process request"""
+        # Generate hash for deduplication
+        request_str = json.dumps(request_data, sort_keys=True)
+        request_hash = self._generate_request_hash(request_str)
+        async with self._lock:
+            # Check if request is already pending
+            if request_hash in self._pending_requests:
+                logger.debug(f"Deduplicating request: {request_hash[:16]}...")
+                return await self._pending_requests[request_hash]
+            # Create future for this request
+            future = asyncio.create_task(
+                self._process_unique_request(request_hash, request_data, processor_func)
+            )
+            self._pending_requests[request_hash] = future
+            try:
+                result = await future
+                return result
+            finally:
+                # Clean up completed request
+                async with self._lock:
+                    self._pending_requests.pop(request_hash, None)
+    async def _process_unique_request(
+        self, request_hash: str, request_data: Dict[str, Any], processor_func: Callable
+    ) -> Any:
+        """Process unique request"""
+        logger.debug(f"Processing unique request: {request_hash[:16]}...")
+        return await processor_func(request_data)
+class PerformanceOptimizer:
+    """Main performance optimization coordinator"""
+    def __init__(self, config: PerformanceConfig):
+        self.config = config
+        self.cache = (
+            MemoryCache(config.cache_config) if config.enable_response_caching else None
+        )
+        self.batch_processor = (
+            BatchProcessor(config) if config.enable_batch_processing else None
+        )
+        self.deduplicator = (
+            RequestDeduplicator() if config.enable_request_deduplication else None
+        )
+        self._semaphore = asyncio.Semaphore(config.max_concurrent_requests)
+    async def optimize_agent_call(
+        self,
+        agent_name: str,
+        request_data: Dict[str, Any],
+        processor_func: Callable,
+        cache_key_generator: Optional[Callable[[Dict[str, Any]], str]] = None,
+    ) -> Any:
+        """Optimize agent call with caching, batching, and deduplication"""
+        # Generate cache key
+        cache_key = None
+        if self.cache and cache_key_generator:
+            cache_key = cache_key_generator(request_data)
+            # Check cache first
+            cached_result = await self.cache.get(cache_key)
+            if cached_result is not None:
+                return cached_result
+        # Apply rate limiting
+        async with self._semaphore:
+            # Apply deduplication
+            if self.deduplicator and self.config.enable_request_deduplication:
+                result = await self.deduplicator.deduplicate_request(
+                    request_data, processor_func
+                )
+            else:
+                result = await processor_func(request_data)
+            # Cache result
+            if self.cache and cache_key and result is not None:
+                await self.cache.set(cache_key, result)
+            return result
+    async def optimize_batch_processing(
+        self, batch_key: str, request_data: Dict[str, Any], processor_func: Callable
+    ) -> Any:
+        """Optimize using batch processing"""
+        if self.batch_processor:
+            return await self.batch_processor.add_request(
+                batch_key, request_data, processor_func
+            )
+        else:
+            return await processor_func([request_data])
+    def get_performance_stats(self) -> Dict[str, Any]:
+        """Get performance optimization statistics"""
+        stats = {
+            "config": {
+                "batch_processing": self.config.enable_batch_processing,
+                "parallel_execution": self.config.enable_parallel_execution,
+                "request_deduplication": self.config.enable_request_deduplication,
+                "response_caching": self.config.enable_response_caching,
+            },
+            "concurrency": {
+                "max_concurrent": self.config.max_concurrent_requests,
+                "current_available": self._semaphore._value,
+            },
+        }
+        if self.cache:
+            stats["cache"] = self.cache.get_stats()
+        return stats
+# Global performance optimizer
+_global_optimizer: Optional[PerformanceOptimizer] = None
+def get_performance_optimizer(
+    config: Optional[PerformanceConfig] = None,
+) -> PerformanceOptimizer:
+    """Get global performance optimizer instance"""
+    global _global_optimizer
+    if _global_optimizer is None:
+        _global_optimizer = PerformanceOptimizer(config or PerformanceConfig())
+    return _global_optimizer
+# Decorators for performance optimization
+def cache_response(cache_key_func: Callable[[Any], str], ttl: int = 3600):
+    """Decorator to cache function responses"""
+    def decorator(func):
+        @wraps(func)
+        async def wrapper(*args, **kwargs):
+            optimizer = get_performance_optimizer()
+            if not optimizer.cache:
+                return await func(*args, **kwargs)
+            # Generate cache key
+            cache_key = cache_key_func(*args, **kwargs)
+            # Check cache
+            cached_result = await optimizer.cache.get(cache_key)
+            if cached_result is not None:
+                return cached_result
+            # Execute function
+            result = await func(*args, **kwargs)
+            # Cache result
+            if result is not None:
+                await optimizer.cache.set(cache_key, result)
+            return result
+        return wrapper
+    return decorator
+def rate_limit(max_concurrent: int = 5):
+    """Decorator to apply rate limiting"""
+    semaphore = asyncio.Semaphore(max_concurrent)
+    def decorator(func):
+        @wraps(func)
+        async def wrapper(*args, **kwargs):
+            async with semaphore:
+                return await func(*args, **kwargs)
+        return wrapper
+    return decorator
+# Utility functions for cache key generation
+def generate_card_cache_key(
+    topic: str, subject: str, num_cards: int, difficulty: str, **kwargs
+) -> str:
+    """Generate cache key for card generation"""
+    key_data = {
+        "topic": topic,
+        "subject": subject,
+        "num_cards": num_cards,
+        "difficulty": difficulty,
+        "context": kwargs.get("context", {}),
+    }
+    key_str = json.dumps(key_data, sort_keys=True)
+    return f"cards:{hashlib.md5(key_str.encode()).hexdigest()}"
+def generate_judgment_cache_key(
+    cards: List[Card], judgment_type: str = "general"
+) -> str:
+    """Generate cache key for card judgment"""
+    # Use card content to generate stable hash
+    card_data = []
+    for card in cards:
+        card_data.append(
+            {
+                "question": card.front.question,
+                "answer": card.back.answer,
+                "type": card.card_type,
+            }
+        )
+    key_data = {"cards": card_data, "judgment_type": judgment_type}
+    key_str = json.dumps(key_data, sort_keys=True)
+    return f"judgment:{hashlib.md5(key_str.encode()).hexdigest()}"

ankigen/agents/schemas.py ADDED Viewed

	@@ -0,0 +1,189 @@

+"""
+Pydantic schemas for structured outputs from agents.
+These schemas ensure type safety and eliminate JSON parsing errors.
+"""
+from typing import List, Dict, Any, Optional
+from pydantic import BaseModel, Field
+from enum import Enum
+class DifficultyLevel(str, Enum):
+    """Difficulty levels for flashcards"""
+    # The str mixin makes every member a string equal to its value, so a
+    # member compares equal to the plain literal (e.g. "beginner").
+    BEGINNER = "beginner"
+    INTERMEDIATE = "intermediate"
+    ADVANCED = "advanced"
+class CardType(str, Enum):
+    """Types of flashcards"""
+    # str-valued members; CardSchema.card_type is declared with this enum.
+    BASIC = "basic"
+    CLOZE = "cloze"
+class CardFrontSchema(BaseModel):
+    """Schema for the front of a flashcard"""
+    # Single required field: Field(...) declares no default value.
+    question: str = Field(..., description="The question or prompt for the flashcard")
+class CardBackSchema(BaseModel):
+    """Schema for the back of a flashcard"""
+    # All three fields are required (Field(...) with no default).
+    answer: str = Field(..., description="The main answer to the question")
+    explanation: str = Field(..., description="Detailed explanation of the answer")
+    example: str = Field(..., description="A concrete example illustrating the concept")
+class CardMetadataSchema(BaseModel):
+    """Schema for flashcard metadata"""
+    # Required: topic, subject, difficulty.
+    topic: str = Field(..., description="The main topic of the card")
+    subject: str = Field(..., description="The subject area (e.g., Biology, History)")
+    difficulty: DifficultyLevel = Field(..., description="The difficulty level")
+    # Everything below is optional and defaults to None.
+    tags: Optional[List[str]] = Field(
+        None, description="Relevant tags for categorization"
+    )
+    learning_outcomes: Optional[List[str]] = Field(
+        None, description="What the learner should achieve"
+    )
+    prerequisites: Optional[List[str]] = Field(
+        None, description="Required prior knowledge"
+    )
+    related_concepts: Optional[List[str]] = Field(
+        None, description="Related concepts to explore"
+    )
+    # Free-form string; no duration format is enforced here.
+    estimated_time: Optional[str] = Field(None, description="Estimated time to learn")
+    common_mistakes: Optional[List[str]] = Field(
+        None, description="Common mistakes to avoid"
+    )
+    memory_aids: Optional[List[str]] = Field(
+        None, description="Memory aids or mnemonics"
+    )
+    real_world_applications: Optional[List[str]] = Field(
+        None, description="Real-world applications"
+    )
+class CardSchema(BaseModel):
+    """Complete schema for a flashcard"""
+    # Composes the front, back and metadata sub-schemas; card_type, front,
+    # back and metadata are required, enhancement_notes defaults to None.
+    card_type: CardType = Field(..., description="The type of flashcard")
+    front: CardFrontSchema = Field(..., description="The front of the card")
+    back: CardBackSchema = Field(..., description="The back of the card")
+    metadata: CardMetadataSchema = Field(..., description="Metadata about the card")
+    enhancement_notes: Optional[str] = Field(
+        None, description="Notes about enhancements made"
+    )
+class CardsGenerationSchema(BaseModel):
+    """Schema for multiple cards generation"""
+    # Wrapper object around the list of cards; no length bounds are declared here.
+    cards: List[CardSchema] = Field(..., description="List of generated flashcards")
+class JudgeDecisionSchema(BaseModel):
+    """Schema for judge decisions"""
+    approved: bool = Field(..., description="Whether the card is approved")
+    # Bounded to the closed range [0.0, 1.0] via ge/le.
+    score: float = Field(
+        ..., ge=0.0, le=1.0, description="Quality score between 0 and 1"
+    )
+    feedback: str = Field(..., description="Detailed feedback about the card")
+    # Optional; defaults to None.
+    improvements: Optional[List[str]] = Field(
+        None, description="Suggested improvements"
+    )
+    reasoning: str = Field(..., description="Detailed reasoning for the decision")
+    # Bounded to the closed range [0.0, 1.0] via ge/le.
+    confidence: float = Field(
+        ..., ge=0.0, le=1.0, description="Confidence in the decision"
+    )
+    # Optional free-form mapping; defaults to None.
+    metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata")
+class EnhancementSchema(BaseModel):
+    """Schema for card enhancements"""
+    # enhanced_card and enhancement_summary are required; enhancement_details
+    # is an optional free-form mapping that defaults to None.
+    enhanced_card: CardSchema = Field(..., description="The enhanced flashcard")
+    enhancement_summary: str = Field(..., description="Summary of what was enhanced")
+    enhancement_details: Optional[Dict[str, Any]] = Field(
+        None, description="Detailed enhancement information"
+    )
+class GenerationRequestSchema(BaseModel):
+    """Schema for generation requests"""
+    topic: str = Field(..., description="The topic to generate cards for")
+    subject: str = Field(..., description="The subject area")
+    # Validated to the inclusive range 1..20 (ge=1, le=20).
+    num_cards: int = Field(..., ge=1, le=20, description="Number of cards to generate")
+    difficulty: DifficultyLevel = Field(..., description="Target difficulty level")
+    # Optional free-form mappings; both default to None.
+    context: Optional[Dict[str, Any]] = Field(None, description="Additional context")
+    preferences: Optional[Dict[str, Any]] = Field(None, description="User preferences")
+class TokenUsageSchema(BaseModel):
+    """Schema for token usage tracking"""
+    # All counters and the cost are required and constrained to be
+    # non-negative (ge=0). Nothing in this schema checks that total_tokens
+    # equals prompt_tokens + completion_tokens.
+    prompt_tokens: int = Field(..., ge=0, description="Number of tokens in the prompt")
+    completion_tokens: int = Field(
+        ..., ge=0, description="Number of tokens in the completion"
+    )
+    total_tokens: int = Field(..., ge=0, description="Total tokens used")
+    estimated_cost: float = Field(..., ge=0.0, description="Estimated cost in USD")
+    model: str = Field(..., description="Model used for the request")
+class AutoConfigSchema(BaseModel):
+    """Schema for auto-configuration based on subject analysis"""
+    # Every field except documentation_focus is required.
+    # What to search for in Context7
+    library_search_term: str = Field(
+        ...,
+        description="Library name to search for in Context7 (e.g., 'pandas', 'react', 'tensorflow')",
+    )
+    # Specific topic within the library (optional)
+    documentation_focus: Optional[str] = Field(
+        None,
+        description="Specific topic/area within the library documentation to focus on",
+    )
+    # Suggested settings based on subject analysis
+    # NOTE: topic_number and topics_list are validated independently; this
+    # schema does not check that topic_number == len(topics_list).
+    topic_number: int = Field(
+        ..., ge=2, le=20, description="Number of topics to generate (2-20)"
+    )
+    topics_list: List[str] = Field(
+        ...,
+        min_length=2,
+        max_length=20,
+        description="List of distinct subtopics to cover, ordered by learning progression",
+    )
+    cards_per_topic: int = Field(
+        ..., ge=2, le=30, description="Number of cards per topic (2-30)"
+    )
+    learning_preferences: str = Field(
+        ..., description="Learning preferences and focus areas for card generation"
+    )
+    generate_cloze: bool = Field(
+        ...,
+        description="Whether to generate cloze cards (true for syntax/code, false for concepts)",
+    )
+    # Plain str: the allowed model names appear only in the description and
+    # are not enforced by the type.
+    model_choice: str = Field(
+        ...,
+        description="Recommended model: 'gpt-5.2-auto', 'gpt-5.2-instant', or 'gpt-5.2-thinking'",
+    )
+    # Analysis metadata
+    # subject_type and scope are likewise plain strings; the listed values
+    # are guidance in the description, not validated choices.
+    subject_type: str = Field(
+        ...,
+        description="Type of subject: 'concepts', 'syntax', 'api', 'theory', 'practical'",
+    )
+    scope: str = Field(
+        ..., description="Scope of the subject: 'narrow', 'medium', 'broad'"
+    )
+    rationale: str = Field(
+        ..., description="Brief explanation of why these settings were chosen"
+    )

ankigen/agents/security.py ADDED Viewed

	@@ -0,0 +1,397 @@

+# Security enhancements for agent system
+import time
+import hashlib
+import re
+from typing import Dict, Any, Optional, List
+from dataclasses import dataclass, field
+from datetime import datetime
+from collections import defaultdict
+import asyncio
+from ankigen.logging import logger
+@dataclass
+class RateLimitConfig:
+    """Configuration for rate limiting"""
+    # Cap on accepted requests within the last 60 seconds
+    requests_per_minute: int = 60
+    # Cap on accepted requests within the last 3600 seconds
+    requests_per_hour: int = 1000
+    # RateLimiter.check_rate_limit also evaluates this cap over the last 60
+    # seconds, so while it is lower than requests_per_minute it triggers first.
+    burst_limit: int = 10
+    # NOTE(review): not read by RateLimiter in this module - confirm where it is used
+    cooldown_period: int = 300  # seconds
+@dataclass
+class SecurityConfig:
+    """Security configuration for agents"""
+    enable_input_validation: bool = True
+    enable_output_filtering: bool = True
+    enable_rate_limiting: bool = True
+    max_input_length: int = 10000
+    max_output_length: int = 50000
+    blocked_patterns: List[str] = field(default_factory=list)
+    allowed_file_extensions: List[str] = field(
+        default_factory=lambda: [".txt", ".md", ".json", ".yaml"]
+    )
+    def __post_init__(self):
+        if not self.blocked_patterns:
+            self.blocked_patterns = [
+                r"(?i)(api[_\-]?key|secret|password|token|credential)",
+                r"(?i)(sk-[a-zA-Z0-9]{48,})",  # OpenAI API key pattern
+                r"(?i)(access[_\-]?token)",
+                r"(?i)(private[_\-]?key)",
+                r"(?i)(<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>)",  # Script tags
+                r"(?i)(javascript:|data:|vbscript:)",  # URL schemes
+            ]
+class RateLimiter:
+    """Rate limiter for API calls and agent executions"""
+    def __init__(self, config: RateLimitConfig):
+        self.config = config
+        self._requests: Dict[str, List[float]] = defaultdict(list)
+        self._locks: Dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)
+    async def check_rate_limit(self, identifier: str) -> bool:
+        """Check if request is within rate limits"""
+        async with self._locks[identifier]:
+            now = time.time()
+            # Clean old requests
+            self._requests[identifier] = [
+                req_time
+                for req_time in self._requests[identifier]
+                if now - req_time < 3600  # Keep last hour
+            ]
+            recent_requests = self._requests[identifier]
+            # Check burst limit (last minute)
+            last_minute = [req for req in recent_requests if now - req < 60]
+            if len(last_minute) >= self.config.burst_limit:
+                logger.warning(f"Burst limit exceeded for {identifier}")
+                return False
+            # Check per-minute limit
+            if len(last_minute) >= self.config.requests_per_minute:
+                logger.warning(f"Per-minute rate limit exceeded for {identifier}")
+                return False
+            # Check per-hour limit
+            if len(recent_requests) >= self.config.requests_per_hour:
+                logger.warning(f"Per-hour rate limit exceeded for {identifier}")
+                return False
+            # Record this request
+            self._requests[identifier].append(now)
+            return True
+    def get_reset_time(self, identifier: str) -> Optional[datetime]:
+        """Get when rate limits will reset for identifier"""
+        if identifier not in self._requests:
+            return None
+        now = time.time()
+        recent_requests = [req for req in self._requests[identifier] if now - req < 60]
+        if len(recent_requests) >= self.config.requests_per_minute:
+            oldest_request = min(recent_requests)
+            return datetime.fromtimestamp(oldest_request + 60)
+        return None
+class SecurityValidator:
+    """Security validator for agent inputs and outputs"""
+    def __init__(self, config: SecurityConfig):
+        self.config = config
+        self._blocked_patterns = [
+            re.compile(pattern) for pattern in config.blocked_patterns
+        ]
+    def validate_input(self, input_text: str, source: str = "unknown") -> bool:
+        """Validate input for security issues"""
+        if not self.config.enable_input_validation:
+            return True
+        try:
+            # Check input length
+            if len(input_text) > self.config.max_input_length:
+                logger.warning(f"Input too long from {source}: {len(input_text)} chars")
+                return False
+            # Check for blocked patterns
+            for pattern in self._blocked_patterns:
+                if pattern.search(input_text):
+                    logger.warning(f"Blocked pattern detected in input from {source}")
+                    return False
+            # Check for suspicious content
+            if self._contains_suspicious_content(input_text):
+                logger.warning(f"Suspicious content detected in input from {source}")
+                return False
+            return True
+        except Exception as e:
+            logger.error(f"Error validating input from {source}: {e}")
+            return False
+    def validate_output(self, output_text: str, agent_name: str = "unknown") -> bool:
+        """Validate output for security issues"""
+        if not self.config.enable_output_filtering:
+            return True
+        try:
+            # Check output length
+            if len(output_text) > self.config.max_output_length:
+                logger.warning(
+                    f"Output too long from {agent_name}: {len(output_text)} chars"
+                )
+                return False
+            # Check for leaked sensitive information
+            for pattern in self._blocked_patterns:
+                if pattern.search(output_text):
+                    logger.warning(
+                        f"Potential data leak detected in output from {agent_name}"
+                    )
+                    return False
+            return True
+        except Exception as e:
+            logger.error(f"Error validating output from {agent_name}: {e}")
+            return False
+    def sanitize_input(self, input_text: str) -> str:
+        """Sanitize input by removing potentially dangerous content"""
+        try:
+            # Remove HTML/XML tags
+            sanitized = re.sub(r"<[^>]+>", "", input_text)
+            # Remove suspicious URLs
+            sanitized = re.sub(
+                r"(?i)(javascript:|data:|vbscript:)[^\s]*", "[URL_REMOVED]", sanitized
+            )
+            # Truncate if too long
+            if len(sanitized) > self.config.max_input_length:
+                sanitized = sanitized[: self.config.max_input_length] + "...[TRUNCATED]"
+            return sanitized
+        except Exception as e:
+            logger.error(f"Error sanitizing input: {e}")
+            return input_text[:1000]  # Return truncated original as fallback
+    def sanitize_output(self, output_text: str) -> str:
+        """Sanitize output by removing sensitive information"""
+        try:
+            sanitized = output_text
+            # Replace potential API keys or secrets
+            for pattern in self._blocked_patterns:
+                sanitized = pattern.sub("[REDACTED]", sanitized)
+            # Truncate if too long
+            if len(sanitized) > self.config.max_output_length:
+                sanitized = (
+                    sanitized[: self.config.max_output_length] + "...[TRUNCATED]"
+                )
+            return sanitized
+        except Exception as e:
+            logger.error(f"Error sanitizing output: {e}")
+            return output_text[:5000]  # Return truncated original as fallback
+    def _contains_suspicious_content(self, text: str) -> bool:
+        """Check for suspicious content patterns"""
+        suspicious_patterns = [
+            r"(?i)(\beval\s*\()",  # eval() calls
+            r"(?i)(\bexec\s*\()",  # exec() calls
+            r"(?i)(__import__)",  # Dynamic imports
+            r"(?i)(subprocess|os\.system)",  # System commands
+            r"(?i)(file://|ftp://)",  # File/FTP URLs
+            r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b",  # IP addresses
+        ]
+        for pattern in suspicious_patterns:
+            if re.search(pattern, text):
+                return True
+        return False
+class SecureAgentWrapper:
+    """Secure wrapper for agent execution with rate limiting and validation"""
+    def __init__(
+        self, base_agent, rate_limiter: RateLimiter, validator: SecurityValidator
+    ):
+        self.base_agent = base_agent
+        self.rate_limiter = rate_limiter
+        self.validator = validator
+        self._identifier = self._generate_identifier()
+    def _generate_identifier(self) -> str:
+        """Generate unique identifier for rate limiting"""
+        agent_name = getattr(self.base_agent, "config", {}).get("name", "unknown")
+        # Include agent name and some randomness for fairness
+        return hashlib.md5(f"{agent_name}_{id(self.base_agent)}".encode()).hexdigest()[
+            :16
+        ]
+    async def secure_execute(
+        self, user_input: str, context: Dict[str, Any] = None
+    ) -> Any:
+        """Execute agent with security checks and rate limiting"""
+        # Rate limiting check
+        if not await self.rate_limiter.check_rate_limit(self._identifier):
+            reset_time = self.rate_limiter.get_reset_time(self._identifier)
+            raise SecurityError(f"Rate limit exceeded. Reset at: {reset_time}")
+        # Input validation
+        if not self.validator.validate_input(user_input, self._identifier):
+            raise SecurityError("Input validation failed")
+        # Sanitize input
+        sanitized_input = self.validator.sanitize_input(user_input)
+        try:
+            # Execute the base agent
+            result = await self.base_agent.execute(sanitized_input, context)
+            # Validate output
+            if isinstance(result, str):
+                if not self.validator.validate_output(result, self._identifier):
+                    raise SecurityError("Output validation failed")
+                # Sanitize output
+                result = self.validator.sanitize_output(result)
+            return result
+        except Exception as e:
+            logger.error(f"Secure execution failed for {self._identifier}: {e}")
+            raise
+class SecurityError(Exception):
+    """Custom exception for security-related errors"""
+    pass
+# Global security components
+_global_rate_limiter: Optional[RateLimiter] = None
+_global_validator: Optional[SecurityValidator] = None
+def get_rate_limiter(config: Optional[RateLimitConfig] = None) -> RateLimiter:
+    """Get global rate limiter instance"""
+    global _global_rate_limiter
+    if _global_rate_limiter is None:
+        _global_rate_limiter = RateLimiter(config or RateLimitConfig())
+    return _global_rate_limiter
+def get_security_validator(
+    config: Optional[SecurityConfig] = None,
+) -> SecurityValidator:
+    """Get global security validator instance"""
+    global _global_validator
+    if _global_validator is None:
+        _global_validator = SecurityValidator(config or SecurityConfig())
+    return _global_validator
+def create_secure_agent(
+    base_agent,
+    rate_config: Optional[RateLimitConfig] = None,
+    security_config: Optional[SecurityConfig] = None,
+) -> SecureAgentWrapper:
+    """Create a secure wrapper for an agent"""
+    rate_limiter = get_rate_limiter(rate_config)
+    validator = get_security_validator(security_config)
+    return SecureAgentWrapper(base_agent, rate_limiter, validator)
+# Configuration file permissions utility
+def set_secure_file_permissions(file_path: str):
+    """Set secure permissions for configuration files"""
+    try:
+        import os
+        import stat
+        # Set read/write for owner only (0o600)
+        os.chmod(file_path, stat.S_IRUSR | stat.S_IWUSR)
+        logger.info(f"Set secure permissions for {file_path}")
+    except Exception as e:
+        logger.warning(f"Could not set secure permissions for {file_path}: {e}")
+# Input validation utilities
+def strip_html_tags(text: str) -> str:
+    """Strip HTML tags from text (improved version)"""
+    import html
+    # Decode HTML entities first
+    text = html.unescape(text)
+    # Remove HTML/XML tags
+    text = re.sub(r"<[^>]+>", "", text)
+    # Remove remaining HTML entities
+    text = re.sub(r"&[a-zA-Z0-9#]+;", "", text)
+    # Clean up whitespace
+    text = re.sub(r"\s+", " ", text).strip()
+    return text
+def validate_api_key_format(api_key: str) -> bool:
+    """Validate OpenAI API key format without logging it"""
+    if not api_key:
+        return False
+    # Check basic format (starts with sk- and has correct length)
+    if not api_key.startswith("sk-"):
+        return False
+    if len(api_key) < 20:  # Minimum reasonable length
+        return False
+    # Check for obvious fake keys
+    fake_patterns = ["test", "fake", "demo", "example", "placeholder"]
+    lower_key = api_key.lower()
+    if any(pattern in lower_key for pattern in fake_patterns):
+        return False
+    return True
+# Logging security
+def sanitize_for_logging(text: str, max_length: int = 100) -> str:
+    """Sanitize text for safe logging"""
+    if not text:
+        return "[EMPTY]"
+    # Remove potential secrets
+    validator = get_security_validator()
+    sanitized = validator.sanitize_output(text)
+    # Truncate for logging
+    if len(sanitized) > max_length:
+        sanitized = sanitized[:max_length] + "...[TRUNCATED]"
+    return sanitized

ankigen/agents/templates/generators.j2 ADDED Viewed

	@@ -0,0 +1,16 @@

+{# Generator agent configuration template #}
+{
+    "subject_expert": {
+        "name": "subject_expert",
+        "instructions": "You are an expert in {{ subject | default('the subject area') }} with deep pedagogical knowledge. \nYour role is to generate flashcards that demonstrate mastery of {{ subject | default('the subject') }} concepts.\n\nKey responsibilities:\n- Create ATOMIC cards: short answers (1-9 words) when possible; keep explanations/examples brief; split complex info into multiple simple cards\n- Use clear, plain prompts without fancy formatting or unusual words\n- Design prompts that match real-life recall or decision points\n- For basic cards, keep to-be-learned info on the back; prompts are cues, not answers\n- For cloze cards, the deletion is the target; keep cloze sentences natural and minimal\n- Prefer why/how/when/if-then prompts and contrast pairs like A vs B\n- Include a few explain-to-a-non-expert/PM prompts to force clarity\n- Favor retrieval hooks and levers (failure modes, knobs) over proofs or derivations\n- Avoid long equations, derivations, and multi-paragraph cards\n- Ensure technical accuracy and depth appropriate for the target level\n- Use domain-specific terminology correctly\n- Connect concepts to prerequisite knowledge\n\nCard Types:\n- Basic cards (card_type='basic'): Standard Q&A format for concepts and facts\n- Cloze cards (card_type='cloze'): Fill-in-the-blank format using {{ '{{c1::answer}}' }} syntax for code, syntax, formulas\n\nFor cloze cards, wrap the answer in {{ '{{c1::text}}' }} format. Example: 'The vLLM class for inference is {{ '{{c1::LLM}}' }}'\n\nPrioritize atomic simplicity - break complex info into multiple simple cards. Generate cards that test understanding through simple, direct recall.",
+        "model": "{{ subject_expert_model }}",
+        "temperature": 0.7,
+        "timeout": 120.0,
+        "custom_prompts": {
+            "math": "Focus on problem-solving strategies and mathematical reasoning",
+            "science": "Emphasize experimental design and scientific method",
+            "history": "Connect events to broader historical patterns and causation",
+            "programming": "Include executable examples and best practices"
+        }
+    }
+}

ankigen/agents/token_tracker.py ADDED Viewed

	@@ -0,0 +1,221 @@

+"""
+Token usage tracking for OpenAI API calls using tiktoken.
+Provides accurate token counting and cost estimation.
+"""
+import tiktoken
+from typing import Dict, List, Any, Optional
+from dataclasses import dataclass, field
+from datetime import datetime
+from ankigen.logging import logger
+@dataclass
+class TokenUsage:
+    """Track token usage for a single request"""
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+    # Optional: None is an accepted value for the cost
+    estimated_cost: Optional[float]
+    model: str
+    # Defaults to a naive datetime.now() taken when the instance is created
+    timestamp: datetime = field(default_factory=datetime.now)
+class TokenTracker:
+    """Track token usage across multiple requests"""
+    def __init__(self):
+        self.usage_history: List[TokenUsage] = []
+        self.total_cost = 0.0
+        self.total_tokens = 0
+    def count_tokens_for_messages(
+        self, messages: List[Dict[str, str]], model: str
+    ) -> int:
+        """
+        Count total tokens for a list of chat messages using tiktoken.
+        Implements OpenAI's token counting algorithm for chat completions:
+        - Each message adds 3 tokens for role/content/structure overhead
+        - Message names add an additional token
+        - The entire message list adds 3 tokens for conversation wrapper
+        The encoding is selected based on the model:
+        - Attempts to use model-specific encoding via tiktoken
+        - Falls back to 'o200k_base' (GPT-4 Turbo encoding) for unknown models
+        Args:
+            messages: List of message dicts (each with 'role', 'content', optional 'name')
+            model: OpenAI model identifier (e.g., 'gpt-5.2', 'gpt-4o')
+        Returns:
+            Total tokens required to send these messages to the model
+        """
+        try:
+            encoding = tiktoken.encoding_for_model(model)
+        except KeyError:
+            encoding = tiktoken.get_encoding("o200k_base")
+        tokens_per_message = 3
+        tokens_per_name = 1
+        num_tokens = 0
+        for message in messages:
+            num_tokens += tokens_per_message
+            for key, value in message.items():
+                num_tokens += len(encoding.encode(str(value)))
+                if key == "name":
+                    num_tokens += tokens_per_name
+        num_tokens += 3
+        return num_tokens
+    def count_tokens_for_text(self, text: str, model: str) -> int:
+        try:
+            encoding = tiktoken.encoding_for_model(model)
+        except KeyError:
+            encoding = tiktoken.get_encoding("o200k_base")
+        return len(encoding.encode(text))
+    def track_usage_from_response(
+        self, response_data, model: str
+    ) -> Optional[TokenUsage]:
+        try:
+            if hasattr(response_data, "usage"):
+                usage = response_data.usage
+                prompt_tokens = usage.prompt_tokens
+                completion_tokens = usage.completion_tokens
+                actual_cost = None
+                if hasattr(usage, "total_cost"):
+                    actual_cost = usage.total_cost
+                elif hasattr(usage, "cost"):
+                    actual_cost = usage.cost
+                return self.track_usage(
+                    prompt_tokens, completion_tokens, model, actual_cost
+                )
+            return None
+        except Exception as e:
+            logger.error(f"Failed to track usage from response: {e}")
+            return None
+    def track_usage(
+        self,
+        prompt_tokens: int,
+        completion_tokens: int,
+        model: str,
+        actual_cost: Optional[float] = None,
+    ) -> TokenUsage:
+        total_tokens = prompt_tokens + completion_tokens
+        final_cost = actual_cost  # Cost estimation removed - rely on API-provided costs
+        usage = TokenUsage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+            estimated_cost=final_cost,
+            model=model,
+        )
+        self.usage_history.append(usage)
+        if final_cost:
+            self.total_cost += final_cost
+        self.total_tokens += total_tokens
+        logger.info(
+            f"💰 Token usage - Model: {model}, Prompt: {prompt_tokens}, Completion: {completion_tokens}, Cost: ${final_cost:.4f}"
+            if final_cost
+            else f"💰 Token usage - Model: {model}, Prompt: {prompt_tokens}, Completion: {completion_tokens}"
+        )
+        return usage
+    def get_session_summary(self) -> Dict[str, Any]:
+        if not self.usage_history:
+            return {
+                "total_requests": 0,
+                "total_tokens": 0,
+                "total_cost": 0.0,
+                "by_model": {},
+            }
+        by_model = {}
+        for usage in self.usage_history:
+            if usage.model not in by_model:
+                by_model[usage.model] = {"requests": 0, "tokens": 0, "cost": 0.0}
+            by_model[usage.model]["requests"] += 1
+            by_model[usage.model]["tokens"] += usage.total_tokens
+            if usage.estimated_cost:
+                by_model[usage.model]["cost"] += usage.estimated_cost
+        return {
+            "total_requests": len(self.usage_history),
+            "total_tokens": self.total_tokens,
+            "total_cost": self.total_cost,
+            "by_model": by_model,
+        }
+    def get_session_usage(self) -> Dict[str, Any]:
+        return self.get_session_summary()
+    def reset_session(self):
+        self.usage_history.clear()
+        self.total_cost = 0.0
+        self.total_tokens = 0
+        logger.info("🔄 Token usage tracking reset")
+    def track_usage_from_agents_sdk(
+        self, usage_dict: Dict[str, Any], model: str
+    ) -> Optional[TokenUsage]:
+        """Track usage from OpenAI Agents SDK usage format"""
+        try:
+            if not usage_dict or usage_dict.get("total_tokens", 0) == 0:
+                return None
+            prompt_tokens = usage_dict.get("input_tokens", 0)
+            completion_tokens = usage_dict.get("output_tokens", 0)
+            return self.track_usage(prompt_tokens, completion_tokens, model)
+        except Exception as e:
+            logger.error(f"Failed to track usage from agents SDK: {e}")
+            return None
+# Global token tracker instance
+_global_tracker = TokenTracker()
+def get_token_tracker() -> TokenTracker:
+    """Return the module-level TokenTracker; every call returns the same instance."""
+    return _global_tracker
+def track_agent_usage(
+    prompt_text: str,
+    completion_text: str,
+    model: str,
+    actual_cost: Optional[float] = None,
+) -> TokenUsage:
+    tracker = get_token_tracker()
+    prompt_tokens = tracker.count_tokens_for_text(prompt_text, model)
+    completion_tokens = tracker.count_tokens_for_text(completion_text, model)
+    return tracker.track_usage(prompt_tokens, completion_tokens, model, actual_cost)
+def track_usage_from_openai_response(response_data, model: str) -> Optional[TokenUsage]:
+    tracker = get_token_tracker()
+    return tracker.track_usage_from_response(response_data, model)
+def track_usage_from_agents_sdk(
+    usage_dict: Dict[str, Any], model: str
+) -> Optional[TokenUsage]:
+    """Track usage from OpenAI Agents SDK usage format"""
+    tracker = get_token_tracker()
+    return tracker.track_usage_from_agents_sdk(usage_dict, model)

ankigen/auto_config.py ADDED Viewed

	@@ -0,0 +1,211 @@

+"""Auto-configuration service for intelligent settings population"""
+from typing import Dict, Any
+from openai import AsyncOpenAI
+from ankigen.logging import logger
+from ankigen.context7 import Context7Client
+from ankigen.agents.schemas import AutoConfigSchema
+from ankigen.llm_interface import structured_agent_call
+class AutoConfigService:
+    """Service for analyzing subjects and auto-configuring flashcard generation settings"""
+    def __init__(self):
+        # Used by auto_configure to resolve a detected library name to a Context7 ID.
+        self.context7_client = Context7Client()
+    async def analyze_subject(
+        self,
+        subject: str,
+        openai_client: AsyncOpenAI,
+        target_topic_count: int | None = None,
+    ) -> AutoConfigSchema:
+        """Analyze a subject string and return configuration settings.
+        Args:
+            subject: The subject to analyze
+            openai_client: OpenAI client for LLM calls
+            target_topic_count: If provided, the prompt instructs the model to use exactly this many topics
+        Returns:
+            The AutoConfigSchema produced by the model, or a fixed default
+            configuration when the call raises.
+        """
+        # Build topic count instruction if override provided
+        topic_count_instruction = ""
+        if target_topic_count is not None:
+            topic_count_instruction = f"""
+IMPORTANT OVERRIDE: The user has requested exactly {target_topic_count} topics.
+You MUST set topic_number to {target_topic_count} and provide exactly {target_topic_count} items in topics_list.
+Choose the {target_topic_count} most important/foundational subtopics for this subject.
+"""
+        # The override text (possibly empty) is spliced into the system prompt below.
+        system_prompt = f"""You are an educational content analyzer specializing in spaced repetition learning. Analyze the given subject and determine flashcard generation settings that focus on ESSENTIAL concepts.
+{topic_count_instruction}
+CRITICAL PRINCIPLE: Quality over quantity. Focus on fundamental concepts that unlock understanding, not trivial facts.
+Consider:
+1. Extract any library/framework names for Context7 search (e.g., "pandas", "react", "tensorflow")
+2. IMPORTANT: Extract the specific documentation focus from the subject
+   - "Basic Pandas Dataframe" → documentation_focus: "dataframe basics, creation, indexing"
+   - "React hooks tutorial" → documentation_focus: "hooks, useState, useEffect"
+   - "Docker networking" → documentation_focus: "networking, network drivers, container communication"
+3. Identify the scope: narrow (specific feature), medium (several related topics), broad (full overview)
+4. Determine content type: concepts (theory/understanding), syntax (code/commands), api (library usage), practical (hands-on skills)
+5. TOPIC DECOMPOSITION: Break down the subject into distinct subtopics that together provide comprehensive coverage
+6. Recommend cloze cards for syntax/code, basic cards for concepts
+7. Choose model based on complexity: gpt-5.2-thinking for complex topics, gpt-5.2-instant for basic/simple, gpt-5.2-auto for mixed scope
+   - Valid model_choice values: "gpt-5.2-auto", "gpt-5.2-instant", "gpt-5.2-thinking"
+TOPIC DECOMPOSITION (topics_list):
+You MUST provide a topics_list - a list of distinct subtopics that together cover the subject comprehensively.
+- Each topic should be specific and non-overlapping
+- Order topics from foundational to advanced (learning progression)
+- The number of topics should match topic_number
+Examples:
+- "React Hooks" → topics_list: ["useState fundamentals", "useEffect and lifecycle", "useRef and useContext", "custom hooks patterns", "performance with useMemo/useCallback", "testing hooks"]
+- "Docker basics" → topics_list: ["containers vs VMs", "images and Dockerfile", "container lifecycle", "volumes and persistence", "networking fundamentals", "docker-compose basics"]
+- "Machine Learning" → topics_list: ["supervised vs unsupervised", "regression models", "classification models", "model evaluation metrics", "overfitting and regularization", "feature engineering", "cross-validation"]
+IMPORTANT - Focus on HIGH-VALUE topics:
+- GOOD topics: Core concepts, fundamental principles, mental models, design patterns, key abstractions
+- AVOID topics: Trivial commands (like "docker ps"), basic syntax that's easily googled, minor API details
+Guidelines for settings (MINIMUM 30 cards total):
+- Narrow/specific scope: 4-5 essential topics with 8-10 cards each (32-50 cards)
+- Medium scope: 5-7 core topics with 7-9 cards each (35-63 cards)
+- Broad scope: 6-8 fundamental topics with 6-8 cards each (36-64 cards)
+- "Basic"/"Introduction" keywords: Start with fundamentals, 40-50 cards total
+- "Complex" keywords: Deep dive into critical concepts, 45-60 cards
+Learning preference suggestions:
+- For basics: "Focus on fundamental concepts and mental models that form the foundation"
+- For practical: "Emphasize core patterns and principles with real-world applications"
+- For theory: "Build deep conceptual understanding with progressive complexity"
+Return a JSON object matching the AutoConfigSchema."""
+        user_prompt = f"""Analyze this subject for flashcard generation: "{subject}"
+Extract:
+1. The library name if mentioned
+2. The specific documentation focus (what aspects of the library to focus on)
+3. Suggested settings for effective learning
+Provide a brief rationale for your choices."""
+        try:
+            # The analysis call always uses the hard-coded "gpt-5.2" model; the
+            # model_choice in the result only affects later card generation.
+            config = await structured_agent_call(
+                openai_client=openai_client,
+                model="gpt-5.2",
+                instructions=system_prompt,
+                user_input=user_prompt,
+                output_type=AutoConfigSchema,
+                temperature=0.3,  # Lower temperature for more consistent analysis
+            )
+            logger.info(
+                f"Subject analysis complete: library='{config.library_search_term}', "
+                f"topics={config.topic_number}, cards/topic={config.cards_per_topic}"
+            )
+            return config
+        except Exception as e:
+            logger.error(f"Failed to analyze subject: {e}")
+            # Return sensible defaults on error (still aim for good card count)
+            # Fall back to six generic subtopics derived from the subject (6 x 8 = 48 cards).
+            # NOTE(review): target_topic_count is not applied on this path, so a
+            # caller-requested topic count is silently replaced by 6 — confirm intended.
+            return AutoConfigSchema(
+                library_search_term="",
+                documentation_focus=None,
+                topic_number=6,
+                topics_list=[
+                    f"{subject} - fundamentals",
+                    f"{subject} - core concepts",
+                    f"{subject} - practical applications",
+                    f"{subject} - common patterns",
+                    f"{subject} - best practices",
+                    f"{subject} - advanced topics",
+                ],
+                cards_per_topic=8,
+                learning_preferences="Focus on fundamental concepts and core principles with practical examples",
+                generate_cloze=False,
+                model_choice="gpt-5.2-auto",
+                subject_type="concepts",
+                scope="medium",
+                rationale="Using default settings due to analysis error",
+            )
+    async def auto_configure(
+        self,
+        subject: str,
+        openai_client: AsyncOpenAI,
+        target_topic_count: int | None = None,
+    ) -> Dict[str, Any]:
+        """
+        Complete auto-configuration pipeline:
+        1. Analyze subject with AI
+        2. Search Context7 for library if detected
+        3. Return complete configuration for UI
+        Args:
+            subject: The subject to analyze
+            openai_client: OpenAI client for LLM calls
+            target_topic_count: If provided, passed through to analyze_subject as the requested topic count
+        Returns:
+            A dict of UI field values plus an "analysis_metadata" sub-dict, or an
+            empty dict when the subject is empty or whitespace-only.
+        """
+        if not subject or not subject.strip():
+            logger.warning("Empty subject provided to auto_configure")
+            return {}
+        logger.info(f"Starting auto-configuration for subject: '{subject}'")
+        # Step 1: Analyze the subject
+        config = await self.analyze_subject(
+            subject, openai_client, target_topic_count=target_topic_count
+        )
+        # Step 2: Search Context7 for library if one was detected
+        library_id = None
+        if config.library_search_term:
+            logger.info(
+                f"Searching Context7 for library: '{config.library_search_term}'"
+            )
+            try:
+                library_id = await self.context7_client.resolve_library_id(
+                    config.library_search_term
+                )
+                if library_id:
+                    logger.info(f"Resolved library to Context7 ID: {library_id}")
+                else:
+                    logger.warning(
+                        f"Could not find library '{config.library_search_term}' in Context7"
+                    )
+            except Exception as e:
+                # A failed lookup is not fatal: library_id stays None and the
+                # configuration is returned without a library.
+                logger.error(f"Context7 search failed: {e}")
+        # Step 3: Build complete configuration dict for UI
+        ui_config = {
+            # The library name is only surfaced when Context7 resolved an ID for it.
+            "library_name": config.library_search_term if library_id else "",
+            "library_topic": config.documentation_focus or "",
+            "topic_number": config.topic_number,
+            "topics_list": config.topics_list,
+            "cards_per_topic": config.cards_per_topic,
+            "preference_prompt": config.learning_preferences,
+            "generate_cloze_checkbox": config.generate_cloze,
+            "model_choice": config.model_choice,
+            # Metadata for display
+            "analysis_metadata": {
+                "subject_type": config.subject_type,
+                "scope": config.scope,
+                "rationale": config.rationale,
+                "library_found": library_id is not None,
+                "context7_id": library_id,
+            },
+        }
+        logger.info(
+            f"Auto-configuration complete: library={'found' if library_id else 'not found'}, "
+            f"topics={config.topic_number}, model={config.model_choice}"
+        )
+        return ui_config

ankigen/card_generator.py ADDED Viewed

	@@ -0,0 +1,311 @@

+# Module for core card generation logic
+import gradio as gr
+import pandas as pd
+from typing import List, Dict, Any
+# Imports from our core modules
+from ankigen.utils import (
+    get_logger,
+    ResponseCache,
+    strip_html_tags,
+)
+from ankigen.llm_interface import OpenAIClientManager
+from ankigen.models import (
+    Card,
+)  # Import necessary Pydantic models
+# Import agent system - required
+from ankigen.agents.integration import AgentOrchestrator
+from agents import set_tracing_disabled
+logger = get_logger()
+# Disable tracing to prevent metrics persistence issues
+set_tracing_disabled(True)
+# Hard-coded to True: the agent imports above are unconditional, so the
+# availability guard in orchestrate_card_generation cannot trigger as written.
+AGENTS_AVAILABLE = True
+logger.info("Agent system loaded successfully")
+# --- Constants --- (Moved from app.py)
+# Selectable model options; each "value" is a string that
+# _parse_model_selection maps to a model name and reasoning effort.
+AVAILABLE_MODELS = [
+    {
+        "value": "gpt-5.2-auto",
+        "label": "GPT-5.2 Auto",
+        "description": "Adaptive reasoning",
+    },
+    {
+        "value": "gpt-5.2-instant",
+        "label": "GPT-5.2 Instant",
+        "description": "Fast, minimal reasoning",
+    },
+    {
+        "value": "gpt-5.2-thinking",
+        "label": "GPT-5.2 Thinking",
+        "description": "Higher reasoning effort",
+    },
+]
+# Only "subject" is listed here, although _map_generation_mode_to_subject
+# also recognises "path" and "text".
+GENERATION_MODES = [
+    {
+        "value": "subject",
+        "label": "Single Subject",
+        "description": "Generate cards for a specific topic",
+    },
+]
+# --- Core Functions --- (Moved and adapted from app.py)
+# Legacy functions removed - all card generation now handled by agent system
+def _parse_model_selection(model_selection: str) -> tuple[str, str | None]:
+    """Parse model selection into model name and reasoning effort."""
+    if not model_selection:
+        return "gpt-5.2", None
+    normalized = model_selection.strip().lower()
+    if normalized == "gpt-5.2-auto":
+        return "gpt-5.2", None
+    if normalized == "gpt-5.2-instant":
+        return "gpt-5.2", "none"
+    if normalized == "gpt-5.2-thinking":
+        return "gpt-5.2", "high"
+    if "gpt-5.2" in normalized:
+        if "instant" in normalized:
+            return "gpt-5.2", "none"
+        if "thinking" in normalized:
+            return "gpt-5.2", "high"
+        if "auto" in normalized:
+            return "gpt-5.2", None
+        return "gpt-5.2", None
+    # Fallback for direct model names
+    return model_selection, None
+def _map_generation_mode_to_subject(generation_mode: str, subject: str) -> str:
+    """Map UI generation mode to agent subject."""
+    if generation_mode == "subject":
+        return subject if subject else "general"
+    elif generation_mode == "path":
+        return "curriculum_design"
+    elif generation_mode == "text":
+        return "content_analysis"
+    return "general"
+def _build_generation_context(generation_mode: str, source_text: str) -> Dict[str, Any]:
+    """Build context dict for card generation."""
+    context: Dict[str, Any] = {}
+    if generation_mode == "text" and source_text:
+        context["source_text"] = source_text
+    return context
+def _get_token_usage_html(token_tracker) -> str:
+    """Extract token usage and format as HTML."""
+    try:
+        if hasattr(token_tracker, "get_session_summary"):
+            token_usage = token_tracker.get_session_summary()
+        elif hasattr(token_tracker, "get_session_usage"):
+            token_usage = token_tracker.get_session_usage()
+        else:
+            raise AttributeError("TokenTracker has no session summary method")
+        return f"<div style='margin-top: 8px;'><b>Token Usage:</b> {token_usage['total_tokens']} tokens</div>"
+    except Exception as e:
+        logger.error(f"Token usage collection failed: {e}")
+        return "<div style='margin-top: 8px;'><b>Token Usage:</b> No usage data</div>"
+def _format_cards_to_dataframe(
+    agent_cards: List[Card], subject: str
+) -> tuple[pd.DataFrame, str]:
+    """Format agent cards to DataFrame and generate message."""
+    formatted_cards = format_cards_for_dataframe(
+        agent_cards,
+        topic_name=subject if subject else "General",
+        start_index=1,
+    )
+    output_df = pd.DataFrame(formatted_cards, columns=get_dataframe_columns())
+    total_cards_message = f"<div><b>Cards Generated:</b> <span id='total-cards-count'>{len(output_df)}</span></div>"
+    return output_df, total_cards_message
+async def orchestrate_card_generation(
+    client_manager: OpenAIClientManager,
+    cache: ResponseCache,
+    api_key_input: str,
+    subject: str,
+    generation_mode: str,
+    source_text: str,
+    url_input: str,
+    model_name: str,
+    topic_number: int,
+    cards_per_topic: int,
+    preference_prompt: str,
+    generate_cloze: bool,
+    use_llm_judge: bool = False,
+    library_name: str = None,
+    library_topic: str = None,
+    topics_list: List[str] = None,
+):
+    """Orchestrates the card generation process based on UI inputs.
+    Initializes an AgentOrchestrator with the selected model, asks it for
+    topic_number * cards_per_topic cards, and formats the result for display.
+    Returns:
+        A 3-tuple ``(DataFrame, message, token_usage_html)``. On failure or
+        when no cards come back, the DataFrame is empty (standard columns
+        only), the message describes the problem, and the usage HTML is "".
+        Exceptions from the agent system are caught and reported this way
+        rather than propagated.
+    Note:
+        ``cache``, ``url_input`` and ``use_llm_judge`` are accepted but not
+        read anywhere in this function body.
+    """
+    logger.info(f"Starting card generation orchestration in {generation_mode} mode")
+    logger.debug(
+        f"Parameters: mode={generation_mode}, topics={topic_number}, "
+        f"cards_per_topic={cards_per_topic}, cloze={generate_cloze}"
+    )
+    if not AGENTS_AVAILABLE:
+        logger.error("Agent system is required but not available")
+        # NOTE(review): gr.Error is constructed but never raised here and in the
+        # two places below. Gradio documents it as an exception to raise, so these
+        # calls probably surface nothing in the UI — confirm before changing,
+        # since raising would bypass the tuple returns callers rely on.
+        gr.Error("Agent system is required but not available")
+        return pd.DataFrame(columns=get_dataframe_columns()), "Agent system error", ""
+    try:
+        # Imported lazily inside the function rather than at module top.
+        from ankigen.agents.token_tracker import get_token_tracker
+        token_tracker = get_token_tracker()
+        orchestrator = AgentOrchestrator(client_manager)
+        model_name_resolved, reasoning_effort = _parse_model_selection(model_name)
+        logger.info(f"Using {model_name_resolved} for SubjectExpertAgent")
+        # Model and reasoning effort are both keyed by the "subject_expert" agent name.
+        await orchestrator.initialize(
+            api_key_input,
+            {"subject_expert": model_name_resolved},
+            {"subject_expert": reasoning_effort},
+        )
+        agent_subject = _map_generation_mode_to_subject(generation_mode, subject)
+        context = _build_generation_context(generation_mode, source_text)
+        if preference_prompt:
+            context["learning_preferences"] = preference_prompt
+        total_cards_needed = topic_number * cards_per_topic
+        # agent_metadata is returned by the orchestrator but not used below.
+        agent_cards, agent_metadata = await orchestrator.generate_cards_with_agents(
+            topic=subject if subject else "Mixed Topics",
+            subject=agent_subject,
+            num_cards=total_cards_needed,
+            difficulty="intermediate",
+            context=context,
+            library_name=library_name,
+            library_topic=library_topic,
+            generate_cloze=generate_cloze,
+            topics_list=topics_list,
+            cards_per_topic=cards_per_topic,
+        )
+        # Read usage after generation so it reflects what the tracker recorded so far.
+        token_usage_html = _get_token_usage_html(token_tracker)
+        if agent_cards:
+            output_df, total_cards_message = _format_cards_to_dataframe(
+                agent_cards, subject
+            )
+            logger.info(f"Agent system generated {len(output_df)} cards successfully")
+            return output_df, total_cards_message, token_usage_html
+        # No cards: report it, and drop the usage HTML computed above.
+        logger.error("Agent system returned no cards")
+        gr.Error("Agent system returned no cards")
+        return (
+            pd.DataFrame(columns=get_dataframe_columns()),
+            "Agent system returned no cards.",
+            "",
+        )
+    except Exception as e:
+        # Catch-all boundary: every failure becomes an empty result plus a message.
+        logger.error(f"Agent system failed: {e}")
+        gr.Error(f"Agent system error: {str(e)}")
+        return (
+            pd.DataFrame(columns=get_dataframe_columns()),
+            f"Agent system error: {str(e)}",
+            "",
+        )
+# Legacy helper functions removed - all processing now handled by agent system
+# --- Formatting and Utility Functions --- (Moved and adapted)
+def format_cards_for_dataframe(
+    cards: list[Card], topic_name: str, topic_index: int = 0, start_index: int = 1
+) -> list:
+    """Formats a list of Card objects into a list of dictionaries for DataFrame display.
+    Ensures all data is plain text.
+    """
+    formatted_cards = []
+    for i, card_obj in enumerate(cards):
+        actual_index = start_index + i
+        card_type = card_obj.card_type or "basic"
+        question = card_obj.front.question or ""
+        answer = card_obj.back.answer or ""
+        explanation = card_obj.back.explanation or ""
+        example = card_obj.back.example or ""
+        # Metadata processing
+        metadata = card_obj.metadata or {}
+        prerequisites = metadata.get("prerequisites", [])
+        learning_outcomes = metadata.get("learning_outcomes", [])
+        difficulty = metadata.get("difficulty", "N/A")
+        # Ensure list-based metadata are joined as plain strings for DataFrame
+        prerequisites_str = strip_html_tags(
+            ", ".join(prerequisites)
+            if isinstance(prerequisites, list)
+            else str(prerequisites)
+        )
+        learning_outcomes_str = strip_html_tags(
+            ", ".join(learning_outcomes)
+            if isinstance(learning_outcomes, list)
+            else str(learning_outcomes)
+        )
+        difficulty_str = strip_html_tags(str(difficulty))
+        formatted_card = {
+            "Index": (
+                f"{topic_index}.{actual_index}"
+                if topic_index > 0
+                else str(actual_index)
+            ),
+            "Topic": strip_html_tags(topic_name),  # Ensure topic is also plain
+            "Card_Type": strip_html_tags(card_type),
+            "Question": question,  # Already stripped during Card object creation
+            "Answer": answer,  # Already stripped
+            "Explanation": explanation,  # Already stripped
+            "Example": example,  # Already stripped
+            "Prerequisites": prerequisites_str,
+            "Learning_Outcomes": learning_outcomes_str,
+            "Difficulty": difficulty_str,  # Ensure difficulty is plain text
+            "Source_URL": strip_html_tags(
+                metadata.get("source_url", "")
+            ),  # Ensure Source_URL is plain
+        }
+        formatted_cards.append(formatted_card)
+    return formatted_cards
+def get_dataframe_columns() -> list[str]:
+    """Returns the standard list of columns for the Anki card DataFrame."""
+    return [
+        "Index",
+        "Topic",
+        "Card_Type",
+        "Question",
+        "Answer",
+        "Explanation",
+        "Example",
+        "Prerequisites",
+        "Learning_Outcomes",
+        "Difficulty",
+        "Source_URL",
+    ]
+def generate_token_usage_html(token_usage=None):
+    """Generate HTML for token usage display"""
+    if token_usage and isinstance(token_usage, dict):
+        total_tokens = token_usage.get("total_tokens", 0)
+        return f"<div style='margin-top: 8px;'><b>Token Usage:</b> {total_tokens} tokens</div>"
+    else:
+        return "<div style='margin-top: 8px;'><b>Token Usage:</b> No usage data</div>"

ankigen/cli.py ADDED Viewed

	@@ -0,0 +1,372 @@

+"""CLI interface for AnkiGen - Generate Anki flashcards from the command line"""
+import asyncio
+import os
+import sys
+from pathlib import Path
+from typing import Optional
+import click
+import pandas as pd
+from rich.console import Console
+from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
+from rich.table import Table
+from rich.panel import Panel
+from ankigen.agents.token_tracker import get_token_tracker
+from ankigen.auto_config import AutoConfigService
+from ankigen.card_generator import orchestrate_card_generation
+from ankigen.exporters import export_dataframe_to_apkg, export_dataframe_to_csv
+from ankigen.llm_interface import OpenAIClientManager
+from ankigen.utils import ResponseCache, get_logger
+# Shared Rich console used for all CLI output in this module (messages, tables, progress).
+console = Console()
+logger = get_logger()
+def get_api_key() -> str:
+    """Get OpenAI API key from env or prompt user"""
+    api_key = os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        console.print("[yellow]OpenAI API key not found in environment[/yellow]")
+        api_key = click.prompt("Enter your OpenAI API key", hide_input=True)
+    return api_key
+async def auto_configure_from_prompt(
+    prompt: str,
+    api_key: str,
+    override_topics: Optional[int] = None,
+    override_cards: Optional[int] = None,
+    override_model: Optional[str] = None,
+) -> dict:
+    """Auto-configure settings from a prompt using AI analysis"""
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        console=console,
+    ) as progress:
+        progress.add_task("Analyzing subject...", total=None)
+        # Initialize client
+        client_manager = OpenAIClientManager()
+        await client_manager.initialize_client(api_key)
+        openai_client = client_manager.get_client()
+        # Get auto-config (pass topic count override so LLM decomposes correctly)
+        auto_config_service = AutoConfigService()
+        config = await auto_config_service.auto_configure(
+            prompt, openai_client, target_topic_count=override_topics
+        )
+    # Apply remaining overrides (topics already handled in auto_configure)
+    if override_cards is not None:
+        config["cards_per_topic"] = override_cards
+    if override_model is not None:
+        config["model_choice"] = override_model
+    # Display configuration
+    table = Table(
+        title="Auto-Configuration", show_header=True, header_style="bold cyan"
+    )
+    table.add_column("Setting", style="dim")
+    table.add_column("Value", style="green")
+    table.add_row("Topics", str(config.get("topic_number", "N/A")))
+    table.add_row("Cards per Topic", str(config.get("cards_per_topic", "N/A")))
+    table.add_row(
+        "Total Cards",
+        str(config.get("topic_number", 0) * config.get("cards_per_topic", 0)),
+    )
+    table.add_row("Model", config.get("model_choice", "N/A"))
+    if config.get("library_name"):
+        table.add_row("Library", config.get("library_name"))
+    if config.get("library_topic"):
+        table.add_row("Library Topic", config.get("library_topic"))
+    # Display discovered topics
+    if config.get("topics_list"):
+        topics = config["topics_list"]
+        # Show first few topics, indicate if there are more
+        if len(topics) <= 4:
+            topics_str = ", ".join(topics)
+        else:
+            topics_str = ", ".join(topics[:3]) + f", ... (+{len(topics) - 3} more)"
+        table.add_row("Subtopics", topics_str)
+    if config.get("preference_prompt"):
+        table.add_row(
+            "Learning Focus", config.get("preference_prompt", "")[:50] + "..."
+        )
+    console.print(table)
+    return config
+async def generate_cards_from_config(
+    prompt: str,
+    config: dict,
+    api_key: str,
+) -> tuple:
+    """Generate cards using the configuration"""
+    client_manager = OpenAIClientManager()
+    response_cache = ResponseCache()
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+        console=console,
+    ) as progress:
+        task = progress.add_task(
+            f"Generating {config['topic_number'] * config['cards_per_topic']} cards...",
+            total=100,
+        )
+        # Generate cards
+        (
+            output_df,
+            total_cards_html,
+            token_usage_html,
+        ) = await orchestrate_card_generation(
+            client_manager=client_manager,
+            cache=response_cache,
+            api_key_input=api_key,
+            subject=prompt,
+            generation_mode="subject",
+            source_text="",
+            url_input="",
+            model_name=config.get("model_choice", "gpt-5.2-auto"),
+            topic_number=config.get("topic_number", 3),
+            cards_per_topic=config.get("cards_per_topic", 5),
+            preference_prompt=config.get("preference_prompt", ""),
+            generate_cloze=config.get("generate_cloze_checkbox", False),
+            library_name=config.get("library_name")
+            if config.get("library_name")
+            else None,
+            library_topic=config.get("library_topic")
+            if config.get("library_topic")
+            else None,
+            topics_list=config.get("topics_list"),
+        )
+        progress.update(task, completed=100)
+    return output_df, total_cards_html, token_usage_html
+def export_cards(
+    df: pd.DataFrame,
+    output_path: str,
+    deck_name: str,
+    export_format: str = "apkg",
+) -> str:
+    """Export cards to file"""
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        console=console,
+    ) as progress:
+        progress.add_task(f"Exporting to {export_format.upper()}...", total=None)
+        if export_format == "apkg":
+            # Ensure .apkg extension
+            if not output_path.endswith(".apkg"):
+                output_path = (
+                    output_path.replace(".csv", ".apkg")
+                    if ".csv" in output_path
+                    else f"{output_path}.apkg"
+                )
+            exported_path = export_dataframe_to_apkg(df, output_path, deck_name)
+        else:  # csv
+            # Ensure .csv extension
+            if not output_path.endswith(".csv"):
+                output_path = (
+                    output_path.replace(".apkg", ".csv")
+                    if ".apkg" in output_path
+                    else f"{output_path}.csv"
+                )
+            exported_path = export_dataframe_to_csv(df, output_path)
+    return exported_path
+@click.command()
+@click.option(
+    "-p",
+    "--prompt",
+    required=True,
+    help="Subject or topic for flashcard generation (e.g., 'Basic SQL', 'React Hooks')",
+)
+@click.option(
+    "--topics",
+    type=int,
+    help="Number of topics (auto-detected if not specified)",
+)
+@click.option(
+    "--cards-per-topic",
+    type=int,
+    help="Number of cards per topic (auto-detected if not specified)",
+)
+@click.option(
+    "--model",
+    type=click.Choice(
+        ["gpt-5.2-auto", "gpt-5.2-instant", "gpt-5.2-thinking"],
+        case_sensitive=False,
+    ),
+    help="Model to use for generation (auto-selected if not specified)",
+)
+@click.option(
+    "-o",
+    "--output",
+    default="deck.apkg",
+    help="Output file path (default: deck.apkg)",
+)
+@click.option(
+    "--format",
+    "export_format",
+    type=click.Choice(["apkg", "csv"], case_sensitive=False),
+    default="apkg",
+    help="Export format (default: apkg)",
+)
+@click.option(
+    "--api-key",
+    envvar="OPENAI_API_KEY",
+    help="OpenAI API key (or set OPENAI_API_KEY env var)",
+)
+@click.option(
+    "--no-confirm",
+    is_flag=True,
+    help="Skip confirmation prompt",
+)
+def main(
+    prompt: str,
+    topics: Optional[int],
+    cards_per_topic: Optional[int],
+    model: Optional[str],
+    output: str,
+    export_format: str,
+    api_key: Optional[str],
+    no_confirm: bool,
+):
+    """
+    AnkiGen CLI - Generate Anki flashcards from the command line
+    Examples:
+      # Quick generation with auto-config
+      ankigen -p "Basic SQL"
+      # With custom settings
+      ankigen -p "React Hooks" --topics 5 --cards-per-topic 8 --output hooks.apkg
+      # Export to CSV
+      ankigen -p "Docker basics" --format csv -o docker.csv
+    """
+    # Print header
+    console.print(
+        Panel.fit(
+            "[bold cyan]AnkiGen CLI[/bold cyan]\n[dim]Generate Anki flashcards with AI[/dim]",
+            border_style="cyan",
+        )
+    )
+    console.print()
+    # Get API key
+    if not api_key:
+        api_key = get_api_key()
+    # Run async workflow
+    async def workflow():
+        try:
+            # Step 1: Auto-configure
+            console.print(f"[bold]Subject:[/bold] {prompt}\n")
+            config = await auto_configure_from_prompt(
+                prompt=prompt,
+                api_key=api_key,
+                override_topics=topics,
+                override_cards=cards_per_topic,
+                override_model=model,
+            )
+            # Step 2: Confirm (unless --no-confirm)
+            if not no_confirm:
+                console.print()
+                if not click.confirm("Proceed with card generation?", default=True):
+                    console.print("[yellow]Cancelled[/yellow]")
+                    return
+            console.print()
+            # Step 3: Generate cards
+            df, total_html, token_html = await generate_cards_from_config(
+                prompt=prompt,
+                config=config,
+                api_key=api_key,
+            )
+            if df.empty:
+                console.print("[red]✗[/red] No cards generated")
+                sys.exit(1)
+            # Step 4: Export
+            console.print()
+            deck_name = f"AnkiGen - {prompt}"
+            exported_path = export_cards(
+                df=df,
+                output_path=output,
+                deck_name=deck_name,
+                export_format=export_format,
+            )
+            # Step 5: Success summary
+            console.print()
+            file_size = Path(exported_path).stat().st_size / 1024  # KB
+            summary = Table.grid(padding=(0, 2))
+            summary.add_row("[green]✓[/green] Success!", "")
+            summary.add_row("Cards Generated:", f"[bold]{len(df)}[/bold]")
+            summary.add_row("Output File:", f"[bold]{exported_path}[/bold]")
+            summary.add_row("File Size:", f"{file_size:.1f} KB")
+            # Get token usage from tracker
+            tracker = get_token_tracker()
+            session = tracker.get_session_summary()
+            if session["total_tokens"] > 0:
+                # Calculate totals across all models
+                total_input = sum(u.prompt_tokens for u in tracker.usage_history)
+                total_output = sum(u.completion_tokens for u in tracker.usage_history)
+                summary.add_row(
+                    "Tokens:",
+                    f"{total_input:,} in / {total_output:,} out ({session['total_tokens']:,} total)",
+                )
+            console.print(
+                Panel(summary, border_style="green", title="Generation Complete")
+            )
+        except KeyboardInterrupt:
+            console.print("\n[yellow]Cancelled by user[/yellow]")
+            sys.exit(130)
+        except Exception as e:
+            logger.error(f"CLI error: {e}", exc_info=True)
+            console.print(f"[red]✗ Error:[/red] {str(e)}")
+            sys.exit(1)
+    # Run the async workflow
+    asyncio.run(workflow())
+# Invoke the click command when this module is executed as a script
+if __name__ == "__main__":
+    main()

ankigen/context7.py ADDED Viewed

	@@ -0,0 +1,339 @@

+"""Context7 integration for library documentation"""
+import asyncio
+import subprocess
+import json
+from typing import Optional, Dict, Any
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_exponential,
+    retry_if_exception_type,
+)
+from ankigen.logging import logger
+from ankigen.exceptions import (
+    ValidationError,
+)
+# Upper bound on the length of a library ID accepted by Context7Client.get_library_docs
+MAX_STRING_LENGTH = 200  # Prevent excessively long inputs
+# Seconds to wait for the Context7 subprocess to answer before it is killed
+SUBPROCESS_TIMEOUT = 60.0  # 60 second timeout for Context7 calls
+class Context7Client:
+    """Context7 MCP client for fetching library documentation"""
+    def __init__(self):
+        pass  # No state needed - each call creates fresh subprocess
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, min=2, max=10),
+        retry=retry_if_exception_type((TimeoutError, ConnectionError)),
+        reraise=True,
+    )
+    async def call_context7_tool(
+        self, tool_name: str, args: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """Call a Context7 tool via direct JSONRPC with retry logic"""
+        try:
+            # Build the JSONRPC request
+            request = {
+                "jsonrpc": "2.0",
+                "id": 1,
+                "method": "tools/call",
+                "params": {"name": tool_name, "arguments": args},
+            }
+            # Call the Context7 server
+            process = await asyncio.create_subprocess_exec(
+                "npx",
+                "@upstash/context7-mcp",
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
+            # Send initialization first
+            init_request = {
+                "jsonrpc": "2.0",
+                "id": 0,
+                "method": "initialize",
+                "params": {
+                    "protocolVersion": "2025-06-18",
+                    "capabilities": {},
+                    "clientInfo": {"name": "ankigen", "version": "1.0.0"},
+                },
+            }
+            # Send both requests with timeout protection
+            # Optimize: Use list join for string concatenation
+            input_data = "\n".join([json.dumps(init_request), json.dumps(request), ""])
+            try:
+                stdout, stderr = await asyncio.wait_for(
+                    process.communicate(input=input_data.encode()),
+                    timeout=SUBPROCESS_TIMEOUT,
+                )
+            except asyncio.TimeoutError:
+                # Proper process cleanup on timeout
+                try:
+                    if process.returncode is None:  # Process still running
+                        process.kill()
+                        # Wait for process to actually terminate
+                        await asyncio.wait_for(process.wait(), timeout=5.0)
+                except Exception as cleanup_error:
+                    logger.error(f"Error during process cleanup: {cleanup_error}")
+                raise TimeoutError(
+                    f"Context7 subprocess timed out after {SUBPROCESS_TIMEOUT}s"
+                )
+            except Exception:
+                # Clean up process on any other error
+                try:
+                    if process.returncode is None:
+                        process.kill()
+                        await asyncio.wait_for(process.wait(), timeout=5.0)
+                except Exception:
+                    pass  # Best effort cleanup
+                raise
+            # Parse responses
+            responses = stdout.decode().strip().split("\n")
+            if len(responses) >= 2:
+                # Skip init response, get tool response
+                tool_response = json.loads(responses[1])
+                if "result" in tool_response:
+                    result = tool_response["result"]
+                    # Extract content from the result
+                    if "content" in result and result["content"]:
+                        content_item = result["content"][0]
+                        if "text" in content_item:
+                            return {"text": content_item["text"], "success": True}
+                        elif "type" in content_item and content_item["type"] == "text":
+                            return {
+                                "text": content_item.get("text", ""),
+                                "success": True,
+                            }
+                    return {"error": "No content in response", "success": False}
+                elif "error" in tool_response:
+                    return {"error": tool_response["error"], "success": False}
+            return {"error": "Invalid response format", "success": False}
+        except Exception as e:
+            logger.error(f"Error calling Context7 tool {tool_name}: {e}")
+            return {"error": str(e), "success": False}
+    def _parse_library_response(self, text: str) -> list[Dict[str, Any]]:
+        """Parse Context7 response text into list of library dicts.
+        Args:
+            text: Raw text response from Context7
+        Returns:
+            List of library dicts with keys: title, id, snippets, trust
+        """
+        libraries = []
+        lines = text.split("\n")
+        current_lib: Dict[str, Any] = {}
+        for line in lines:
+            line = line.strip()
+            if line.startswith("- Title:"):
+                if current_lib and current_lib.get("id"):
+                    libraries.append(current_lib)
+                current_lib = {"title": line.replace("- Title:", "").strip().lower()}
+            elif line.startswith("- Context7-compatible library ID:"):
+                lib_id = line.replace("- Context7-compatible library ID:", "").strip()
+                if current_lib is not None:
+                    current_lib["id"] = lib_id
+            elif line.startswith("- Code Snippets:"):
+                snippets_str = line.replace("- Code Snippets:", "").strip()
+                try:
+                    if current_lib is not None:
+                        current_lib["snippets"] = int(snippets_str)
+                except ValueError:
+                    pass
+            elif line.startswith("- Trust Score:"):
+                score_str = line.replace("- Trust Score:", "").strip()
+                try:
+                    if current_lib is not None:
+                        current_lib["trust"] = float(score_str)
+                except ValueError:
+                    pass
+        if current_lib and current_lib.get("id"):
+            libraries.append(current_lib)
+        return libraries
+    def _score_library(self, lib: Dict[str, Any], search_term: str) -> float:
+        """Score a library based on how well it matches the search term.
+        Args:
+            lib: Library dict with title, id, snippets, trust
+            search_term: Lowercase search term
+        Returns:
+            Score (higher is better match)
+        """
+        score = 0.0
+        lib_title = lib.get("title", "")
+        lib_id = lib["id"].lower()
+        # Exact title match gets highest priority
+        if lib_title == search_term:
+            score += 10000
+        elif lib_id == f"/{search_term}-dev/{search_term}":
+            score += 5000
+        elif f"/{search_term}/" in lib_id or lib_id.endswith(f"/{search_term}"):
+            score += 2000
+        elif search_term in lib_title:
+            if lib_title == search_term:
+                score += 1000
+            elif lib_title.startswith(search_term):
+                score += 200
+            else:
+                score += 50
+        # Bonus for code snippets (indicates main library)
+        snippets = lib.get("snippets", 0)
+        score += snippets / 10
+        # Bonus for trust score (high trust = official/authoritative)
+        trust = lib.get("trust", 0)
+        score += trust * 100
+        return score
+    def _select_best_library(
+        self, libraries: list[Dict[str, Any]], search_term: str
+    ) -> Optional[Dict[str, Any]]:
+        """Select the best matching library from a list.
+        Args:
+            libraries: List of library dicts
+            search_term: Lowercase search term
+        Returns:
+            Best matching library dict, or None if no match
+        """
+        best_lib = None
+        best_score = -1.0
+        for lib in libraries:
+            score = self._score_library(lib, search_term)
+            if search_term in lib.get("title", "") or search_term in lib["id"].lower():
+                logger.debug(
+                    f"Scoring {lib['id']}: title='{lib.get('title', '')}', "
+                    f"snippets={lib.get('snippets', 0)}, trust={lib.get('trust', 0)}, score={score:.2f}"
+                )
+            if score > best_score:
+                best_score = score
+                best_lib = lib
+        if best_lib:
+            logger.info(
+                f"Selected library: {best_lib['id']} (title: {best_lib.get('title', 'unknown')}, "
+                f"snippets: {best_lib.get('snippets', 0)}, trust: {best_lib.get('trust', 0)}, "
+                f"score: {best_score:.2f})"
+            )
+        return best_lib
+    async def resolve_library_id(self, library_name: str) -> Optional[str]:
+        """Resolve a library name to a Context7-compatible ID"""
+        logger.info(f"Resolving library ID for: {library_name}")
+        result = await self.call_context7_tool(
+            "resolve-library-id", {"libraryName": library_name}
+        )
+        if not (result and result.get("success") and result.get("text")):
+            logger.warning(f"Could not resolve library ID for '{library_name}'")
+            return None
+        libraries = self._parse_library_response(result["text"])
+        if not libraries:
+            logger.warning(f"Could not resolve library ID for '{library_name}'")
+            return None
+        best_lib = self._select_best_library(libraries, library_name.lower())
+        if best_lib:
+            logger.info(f"Resolved '{library_name}' to ID: {best_lib['id']}")
+            return best_lib["id"]
+        logger.warning(f"Could not resolve library ID for '{library_name}'")
+        return None
+    async def get_library_docs(
+        self, library_id: str, topic: Optional[str] = None, tokens: int = 5000
+    ) -> Optional[str]:
+        """Get documentation for a library"""
+        # Security: Validate library_id (should start with /)
+        if (
+            not library_id
+            or not library_id.startswith("/")
+            or len(library_id) > MAX_STRING_LENGTH
+        ):
+            logger.error(f"Invalid library ID format (security): '{library_id}'")
+            raise ValidationError("Invalid library ID format")
+        logger.info(
+            f"Fetching docs for: {library_id}" + (f" (topic: {topic})" if topic else "")
+        )
+        args = {"context7CompatibleLibraryID": library_id, "tokens": tokens}
+        if topic:
+            args["topic"] = topic
+        result = await self.call_context7_tool("get-library-docs", args)
+        if result and result.get("success") and result.get("text"):
+            docs = result["text"]
+            logger.info(f"Retrieved {len(docs)} characters of documentation")
+            return docs
+        logger.warning(f"Could not fetch docs for '{library_id}'")
+        return None
+    async def fetch_library_documentation(
+        self, library_name: str, topic: Optional[str] = None, tokens: int = 5000
+    ) -> Optional[str]:
+        """Convenience method to resolve and fetch docs in one call"""
+        library_id = await self.resolve_library_id(library_name)
+        if not library_id:
+            return None
+        return await self.get_library_docs(library_id, topic, tokens)
+async def test_context7() -> None:
+    """Test the Context7 integration"""
+    client = Context7Client()
+    print("Testing Context7 integration...")
+    # Test resolving a library
+    library_id = await client.resolve_library_id("react")
+    if library_id:
+        print(f"✓ Resolved 'react' to ID: {library_id}")
+        # Test fetching docs
+        docs = await client.get_library_docs(library_id, topic="hooks", tokens=2000)
+        if docs:
+            print(f"✓ Fetched {len(docs)} characters of documentation")
+            print(f"Preview: {docs[:300]}...")
+        else:
+            print("✗ Failed to fetch documentation")
+    else:
+        print("✗ Failed to resolve library ID")
+# Run the manual Context7 smoke test when this module is executed directly
+if __name__ == "__main__":
+    asyncio.run(test_context7())

ankigen/exceptions.py ADDED Viewed

	@@ -0,0 +1,86 @@

+"""Custom exceptions for AnkiGen application.
+This module provides a hierarchy of custom exceptions to standardize
+error handling across the codebase.
+"""
+class AnkigenError(Exception):
+    """Base exception for all AnkiGen errors."""
+    pass
+class ValidationError(AnkigenError):
+    """Raised when input validation fails."""
+    pass
+class SecurityError(AnkigenError):
+    """Raised when a security check fails (SSRF, command injection, etc.)."""
+    pass
+class APIError(AnkigenError):
+    """Base exception for API-related errors."""
+    pass
+class OpenAIAPIError(APIError):
+    """Raised when OpenAI API calls fail."""
+    pass
+class Context7APIError(APIError):
+    """Raised when Context7 API calls fail."""
+    pass
+class ExportError(AnkigenError):
+    """Base exception for export-related errors."""
+    pass
+class CardGenerationError(AnkigenError):
+    """Raised when card generation fails."""
+    pass
+class ConfigurationError(AnkigenError):
+    """Raised when configuration is invalid or missing."""
+    pass
+def handle_exception(
+    exc: Exception,
+    logger,
+    message: str,
+    reraise: bool = True,
+    reraise_as: type[Exception] | None = None,
+) -> None:
+    """Standardized exception handler.
+    Args:
+        exc: The exception to handle
+        logger: Logger instance to use
+        message: Error message to log
+        reraise: Whether to re-raise the exception
+        reraise_as: Optional exception type to wrap and re-raise as
+    Raises:
+        The original exception or wrapped exception if reraise is True
+    """
+    logger.error(f"{message}: {exc}", exc_info=True)
+    if reraise:
+        if reraise_as:
+            raise reraise_as(f"{message}: {exc}") from exc
+        raise

ankigen/exporters.py ADDED Viewed

	@@ -0,0 +1,943 @@

+# Module for CSV and APKG export functions
+import gradio as gr
+import pandas as pd
+import genanki
+import random
+import html
+from typing import List, Dict, Any, Optional
+import csv
+from datetime import datetime
+import os
+from ankigen.utils import get_logger, strip_html_tags
+logger = get_logger()
+# --- Helper function for formatting fields ---
+def _format_field_as_string(value: Any) -> str:
+    if isinstance(value, list) or isinstance(value, tuple):
+        return ", ".join(str(item).strip() for item in value if str(item).strip())
+    if pd.isna(value) or value is None:
+        return ""
+    return str(value).strip()
+def _generate_timestamped_filename(
+    base_name: str, extension: str, include_timestamp: bool = True
+) -> str:
+    """Generate a filename with optional timestamp.
+    Args:
+        base_name: The base name for the file (without extension)
+        extension: File extension (e.g., 'csv', 'apkg')
+        include_timestamp: Whether to include timestamp in filename
+    Returns:
+        Generated filename with extension
+    """
+    if include_timestamp:
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        return f"{base_name}_{timestamp}.{extension}"
+    return f"{base_name}.{extension}"
+def _ensure_output_directory(filepath: str) -> None:
+    """Ensure the output directory exists for the given filepath.
+    Args:
+        filepath: Full path to the file
+    Creates the directory if it doesn't exist.
+    """
+    output_dir = os.path.dirname(filepath)
+    if output_dir and not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+        logger.info(f"Created output directory: {output_dir}")
+def _validate_non_empty_data(data: Any, data_type: str) -> None:
+    """Validate that data is not empty.
+    Args:
+        data: The data to validate (list, DataFrame, etc.)
+        data_type: Description of data type for error messages
+    Raises:
+        ValueError: If data is empty or None
+    """
+    if data is None:
+        raise ValueError(f"No {data_type} provided to export.")
+    if isinstance(data, list) and not data:
+        raise ValueError(f"No {data_type} provided to export.")
+    if isinstance(data, pd.DataFrame) and data.empty:
+        raise ValueError(f"No {data_type} available to export.")
+# --- Constants for APKG Generation (Subtask 10) ---
+ANKI_BASIC_MODEL_NAME = "AnkiGen Basic"
+ANKI_CLOZE_MODEL_NAME = "AnkiGen Cloze"
+# It's good practice to generate unique IDs. These are examples.
+# Real applications might use a persistent way to store/retrieve these if models are updated.
+DEFAULT_BASIC_MODEL_ID = random.randrange(1 << 30, 1 << 31)
+DEFAULT_CLOZE_MODEL_ID = random.randrange(1 << 30, 1 << 31)
+# --- Shared card CSS: light-mode variables on .card, overridden under .nightMode/.night_mode for dark mode ---
+CARD_CSS = """
+    /* CSS Variables - Light Mode (default) */
+    .card {
+        --bg-card: #ffffff;
+        --bg-answer: #f0f9ff;
+        --bg-explanation: #f0fdf4;
+        --bg-example: #fefce8;
+        --bg-back-extra: #eef2ff;
+        --bg-prereq: #f8fafc;
+        --bg-code: #2d2d2d;
+        --text-primary: #1a1a1a;
+        --text-secondary: #4b5563;
+        --text-muted: #666666;
+        --text-heading: #1f2937;
+        --text-code: #f8f8f2;
+        --accent-blue: #2563eb;
+        --accent-blue-light: #60a5fa;
+        --accent-green: #4ade80;
+        --accent-yellow: #facc15;
+        --accent-indigo: #818cf8;
+        --accent-red: #ef4444;
+        --border-light: #e5e7eb;
+        --border-dashed: #cbd5e1;
+        --shadow: rgba(0, 0, 0, 0.05);
+    }
+    /* Dark Mode Overrides */
+    .nightMode .card,
+    .night_mode .card {
+        --bg-card: #1e1e1e;
+        --bg-answer: #1e293b;
+        --bg-explanation: #14291a;
+        --bg-example: #292518;
+        --bg-back-extra: #1e1b2e;
+        --bg-prereq: #262626;
+        --bg-code: #0d0d0d;
+        --text-primary: #e4e4e7;
+        --text-secondary: #a1a1aa;
+        --text-muted: #9ca3af;
+        --text-heading: #f4f4f5;
+        --text-code: #f8f8f2;
+        --accent-blue: #60a5fa;
+        --accent-blue-light: #93c5fd;
+        --accent-green: #4ade80;
+        --accent-yellow: #fde047;
+        --accent-indigo: #a5b4fc;
+        --accent-red: #f87171;
+        --border-light: #3f3f46;
+        --border-dashed: #52525b;
+        --shadow: rgba(0, 0, 0, 0.3);
+    }
+    /* Base styles */
+    .card {
+        font-family: 'Inter', system-ui, -apple-system, sans-serif;
+        font-size: 16px;
+        line-height: 1.6;
+        color: var(--text-primary);
+        max-width: 800px;
+        margin: 0 auto;
+        padding: 20px;
+        background: var(--bg-card);
+    }
+    @media (max-width: 768px) {
+        .card {
+            font-size: 14px;
+            padding: 15px;
+        }
+    }
+    /* Question side */
+    .question-side {
+        position: relative;
+        min-height: 200px;
+    }
+    .difficulty-indicator {
+        position: absolute;
+        top: 10px;
+        right: 10px;
+        width: 10px;
+        height: 10px;
+        border-radius: 50%;
+    }
+    .difficulty-indicator.beginner { background: var(--accent-green); }
+    .difficulty-indicator.intermediate { background: var(--accent-yellow); }
+    .difficulty-indicator.advanced { background: var(--accent-red); }
+    .question {
+        font-size: 1.3em;
+        font-weight: 600;
+        color: var(--accent-blue);
+        margin-bottom: 1.5em;
+    }
+    .prerequisites {
+        margin-top: 1em;
+        font-size: 0.9em;
+        color: var(--text-muted);
+    }
+    .prerequisites-toggle {
+        color: var(--accent-blue);
+        cursor: pointer;
+        text-decoration: underline;
+    }
+    .prerequisites-content {
+        display: none;
+        margin-top: 0.5em;
+        padding: 0.5em;
+        background: var(--bg-prereq);
+        border-radius: 4px;
+    }
+    .prerequisites.show .prerequisites-content {
+        display: block;
+    }
+    /* Answer side sections */
+    .answer-section,
+    .explanation-section,
+    .example-section,
+    .back-extra-section {
+        margin: 1.5em 0;
+        padding: 1.2em;
+        border-radius: 8px;
+        box-shadow: 0 2px 4px var(--shadow);
+    }
+    .answer-section {
+        background: var(--bg-answer);
+        border-left: 4px solid var(--accent-blue);
+    }
+    .back-extra-section {
+        background: var(--bg-back-extra);
+        border-left: 4px solid var(--accent-indigo);
+    }
+    .explanation-section {
+        background: var(--bg-explanation);
+        border-left: 4px solid var(--accent-green);
+    }
+    .example-section {
+        background: var(--bg-example);
+        border-left: 4px solid var(--accent-yellow);
+    }
+    .example-section pre {
+        background-color: var(--bg-code);
+        color: var(--text-code);
+        padding: 1em;
+        border-radius: 0.3em;
+        overflow-x: auto;
+        font-family: 'Consolas', 'Monaco', 'Menlo', monospace;
+        font-size: 0.9em;
+        line-height: 1.4;
+    }
+    .example-section code {
+        font-family: 'Consolas', 'Monaco', 'Menlo', monospace;
+    }
+    .metadata-section {
+        margin-top: 2em;
+        padding-top: 1em;
+        border-top: 1px solid var(--border-light);
+        font-size: 0.9em;
+        color: var(--text-secondary);
+    }
+    .metadata-section h3 {
+        font-size: 1em;
+        color: var(--text-heading);
+        margin-bottom: 0.5em;
+    }
+    .metadata-section > div {
+        margin-bottom: 0.8em;
+    }
+    .source-url a {
+        color: var(--accent-blue);
+        text-decoration: none;
+    }
+    .source-url a:hover {
+        text-decoration: underline;
+    }
+    /* Cloze deletion styles */
+    .cloze {
+        font-weight: bold;
+        color: var(--accent-blue);
+    }
+    /* General utility */
+    hr {
+        border: none;
+        border-top: 1px dashed var(--border-dashed);
+        margin: 1.5em 0;
+    }
+    /* Rich text field styling */
+    .field ul, .field ol {
+        margin-left: 1.5em;
+        padding-left: 0.5em;
+    }
+    .field li {
+        margin-bottom: 0.3em;
+    }
+    /* Responsive design */
+    @media (max-width: 640px) {
+        .answer-section,
+        .explanation-section,
+        .example-section,
+        .back-extra-section {
+            padding: 1em;
+            margin: 1em 0;
+        }
+    }
+    /* Animations */
+    @keyframes fadeIn {
+        from { opacity: 0; }
+        to { opacity: 1; }
+    }
+    .card {
+        animation: fadeIn 0.3s ease-in-out;
+    }
+"""
+# --- Full Model Definitions ---
+# genanki model for front/back ("Basic") notes.
+# The order of ``fields`` below must match the order of the ``note_fields`` list
+# built in export_cards_to_apkg, because genanki notes are filled positionally.
+# The single template renders:
+#   - front: the question plus a click-to-toggle prerequisites panel;
+#   - back: question, answer, explanation, example, learning outcomes,
+#     difficulty, and the source link (only when SourceURL is non-empty).
+# TagsStr is declared as a field but is not referenced by either template.
+BASIC_MODEL = genanki.Model(
+    DEFAULT_BASIC_MODEL_ID,  # Use the generated ID
+    ANKI_BASIC_MODEL_NAME,  # Use the constant name
+    fields=[
+        {"name": "Question"},
+        {"name": "Answer"},
+        {"name": "Explanation"},
+        {"name": "Example"},
+        {"name": "Prerequisites"},
+        {"name": "Learning_Outcomes"},
+        {"name": "Difficulty"},
+        {"name": "SourceURL"},  # Added for consistency if used by template
+        {"name": "TagsStr"},  # Added for consistency if used by template
+    ],
+    templates=[
+        {
+            "name": "Card 1",
+            # Front side. {{Difficulty}} is injected as an extra CSS class on the
+            # indicator element; the inline script toggles the 'show' class on
+            # the prerequisites container when its toggle is clicked.
+            "qfmt": """
+            <div class=\"card question-side\">
+                <div class=\"difficulty-indicator {{Difficulty}}\"></div>
+                <div class=\"content\">
+                    <div class=\"question\">{{Question}}</div>
+                    <div class=\"prerequisites\" onclick=\"event.stopPropagation();\">
+                        <div class=\"prerequisites-toggle\">Show Prerequisites</div>
+                        <div class=\"prerequisites-content\">{{Prerequisites}}</div>
+                    </div>
+                </div>
+            </div>
+            <script>
+                document.querySelector('.prerequisites-toggle').addEventListener('click', function(e) {
+                    e.stopPropagation();
+                    this.parentElement.classList.toggle('show');
+                });
+            </script>
+            """,
+            # Back side. The {{#SourceURL}}...{{/SourceURL}} pair wraps the source
+            # link so it is only emitted when that field has content.
+            "afmt": """
+            <div class=\"card answer-side\">
+                <div class=\"content\">
+                    <div class=\"question-section\">
+                        <div class=\"question\">{{Question}}</div>
+                        <div class=\"prerequisites\">
+                            <strong>Prerequisites:</strong> {{Prerequisites}}
+                        </div>
+                    </div>
+                    <hr>
+                    <div class=\"answer-section\">
+                        <h3>Answer</h3>
+                        <div class=\"answer\">{{Answer}}</div>
+                    </div>
+                    <div class=\"explanation-section\">
+                        <h3>Explanation</h3>
+                        <div class=\"explanation-text\">{{Explanation}}</div>
+                    </div>
+                    <div class=\"example-section\">
+                        <h3>Example</h3>
+                        <div class=\"example-text\">{{Example}}</div>
+                        <!-- Example field might contain pre/code or plain text -->
+                        <!-- Handled by how HTML is put into the Example field -->
+                    </div>
+                    <div class=\"metadata-section\">
+                        <div class=\"learning-outcomes\">
+                            <h3>Learning Outcomes</h3>
+                            <div>{{Learning_Outcomes}}</div>
+                        </div>
+                        <div class=\"difficulty\">
+                            <h3>Difficulty Level</h3>
+                            <div>{{Difficulty}}</div>
+                        </div>
+                        {{#SourceURL}}<div class=\"source-url\"><small>Source: <a href=\"{{SourceURL}}\">{{SourceURL}}</a></small></div>{{/SourceURL}}
+                    </div>
+                </div>
+            </div>
+            """,
+        }
+    ],
+    css=CARD_CSS,
+)
+# genanki model for cloze-deletion notes (model_type=1).
+# Field order must match the cloze ``note_fields`` list built in
+# export_cards_to_apkg: the card's question text goes into "Text" and its
+# answer into "Back Extra". Both sides render the Text field through the
+# {{cloze:Text}} filter. TagsStr is declared but not used by the template.
+CLOZE_MODEL = genanki.Model(
+    DEFAULT_CLOZE_MODEL_ID,  # Use the generated ID
+    ANKI_CLOZE_MODEL_NAME,  # Use the constant name
+    fields=[
+        {"name": "Text"},
+        {"name": "Back Extra"},
+        {"name": "Explanation"},
+        {"name": "Example"},
+        {"name": "Prerequisites"},
+        {"name": "Learning_Outcomes"},
+        {"name": "Difficulty"},
+        {"name": "SourceURL"},
+        {"name": "TagsStr"},
+    ],
+    templates=[
+        {
+            "name": "Cloze Card",
+            # Front side: cloze text plus the same click-to-toggle prerequisites
+            # panel used by the basic model.
+            "qfmt": """
+            <div class=\"card question-side\">
+                <div class=\"difficulty-indicator {{Difficulty}}\"></div>
+                <div class=\"content\">
+                    <div class=\"question\">{{cloze:Text}}</div>
+                    <div class=\"prerequisites\" onclick=\"event.stopPropagation();\">
+                        <div class=\"prerequisites-toggle\">Show Prerequisites</div>
+                        <div class=\"prerequisites-content\">{{Prerequisites}}</div>
+                    </div>
+                </div>
+            </div>
+            <script>
+                document.querySelector('.prerequisites-toggle').addEventListener('click', function(e) {
+                    e.stopPropagation();
+                    this.parentElement.classList.toggle('show');
+                });
+            </script>
+            """,
+            # Back side: the "Additional Information" section and the source link
+            # are each wrapped in a conditional so they only appear when the
+            # corresponding field ("Back Extra" / "SourceURL") is non-empty.
+            "afmt": """
+            <div class=\"card answer-side\">
+                <div class=\"content\">
+                    <div class=\"question-section\">
+                        <div class=\"question\">{{cloze:Text}}</div>
+                        <div class=\"prerequisites\">
+                            <strong>Prerequisites:</strong> {{Prerequisites}}
+                        </div>
+                    </div>
+                    <hr>
+                    {{#Back Extra}}
+                    <div class=\"back-extra-section\">
+                        <h3>Additional Information</h3>
+                        <div class=\"back-extra-text\">{{Back Extra}}</div>
+                    </div>
+                    {{/Back Extra}}
+                    <div class=\"explanation-section\">
+                        <h3>Explanation</h3>
+                        <div class=\"explanation-text\">{{Explanation}}</div>
+                    </div>
+                    <div class=\"example-section\">
+                        <h3>Example</h3>
+                        <div class=\"example-text\">{{Example}}</div>
+                    </div>
+                    <div class=\"metadata-section\">
+                        <div class=\"learning-outcomes\">
+                            <h3>Learning Outcomes</h3>
+                            <div>{{Learning_Outcomes}}</div>
+                        </div>
+                        <div class=\"difficulty\">
+                            <h3>Difficulty Level</h3>
+                            <div>{{Difficulty}}</div>
+                        </div>
+                        {{#SourceURL}}<div class=\"source-url\"><small>Source: <a href=\"{{SourceURL}}\">{{SourceURL}}</a></small></div>{{/SourceURL}}
+                    </div>
+                </div>
+            </div>
+            """,
+        }
+    ],
+    css=CARD_CSS,
+    model_type=1,  # Cloze model type
+)
+# --- Helper functions for APKG (Subtask 10) ---
+def _get_or_create_model(
+    model_id: int,
+    name: str,
+    fields: List[Dict[str, str]],
+    templates: List[Dict[str, str]],
+) -> genanki.Model:
+    return genanki.Model(model_id, name, fields=fields, templates=templates)
+# --- New CSV Exporter for List of Dictionaries ---
+def export_cards_to_csv(
+    cards: List[Dict[str, Any]], filename: Optional[str] = None
+) -> str:
+    """Export a list of card dictionaries to a CSV file.
+    Args:
+        cards: A list of dictionaries, where each dictionary represents a card
+               and should contain 'front' and 'back' keys. Other keys like
+               'tags' and 'note_type' are optional.
+        filename: Optional. The desired filename/path for the CSV.
+                  If None, a timestamped filename will be generated.
+    Returns:
+        The path to the generated CSV file.
+    Raises:
+        IOError: If there is an issue writing to the file.
+        KeyError: If a card dictionary is missing essential keys like 'front' or 'back'.
+        ValueError: If the cards list is empty or not provided.
+    """
+    # Validation using helper
+    _validate_non_empty_data(cards, "cards")
+    # Filename generation using helper
+    if not filename:
+        filename = _generate_timestamped_filename("ankigen_cards", "csv")
+        logger.info(f"No filename provided, generated: {filename}")
+    # Ensure output directory exists using helper
+    _ensure_output_directory(filename)
+    # Define the fieldnames expected in the CSV.
+    fieldnames = ["front", "back", "tags", "note_type"]
+    try:
+        logger.info(f"Attempting to export {len(cards)} cards to {filename}")
+        with open(filename, "w", newline="", encoding="utf-8") as csvfile:
+            writer = csv.DictWriter(
+                csvfile, fieldnames=fieldnames, extrasaction="ignore"
+            )
+            writer.writeheader()
+            for i, card in enumerate(cards):
+                try:
+                    # Ensure mandatory fields exist
+                    if "front" not in card or "back" not in card:
+                        raise KeyError(
+                            f"Card at index {i} is missing 'front' or 'back' key."
+                        )
+                    row_to_write = {
+                        "front": card["front"],
+                        "back": card["back"],
+                        "tags": card.get("tags", ""),
+                        "note_type": card.get("note_type", "Basic"),
+                    }
+                    writer.writerow(row_to_write)
+                except KeyError as e_inner:
+                    logger.error(
+                        f"Skipping card due to KeyError: {e_inner}. Card data: {card}"
+                    )
+                    continue
+        logger.info(f"Successfully exported cards to {filename}")
+        return filename
+    except IOError as e_io:
+        logger.error(f"IOError during CSV export to {filename}: {e_io}", exc_info=True)
+        raise
+    except Exception as e_general:
+        logger.error(
+            f"Unexpected error during CSV export to {filename}: {e_general}",
+            exc_info=True,
+        )
+        raise
+def export_cards_to_apkg(
+    cards: List[Dict[str, Any]],
+    filename: Optional[str] = None,
+    deck_name: str = "Ankigen Generated Cards",
+) -> str:
+    """Exports a list of card dictionaries to an Anki .apkg file.
+    Args:
+        cards: List of dictionaries, where each dictionary represents a card.
+               It's expected that these dicts are prepared by export_dataframe_to_apkg
+               and contain keys like 'Question', 'Answer', 'Explanation', etc.
+        filename: The full path (including filename) for the exported file.
+                  If None, a default filename will be generated in the current directory.
+        deck_name: The name of the deck if exporting to .apkg format.
+    Returns:
+        The path to the exported file.
+    """
+    logger.info(f"Starting APKG export for {len(cards)} cards to deck '{deck_name}'.")
+    # Validation using helper - note this now raises ValueError instead of gr.Error
+    _validate_non_empty_data(cards, "cards")
+    # Filename generation using helper
+    if not filename:
+        filename = _generate_timestamped_filename("ankigen_deck", "apkg")
+    elif not filename.lower().endswith(".apkg"):
+        filename += ".apkg"
+    # Ensure output directory exists using helper
+    _ensure_output_directory(filename)
+    anki_basic_model = BASIC_MODEL
+    anki_cloze_model = CLOZE_MODEL
+    deck_id = random.randrange(1 << 30, 1 << 31)
+    anki_deck = genanki.Deck(deck_id, deck_name)
+    notes_added_count = 0
+    for card_dict in cards:
+        note_type = card_dict.get("note_type", "Basic")
+        tags_for_note_object = card_dict.get("tags_for_note_object", [])
+        # Extract all potential fields, defaulting to empty strings
+        # Security: Sanitize HTML to prevent XSS when viewing cards in Anki
+        question = html.escape(card_dict.get("Question", ""))
+        answer = html.escape(card_dict.get("Answer", ""))
+        explanation = html.escape(card_dict.get("Explanation", ""))
+        example = html.escape(card_dict.get("Example", ""))
+        prerequisites = html.escape(card_dict.get("Prerequisites", ""))
+        learning_outcomes = html.escape(card_dict.get("Learning_Outcomes", ""))
+        difficulty = html.escape(card_dict.get("Difficulty", ""))
+        source_url = html.escape(card_dict.get("SourceURL", ""))
+        tags_str_field = html.escape(card_dict.get("TagsStr", ""))
+        if not question:
+            logger.error(
+                f"SKIPPING CARD DUE TO EMPTY 'Question' (front/text) field. Card data: {card_dict}"
+            )
+            continue
+        try:
+            if note_type.lower() == "cloze":
+                # CLOZE_MODEL fields
+                note_fields = [
+                    question,  # Text
+                    answer,  # Back Extra
+                    explanation,
+                    example,
+                    prerequisites,
+                    learning_outcomes,
+                    difficulty,
+                    source_url,
+                    tags_str_field,
+                ]
+                note = genanki.Note(
+                    model=anki_cloze_model,
+                    fields=note_fields,
+                    tags=tags_for_note_object,
+                )
+            else:  # Basic
+                # BASIC_MODEL fields
+                note_fields = [
+                    question,
+                    answer,
+                    explanation,
+                    example,
+                    prerequisites,
+                    learning_outcomes,
+                    difficulty,
+                    source_url,
+                    tags_str_field,
+                ]
+                note = genanki.Note(
+                    model=anki_basic_model,
+                    fields=note_fields,
+                    tags=tags_for_note_object,
+                )
+            anki_deck.add_note(note)
+            notes_added_count += 1
+        except Exception as e:
+            logger.error(
+                f"Failed to create genanki.Note for card: {card_dict}. Error: {e}",
+                exc_info=True,
+            )
+            logger.warning(f"Skipping card due to error: Question='{question[:50]}...'")
+    if notes_added_count == 0:
+        logger.error(
+            "No valid notes could be created from the provided cards. APKG generation aborted."
+        )
+        raise gr.Error("Failed to create any valid Anki notes from the input.")
+    logger.info(
+        f"Added {notes_added_count} notes to deck '{deck_name}'. Proceeding to package."
+    )
+    # Package and write
+    package = genanki.Package(anki_deck)
+    try:
+        package.write_to_file(filename)
+        logger.info(f"Successfully exported Anki deck to {filename}")
+    except Exception as e:
+        logger.error(f"Failed to write .apkg file to {filename}: {e}", exc_info=True)
+        raise IOError(f"Could not write .apkg file: {e}")
+    return filename
+def export_cards_from_crawled_content(
+    cards: List[Dict[str, Any]],
+    output_path: Optional[
+        str
+    ] = None,  # Changed from filename to output_path for clarity
+    export_format: str = "csv",  # Added export_format parameter
+    deck_name: str = "Ankigen Generated Cards",
+) -> str:
+    """Exports cards (list of dicts) to the specified format (CSV or APKG).
+    Args:
+        cards: List of dictionaries, where each dictionary represents a card.
+               Expected keys: 'front', 'back'. Optional: 'tags' (space-separated string), 'source_url', 'note_type' ('Basic' or 'Cloze').
+        output_path: The full path (including filename) for the exported file.
+                     If None, a default filename will be generated in the current directory.
+        export_format: The desired format, either 'csv' or 'apkg'.
+        deck_name: The name of the deck if exporting to .apkg format.
+    Returns:
+        The path to the exported file.
+    """
+    if not cards:
+        logger.warning("No cards provided to export_cards_from_crawled_content.")
+        # MODIFIED: Raise error immediately if no cards, as per test expectation
+        raise ValueError("No cards provided to export.")
+    logger.info(
+        f"Exporting {len(cards)} cards to format '{export_format}' with deck name '{deck_name}'."
+    )
+    if export_format.lower() == "csv":
+        return export_cards_to_csv(cards, filename=output_path)
+    elif export_format.lower() == "apkg":
+        return export_cards_to_apkg(cards, filename=output_path, deck_name=deck_name)
+    else:
+        supported_formats = ["csv", "apkg"]
+        logger.error(
+            f"Unsupported export format: {export_format}. Supported formats: {supported_formats}"
+        )
+        # MODIFIED: Updated error message to include supported formats
+        raise ValueError(
+            f"Unsupported export format: {export_format}. Supported formats: {supported_formats}"
+        )
+# --- New DataFrame CSV Exporter (Subtask 11) ---
+def export_dataframe_to_csv(
+    data: Optional[pd.DataFrame],
+    filename_suggestion: Optional[str] = "ankigen_cards.csv",
+) -> Optional[str]:
+    """Exports a Pandas DataFrame to a CSV file, designed for Gradio download.
+    Args:
+        data: The Pandas DataFrame to export.
+        filename_suggestion: A suggestion for the base filename (e.g., from subject).
+    Returns:
+        The path to the temporary CSV file, or None if an error occurs or data is empty.
+    """
+    logger.info(
+        f"Attempting to export DataFrame to CSV. Suggested filename: {filename_suggestion}"
+    )
+    # Validation using helper
+    try:
+        _validate_non_empty_data(data, "card data")
+    except ValueError:
+        logger.warning(
+            "No data provided to export_dataframe_to_csv. Skipping CSV export."
+        )
+        raise gr.Error("No card data available")
+    try:
+        # Generate filename from suggestion
+        base_name_from_suggestion = "ankigen_cards"  # Default base part
+        # Sanitize and use the suggestion (e.g., subject name) if provided
+        if filename_suggestion and isinstance(filename_suggestion, str):
+            # Remove .csv if present, then sanitize
+            processed_suggestion = filename_suggestion.removesuffix(".csv")
+            safe_suggestion = (
+                processed_suggestion.replace(" ", "_")
+                .replace("/", "-")
+                .replace("\\", "-")
+            )
+            if safe_suggestion:
+                base_name_from_suggestion = f"ankigen_{safe_suggestion[:50]}"
+        # Generate timestamped filename using helper
+        final_filename = _generate_timestamped_filename(
+            base_name_from_suggestion, "csv"
+        )
+        # Ensure output directory exists using helper
+        _ensure_output_directory(final_filename)
+        data.to_csv(final_filename, index=False)
+        logger.info(f"Successfully exported DataFrame to CSV: {final_filename}")
+        gr.Info(f"CSV ready for download: {os.path.basename(final_filename)}")
+        return final_filename
+    except Exception as e:
+        logger.error(f"Error exporting DataFrame to CSV: {e}", exc_info=True)
+        gr.Error(f"Error exporting DataFrame to CSV: {e}")
+        return None
+# --- New DataFrame to APKG Exporter (for Main Generator Tab) ---
+def export_dataframe_to_apkg(
+    df: pd.DataFrame,
+    output_path: Optional[str],
+    deck_name: str,
+) -> str:
+    """Exports a DataFrame of cards to an Anki .apkg file."""
+    # Validation using helper
+    _validate_non_empty_data(df, "cards in DataFrame")
+    logger.info(
+        f"Starting APKG export for DataFrame with {len(df)} rows to deck '{deck_name}'. Output: {output_path}"
+    )
+    cards_for_apkg: List[Dict[str, Any]] = []
+    for _, row in df.iterrows():
+        try:
+            note_type_val = (
+                _format_field_as_string(row.get("Card_Type", "Basic")) or "Basic"
+            )
+            topic = _format_field_as_string(row.get("Topic", ""))
+            difficulty_raw = _format_field_as_string(row.get("Difficulty", ""))
+            difficulty_plain_for_tag = strip_html_tags(difficulty_raw)
+            tags_list_for_note_obj = []
+            if topic:
+                tags_list_for_note_obj.append(topic.replace(" ", "_").replace(",", "_"))
+            if difficulty_plain_for_tag:
+                safe_difficulty_tag = difficulty_plain_for_tag.replace(" ", "_")
+                tags_list_for_note_obj.append(safe_difficulty_tag)
+            tags_str_for_field = " ".join(tags_list_for_note_obj)
+            card_data_for_note = {
+                "note_type": note_type_val,
+                "tags_for_note_object": tags_list_for_note_obj,
+                "TagsStr": tags_str_for_field,
+                "Question": _format_field_as_string(row.get("Question", "")),
+                "Answer": _format_field_as_string(row.get("Answer", "")),
+                "Explanation": _format_field_as_string(row.get("Explanation", "")),
+                "Example": _format_field_as_string(row.get("Example", "")),
+                "Prerequisites": _format_field_as_string(row.get("Prerequisites", "")),
+                "Learning_Outcomes": _format_field_as_string(
+                    row.get("Learning_Outcomes", "")
+                ),
+                "Difficulty": difficulty_raw,
+                "SourceURL": _format_field_as_string(row.get("Source_URL", "")),
+            }
+            cards_for_apkg.append(card_data_for_note)
+        except Exception as e:
+            logger.error(
+                f"Error processing DataFrame row for APKG: {row}. Error: {e}",
+                exc_info=True,
+            )
+            continue
+    if not cards_for_apkg:
+        logger.error("No cards could be processed from DataFrame for APKG export.")
+        raise ValueError("No processable cards found in DataFrame for APKG export.")
+    return export_cards_to_apkg(
+        cards_for_apkg, filename=output_path, deck_name=deck_name
+    )
+# --- Compatibility Exports for Tests and Legacy Code ---
+# These aliases ensure that tests expecting these names will find them.
+# Export functions under expected names
+# Legacy name kept for tests and older callers.
+export_csv = (
+    export_dataframe_to_csv  # Alias: export_csv is the same function object
+)
+# MODIFIED: export_deck is now a wrapper to provide a default deck_name
+def export_deck(
+    df: pd.DataFrame,
+    output_path: Optional[str] = None,
+    deck_name: str = "Ankigen Generated Cards",
+) -> str:
+    """Alias for exporting a DataFrame to APKG, providing a default deck name."""
+    if df is None or df.empty:
+        logger.warning("export_deck called with None or empty DataFrame.")
+        # Match the error type and message expected by tests
+        raise gr.Error("No card data available")
+    # Original logic to call export_dataframe_to_apkg
+    # Ensure all necessary parameters for export_dataframe_to_apkg are correctly passed.
+    # The export_dataframe_to_apkg function itself will handle its specific error conditions.
+    # The 'output_path' for export_dataframe_to_apkg needs to be handled.
+    # If 'output_path' is None here, export_cards_to_apkg (called by export_dataframe_to_apkg)
+    # will generate a default filename.
+    # If output_path is not provided to export_deck, it's None.
+    # export_dataframe_to_apkg expects output_path: Optional[str].
+    # And export_cards_to_apkg (which it calls) also handles Optional[str] filename.
+    # So, passing output_path directly should be fine.
+    return export_dataframe_to_apkg(df, output_path=output_path, deck_name=deck_name)
+# Additional legacy aliases: same function objects under shorter names.
+export_dataframe_csv = export_dataframe_to_csv
+export_dataframe_apkg = export_dataframe_to_apkg
+# Public API of this module: the two genanki models, the exporters, and the
+# compatibility aliases defined above.
+__all__ = [
+    "BASIC_MODEL",
+    "CLOZE_MODEL",
+    "export_csv",
+    "export_deck",
+    "export_dataframe_csv",
+    "export_dataframe_apkg",
+    "export_cards_to_csv",
+    "export_cards_to_apkg",
+    "export_cards_from_crawled_content",
+    "export_dataframe_to_csv",
+    "export_dataframe_to_apkg",
+]

ankigen/llm_interface.py ADDED Viewed

	@@ -0,0 +1,337 @@

+# Module for OpenAI client management and API call logic
+import asyncio
+import time
+from typing import Optional, TypeVar
+from agents import Agent, ModelSettings, Runner, set_default_openai_client
+from openai import (
+    APIConnectionError,
+    APIStatusError,
+    AsyncOpenAI,
+    OpenAIError,
+    RateLimitError,
+)
+from pydantic import BaseModel
+from ankigen.logging import logger
+from ankigen.utils import ResponseCache
+# Type variable for structured outputs: bound to pydantic's BaseModel so that
+# structured_agent_call can be typed as returning the ``output_type`` it is given.
+T = TypeVar("T", bound=BaseModel)
+class OpenAIClientManager:
+    """Manages the AsyncOpenAI client instance."""
+    def __init__(self):
+        self._client: Optional[AsyncOpenAI] = None
+        self._api_key: Optional[str] = None
+    async def initialize_client(self, api_key: str):
+        """Initializes the AsyncOpenAI client with the given API key."""
+        if not api_key or not api_key.startswith("sk-"):
+            logger.error("Invalid OpenAI API key provided for client initialization.")
+            raise ValueError("Invalid OpenAI API key format.")
+        self._api_key = api_key
+        try:
+            self._client = AsyncOpenAI(api_key=self._api_key)
+            logger.info("AsyncOpenAI client initialized successfully.")
+        except OpenAIError as e:  # Catch specific OpenAI errors
+            logger.error(f"Failed to initialize AsyncOpenAI client: {e}", exc_info=True)
+            self._client = None  # Ensure client is None on failure
+            raise  # Re-raise the OpenAIError to be caught by UI
+        except Exception as e:  # Catch any other unexpected errors
+            logger.error(
+                f"An unexpected error occurred during AsyncOpenAI client initialization: {e}",
+                exc_info=True,
+            )
+            self._client = None
+            raise RuntimeError("Unexpected error initializing AsyncOpenAI client.")
+    def get_client(self) -> AsyncOpenAI:
+        """Returns the initialized AsyncOpenAI client. Raises error if not initialized."""
+        if self._client is None:
+            logger.error(
+                "AsyncOpenAI client accessed before initialization or after a failed initialization."
+            )
+            raise RuntimeError(
+                "AsyncOpenAI client is not initialized. Please provide a valid API key."
+            )
+        return self._client
+    def __enter__(self):
+        """Context manager entry."""
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit - cleanup resources."""
+        self.close()
+        return False
+    async def __aenter__(self):
+        """Async context manager entry."""
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit - cleanup resources."""
+        await self.aclose()
+        return False
+    def close(self) -> None:
+        """Close the OpenAI client synchronously."""
+        if self._client:
+            try:
+                # OpenAI client has a close method for cleanup
+                if hasattr(self._client, "close"):
+                    self._client.close()
+                logger.debug("OpenAI client closed")
+            except Exception as e:
+                logger.warning(f"Error closing OpenAI client: {e}")
+            finally:
+                self._client = None
+    async def aclose(self) -> None:
+        """Close the OpenAI client asynchronously."""
+        if self._client:
+            try:
+                # OpenAI async client has an aclose method
+                if hasattr(self._client, "aclose"):
+                    await self._client.aclose()
+                elif hasattr(self._client, "close"):
+                    self._client.close()
+                logger.debug("OpenAI client closed (async)")
+            except Exception as e:
+                logger.warning(f"Error closing OpenAI client: {e}")
+            finally:
+                self._client = None
+# --- Agents SDK Utility ---
+async def structured_agent_call(
+    openai_client: AsyncOpenAI,
+    model: str,
+    instructions: str,
+    user_input: str,
+    output_type: type[T],
+    cache: Optional[ResponseCache] = None,
+    cache_key: Optional[str] = None,
+    temperature: float = 0.7,
+    timeout: float = 120.0,
+    retry_attempts: int = 3,
+) -> T:
+    """
+    Make a single-turn structured output call using the agents SDK.
+    This is a lightweight wrapper for simple structured output calls,
+    not intended for complex multi-agent workflows.
+    Args:
+        openai_client: AsyncOpenAI client instance
+        model: Model name (e.g., "gpt-5.2", "gpt-5.2-chat-latest")
+        instructions: System instructions for the agent
+        user_input: User prompt/input
+        output_type: Pydantic model class for structured output
+        cache: Optional ResponseCache instance
+        cache_key: Cache key (required if cache is provided)
+        temperature: Model temperature (default 0.7)
+        timeout: Request timeout in seconds (default 120)
+        retry_attempts: Number of retry attempts (default 3)
+    Returns:
+        Instance of output_type with the structured response
+    """
+    # 1. Check cache first
+    if cache and cache_key:
+        cached = cache.get(cache_key, model)
+        if cached is not None:
+            logger.info(f"Using cached response for model {model}")
+            # Reconstruct Pydantic model from cached dict
+            if isinstance(cached, dict):
+                return output_type.model_validate(cached)
+            return cached
+    # 2. Set up the OpenAI client for agents SDK
+    set_default_openai_client(openai_client, use_for_tracing=False)
+    # 3. Build model settings with GPT-5.x reasoning support
+    model_settings_kwargs: dict = {"temperature": temperature}
+    # GPT-5.x (not chat-latest) supports reasoning_effort
+    if model.startswith("gpt-5") and "chat-latest" not in model:
+        from openai.types.shared import Reasoning
+        model_settings_kwargs["reasoning"] = Reasoning(effort="none")
+    model_settings = ModelSettings(**model_settings_kwargs)
+    # 4. Create agent with structured output
+    agent = Agent(
+        name="structured_output_agent",
+        instructions=instructions,
+        model=model,
+        model_settings=model_settings,
+        output_type=output_type,
+    )
+    # 5. Execute with retry and timeout
+    last_error: Optional[Exception] = None
+    for attempt in range(retry_attempts):
+        try:
+            result = await asyncio.wait_for(
+                Runner.run(agent, user_input),
+                timeout=timeout,
+            )
+            # 6. Extract structured output
+            output = result.final_output
+            # 7. Cache successful result (as dict for serialization)
+            if cache and cache_key and output is not None:
+                if isinstance(output, BaseModel):
+                    cache.set(cache_key, model, output.model_dump())
+                else:
+                    cache.set(cache_key, model, output)
+            logger.debug(f"Successfully received response from model {model}")
+            return output
+        except asyncio.TimeoutError as e:
+            last_error = e
+            if attempt < retry_attempts - 1:
+                wait_time = 4 * (2**attempt)  # Exponential backoff
+                logger.warning(
+                    f"Agent timed out (attempt {attempt + 1}/{retry_attempts}), "
+                    f"retrying in {wait_time}s..."
+                )
+                await asyncio.sleep(wait_time)
+                continue
+            logger.error(f"Agent timed out after {retry_attempts} attempts")
+            raise
+        except Exception as e:
+            last_error = e
+            if attempt < retry_attempts - 1:
+                wait_time = 4 * (2**attempt)
+                logger.warning(
+                    f"Agent failed (attempt {attempt + 1}/{retry_attempts}): {e}, "
+                    f"retrying in {wait_time}s..."
+                )
+                await asyncio.sleep(wait_time)
+                continue
+            logger.error(f"Agent failed after {retry_attempts} attempts: {e}")
+            raise
+    raise RuntimeError(f"Retry loop exited without result: {last_error}")
+# Generic schema for arbitrary JSON structured outputs
+class GenericJsonOutput(BaseModel):
+    """Generic container for JSON output - allows any structure.
+    Declares no fields of its own; used by structured_output_completion as the
+    output type when the caller only wants a plain dict back.
+    """
+    # NOTE(review): "extra": "allow" makes the generated JSON schema permit
+    # additional properties; confirm the agents SDK / structured-output mode
+    # accepts such a schema for ``output_type``.
+    model_config = {"extra": "allow"}  # Allow arbitrary fields
+async def structured_output_completion(
+    openai_client: AsyncOpenAI,
+    model: str,
+    response_format: dict,  # Legacy parameter - kept for API compatibility
+    system_prompt: str,
+    user_prompt: str,
+    cache: ResponseCache,
+) -> Optional[dict]:
+    """Run one JSON-producing completion through structured_agent_call().
+    The system prompt gets a "respond as JSON" sentence appended unless it
+    already contains one. The cache key is "<system_prompt>:<user_prompt>",
+    built from the prompts exactly as passed in (before the sentence is
+    appended and before whitespace is stripped).
+    Args:
+        openai_client: Async OpenAI client forwarded to structured_agent_call().
+        model: Model name forwarded to structured_agent_call().
+        response_format: Ignored; accepted only so existing callers keep working.
+        system_prompt: Instructions for the model.
+        user_prompt: The user message.
+        cache: Response cache forwarded to structured_agent_call().
+    Returns:
+        A plain dict when structured_agent_call() returns a Pydantic model;
+        otherwise whatever it returned, unchanged.
+    Raises:
+        Exception: Any failure inside the call is logged and re-raised as-is.
+    """
+    json_marker = "JSON object matching the specified schema"
+    if json_marker in system_prompt:
+        instructions = system_prompt
+    else:
+        # Nudge the model toward JSON when the caller's prompt does not already do so
+        instructions = f"{system_prompt}\nProvide your response as a JSON object matching the specified schema."
+    try:
+        reply = await structured_agent_call(
+            openai_client=openai_client,
+            model=model,
+            instructions=instructions.strip(),
+            user_input=user_prompt.strip(),
+            output_type=GenericJsonOutput,
+            cache=cache,
+            cache_key=f"{system_prompt}:{user_prompt}",
+            temperature=0.7,
+        )
+        # Callers expect a plain dict, so unwrap Pydantic results
+        return reply.model_dump() if isinstance(reply, BaseModel) else reply
+    except Exception as e:
+        logger.error(
+            f"structured_output_completion failed for model {model}: {e}",
+            exc_info=True,
+        )
+        raise  # Re-raise unexpected errors
+# Specific OpenAI exceptions to retry on
+# NOTE(review): in the openai SDK, APIStatusError is the base class for every
+# non-2xx HTTP response (4xx as well as 5xx) and RateLimitError derives from it,
+# so this tuple also matches client errors such as 400/401 - confirm that
+# retrying those is intended, or narrow the entry to InternalServerError.
+RETRYABLE_OPENAI_ERRORS = (
+    APIConnectionError,
+    RateLimitError,
+    APIStatusError,  # Typically for 5xx server errors
+)
+# --- New OpenAIRateLimiter Class (Subtask 9.2) ---
+class OpenAIRateLimiter:
+    """Manages token usage to proactively stay within (estimated) OpenAI rate limits.
+    Usage is counted in fixed 60-second windows. A window starts when the
+    limiter is created or when the previous window is found to have expired.
+    The limiter only tracks the estimates it is given; it never talks to the API.
+    """
+    WINDOW_SECONDS: float = 60.0  # length of one accounting window
+    def __init__(self, tokens_per_minute: int = 60000):  # Default, can be configured
+        self.tokens_per_minute_limit: int = tokens_per_minute
+        self.tokens_used_current_window: int = 0
+        self.current_window_start_time: float = time.monotonic()
+    async def wait_if_needed(self, estimated_tokens_for_request: int):
+        """Reserve tokens for a request, sleeping first if the current window is full.
+        Args:
+            estimated_tokens_for_request: Tokens the caller expects the request to use.
+        """
+        # Re-evaluate the state after every sleep. Other coroutines sharing this
+        # limiter may have rolled the window over and reserved tokens while this
+        # one slept; blindly resetting the counters after the await (as the old
+        # code did) would erase their reservations.
+        while True:
+            current_time = time.monotonic()
+            # Check if the window has passed
+            if current_time - self.current_window_start_time >= self.WINDOW_SECONDS:
+                self.current_window_start_time = current_time
+                self.tokens_used_current_window = 0
+                logger.debug("OpenAIRateLimiter: Window reset.")
+            fits_in_window = (
+                self.tokens_used_current_window + estimated_tokens_for_request
+                <= self.tokens_per_minute_limit
+            )
+            # An empty window is the best case there is: a request larger than
+            # the whole budget can never fit, so let it through instead of
+            # waiting for a reset that would change nothing.
+            if fits_in_window or self.tokens_used_current_window == 0:
+                break
+            time_to_wait = (
+                self.current_window_start_time + self.WINDOW_SECONDS
+            ) - current_time
+            logger.info(
+                f"OpenAIRateLimiter: Approaching token limit. Waiting for {time_to_wait:.2f} seconds to reset window."
+            )
+            await asyncio.sleep(time_to_wait)
+        # Safe to proceed: record the tokens for the current request
+        self.tokens_used_current_window += estimated_tokens_for_request
+        logger.debug(
+            f"OpenAIRateLimiter: Tokens used in current window: {self.tokens_used_current_window}/{self.tokens_per_minute_limit}"
+        )
+# Global instance of the rate limiter, created once when this module is imported.
+# This assumes a single rate limit bucket for all calls from this application instance.
+# More sophisticated scenarios might need per-model or per-key limiters.
+openai_rate_limiter = OpenAIRateLimiter()  # Using default 60k TPM for now

ankigen/logging.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import logging
+import os
+import sys
+from datetime import datetime
+def setup_logger(name="ankigen", log_level=logging.INFO):
+    """Set up and return a logger with file and console handlers.
+    Calling this again for the same ``name`` replaces the logger's handlers
+    instead of accumulating duplicates.
+    Args:
+        name: Logger name; also used as the log file prefix.
+        log_level: Level applied to the logger itself.
+    Returns:
+        The configured ``logging.Logger``. It always has a stdout handler; the
+        file handler (~/.ankigen/logs/<name>_<YYYYMMDD>.log) is attached only
+        when that location can be written.
+    """
+    logger = logging.getLogger(name)
+    logger.setLevel(log_level)
+    # Detach AND close any previous handlers. handlers.clear() alone would
+    # leave the old log file open for the rest of the process on every call.
+    for stale_handler in list(logger.handlers):
+        logger.removeHandler(stale_handler)
+        stale_handler.close()
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(lineno)d - %(message)s"
+    )
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+    # Logs are stored in ~/.ankigen/logs/; the file name carries the date on
+    # which the logger was set up (e.g., ankigen_20231027.log)
+    log_dir = os.path.join(os.path.expanduser("~"), ".ankigen", "logs")
+    timestamp = datetime.now().strftime("%Y%m%d")
+    log_file = os.path.join(log_dir, f"{name}_{timestamp}.log")
+    try:
+        os.makedirs(log_dir, exist_ok=True)
+        # Explicit UTF-8 so non-ASCII messages do not depend on the platform default
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+    except OSError as e:
+        # Read-only or missing home directory: keep console logging rather
+        # than failing (this function runs at import time).
+        logger.warning(f"File logging disabled, cannot write to {log_dir}: {e}")
+    else:
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+    return logger
+# Create a default logger instance for easy import and use.
+# Projects can also create their own named loggers using setup_logger(name="my_module_logger")
+# Note: this runs at import time, so importing the module configures the
+# "ankigen" logger and touches the log directory as a side effect.
+logger = setup_logger()

ankigen/models.py ADDED Viewed

	@@ -0,0 +1,62 @@

+from pydantic import BaseModel
+from typing import List, Optional
+# Module for Pydantic data models
+# A step made of two required text fields.
+# presumably one reasoning step emitted by the model - confirm against the prompt that fills it
+class Step(BaseModel):
+    explanation: str
+    output: str
+# A list of Step objects together with a list of result strings.
+class Subtopics(BaseModel):
+    steps: List[Step]
+    result: List[str]
+# Top-level container holding a list of Subtopics.
+class Topics(BaseModel):
+    result: List[Subtopics]
+# Front side of a card; `question` may be omitted and then defaults to None.
+class CardFront(BaseModel):
+    question: Optional[str] = None
+# Back side of a card. `answer` is optional, but `explanation` and `example`
+# are required strings, so validation fails when either one is missing.
+# NOTE(review): ui_logic.cards_to_dataframe guards both with `or ""`, which
+# suggests empty values are expected - confirm whether they should default to "".
+class CardBack(BaseModel):
+    answer: Optional[str] = None
+    explanation: str
+    example: str
+# A complete flashcard: front, back, optional free-form metadata and a type label.
+# ui_logic reads the "tags", "topic" and "source_url" keys from `metadata`.
+# NOTE(review): the default type here is lowercase "basic" while ui_logic falls
+# back to "Basic" - confirm which spelling downstream consumers expect.
+class Card(BaseModel):
+    front: CardFront
+    back: CardBack
+    metadata: Optional[dict] = None
+    card_type: str = "basic"  # Add card_type, default to basic
+# A topic name paired with a list of Card objects.
+class CardList(BaseModel):
+    topic: str
+    cards: List[Card]
+# Describes one concept: its name, prerequisite list, learning outcomes and a
+# difficulty label. The label is a free-form string; the three values named in
+# the trailing comment are not enforced by the model.
+class ConceptBreakdown(BaseModel):
+    main_concept: str
+    prerequisites: List[str]
+    learning_outcomes: List[str]
+    difficulty_level: str  # "beginner", "intermediate", "advanced"
+# A Card bundled with the concept it covers plus two text fields
+# (`thought_process`, `verification_steps`) describing how it was produced.
+# presumably these hold the model's own reasoning trace - confirm against the generator prompt
+class CardGeneration(BaseModel):
+    concept: str
+    thought_process: str
+    verification_steps: List[str]
+    card: Card
+# Aggregate for a whole topic: concept breakdowns, generated cards, and two
+# lists of strings for study order and review recommendations. All fields are required.
+class LearningSequence(BaseModel):
+    topic: str
+    concepts: List[ConceptBreakdown]
+    cards: List[CardGeneration]
+    suggested_study_order: List[str]
+    review_recommendations: List[str]

ankigen/ui_logic.py ADDED Viewed

	@@ -0,0 +1,168 @@

+# Module for functions that build or manage UI sections/logic
+import gradio as gr
+import pandas as pd
+from typing import List
+from ankigen.utils import get_logger
+from ankigen.models import Card
+# Module-level logger obtained from ankigen.utils.get_logger()
+logger = get_logger()
+def update_mode_visibility(mode: str, current_subject: str):
+    """Return the Gradio updates that put the UI into its default layout.
+    Only 'subject' mode is supported today, so ``mode`` is accepted but never
+    read; the function is kept for future extensibility.
+    Args:
+        mode: Selected generation mode (currently unused).
+        current_subject: Text to keep in the subject textbox.
+    Returns:
+        A 5-tuple of ``gr.update`` objects, in this order: subject group
+        (visible), cards output (visible), subject textbox (value kept),
+        output DataFrame (emptied, standard columns), total-cards HTML
+        (count reset to 0, hidden).
+    """
+    # Standard column layout for the (empty) main output table
+    empty_output_df = pd.DataFrame(
+        columns=[
+            "Index",
+            "Topic",
+            "Card_Type",
+            "Question",
+            "Answer",
+            "Explanation",
+            "Example",
+            "Prerequisites",
+            "Learning_Outcomes",
+            "Difficulty",
+        ]
+    )
+    subject_group_update = gr.update(visible=True)  # always visible
+    cards_output_update = gr.update(visible=True)  # always visible
+    subject_text_update = gr.update(value=current_subject)
+    output_df_update = gr.update(value=empty_output_df)
+    total_cards_update = gr.update(
+        value="<div><b>Total Cards Generated:</b> <span id='total-cards-count'>0</span></div>",
+        visible=False,
+    )
+    return (
+        subject_group_update,
+        cards_output_update,
+        subject_text_update,
+        output_df_update,
+        total_cards_update,
+    )
+# --- Card Preview and Editing Utilities ---
+def cards_to_dataframe(cards: List[Card]) -> pd.DataFrame:
+    """Build the preview/edit table for a list of cards.
+    Each card becomes one row. "ID" is the card's 1-based position in
+    ``cards``; tags are joined with ", "; missing metadata yields "N/A" for
+    the topic and empty strings for tags and source URL.
+    Args:
+        cards: Cards to display.
+    Returns:
+        A DataFrame with the columns ID, Topic, Front, Back, Tags, Card Type,
+        Explanation, Example, Source_URL (in that order), even when ``cards``
+        is empty.
+    """
+    column_order = [
+        "ID",
+        "Topic",
+        "Front",
+        "Back",
+        "Tags",
+        "Card Type",
+        "Explanation",
+        "Example",
+        "Source_URL",
+    ]
+    records = []
+    for position, card in enumerate(cards, start=1):
+        # None and an empty dict both mean "no metadata"
+        meta = card.metadata or {}
+        tags = meta.get("tags", [])
+        records.append(
+            {
+                "ID": position,
+                "Topic": meta.get("topic", "N/A"),
+                "Front": card.front.question,
+                "Back": card.back.answer,
+                "Tags": ", ".join(tags) if tags else "",
+                "Card Type": card.card_type or "Basic",
+                "Explanation": card.back.explanation or "",
+                "Example": card.back.example or "",
+                "Source_URL": meta.get("source_url", ""),
+            }
+        )
+    # Passing the columns explicitly keeps the structure stable for empty input
+    return pd.DataFrame(records, columns=column_order)
+def dataframe_to_cards(df: pd.DataFrame, original_cards: List[Card]) -> List[Card]:
+    """
+    Updates a list of Card objects based on edits from a Pandas DataFrame.
+    Assumes the DataFrame 'ID' column corresponds to the 1-based index of original_cards.
+    Args:
+        df: Edited table, normally produced by cards_to_dataframe().
+        original_cards: The cards the table was built from.
+    Returns:
+        New Card objects in row order. A row whose ID cannot be read or is out
+        of range is skipped (and logged). A row that fails after its ID was
+        resolved contributes the unmodified original card. An empty
+        DataFrame yields an empty list.
+    """
+    if df.empty:
+        return []
+    updated_cards: List[Card] = []
+    for index, row in df.iterrows():
+        # Reset per row: the error handler must never see the index of a
+        # previous row (or an unbound name on the very first row).
+        original_card_index = None
+        try:
+            card_id = int(row["ID"])  # DataFrame ID is 1-indexed
+            original_card_index = card_id - 1
+            if not 0 <= original_card_index < len(original_cards):
+                logger.warning(
+                    f"Card ID {card_id} from DataFrame is out of bounds for original_cards list."
+                )
+                continue
+            card_to_update = original_cards[original_card_index]
+            # model_copy() is the pydantic v2 spelling of the deprecated copy()
+            new_front = card_to_update.front.model_copy(
+                update={
+                    "question": _cell_text(row, "Front", card_to_update.front.question)
+                }
+            )
+            new_back = card_to_update.back.model_copy(
+                update={
+                    "answer": _cell_text(row, "Back", card_to_update.back.answer),
+                    "explanation": _cell_text(
+                        row, "Explanation", card_to_update.back.explanation
+                    ),
+                    "example": _cell_text(row, "Example", card_to_update.back.example),
+                }
+            )
+            new_metadata = (
+                card_to_update.metadata.copy() if card_to_update.metadata else {}
+            )
+            existing_tags = ",".join(new_metadata.get("tags", []))
+            tags_str = _cell_text(row, "Tags", existing_tags)
+            new_metadata["tags"] = [t.strip() for t in tags_str.split(",") if t.strip()]
+            new_metadata["topic"] = _cell_text(
+                row, "Topic", new_metadata.get("topic", "N/A")
+            )
+            updated_cards.append(
+                card_to_update.model_copy(
+                    update={
+                        "front": new_front,
+                        "back": new_back,
+                        "card_type": _cell_text(
+                            row, "Card Type", card_to_update.card_type or "Basic"
+                        ),
+                        "metadata": new_metadata,
+                    }
+                )
+            )
+        except (ValueError, KeyError, AttributeError, TypeError) as e:
+            logger.error(
+                f"Error processing row {index} from DataFrame: {row}. Error: {e}"
+            )
+            # Fall back to the untouched original only when this row's own ID
+            # was resolved; otherwise there is no card to fall back to.
+            if original_card_index is not None and (
+                0 <= original_card_index < len(original_cards)
+            ):
+                updated_cards.append(original_cards[original_card_index])
+    return updated_cards
+def _cell_text(row, column: str, fallback) -> str:
+    """Return the row's cell as text, using ``fallback`` for a missing or blank cell."""
+    value = row.get(column, fallback)
+    # None/NaN mean "no value"; str() would turn them into the literal
+    # card text "None" / "nan".
+    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
+        value = fallback
+    return "" if value is None else str(value)

ankigen/utils.py ADDED Viewed

	@@ -0,0 +1,231 @@

+# Module for utility functions (logging, caching, web fetching)
+import logging
+from logging.handlers import RotatingFileHandler
+import sys
+import hashlib
+import requests
+from bs4 import BeautifulSoup
+from typing import Any, Optional
+import time
+# --- Logging Setup ---
+_logger_instance = None
+def setup_logging() -> logging.Logger:
+    """Configure the shared "ankigen" logger once and return it.
+    The first call attaches a rotating file handler (ankigen.log in the current
+    working directory, 1 MiB per file, 5 backups, DEBUG and up, detailed
+    format) and a stdout handler (INFO and up, short format). Later calls
+    return the same logger without touching it.
+    Returns:
+        The configured logger. If ankigen.log cannot be opened, the logger
+        still works with the console handler only.
+    """
+    global _logger_instance
+    if _logger_instance:
+        return _logger_instance
+    logger = logging.getLogger("ankigen")
+    logger.setLevel(logging.DEBUG)  # Keep debug level for the root logger
+    # Drop handlers attached elsewhere, closing them so that any open log
+    # file is released instead of leaking until process exit.
+    for stale_handler in list(logger.handlers):
+        logger.removeHandler(stale_handler)
+        stale_handler.close()
+    detailed_formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(lineno)d - %(message)s"
+    )
+    simple_formatter = logging.Formatter("%(levelname)s: %(message)s")
+    file_error = None
+    try:
+        # Explicit UTF-8 so non-ASCII messages do not depend on the platform default
+        file_handler = RotatingFileHandler(
+            "ankigen.log", maxBytes=1024 * 1024, backupCount=5, encoding="utf-8"
+        )
+    except OSError as e:
+        # Unwritable working directory: remember the error and carry on with
+        # console logging (this runs at import time, so raising would break imports).
+        file_error = e
+    else:
+        file_handler.setLevel(logging.DEBUG)  # File handler logs everything from DEBUG up
+        file_handler.setFormatter(detailed_formatter)
+        logger.addHandler(file_handler)
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setLevel(logging.INFO)  # Console handler logs INFO and above
+    console_handler.setFormatter(simple_formatter)
+    logger.addHandler(console_handler)
+    if file_error is not None:
+        logger.warning(f"File logging disabled, cannot open ankigen.log: {file_error}")
+    _logger_instance = logger
+    return logger
+def get_logger() -> logging.Logger:
+    """Return the shared logger, configuring it on first use."""
+    # Lazy initialisation: setup_logging() creates and remembers the instance
+    return setup_logging() if _logger_instance is None else _logger_instance
+# Initialize logger when module is loaded.
+# Import-time side effect: this configures the "ankigen" logger's handlers.
+logger = get_logger()
+# --- Caching ---
+class ResponseCache:
+    """Small in-memory LRU cache for API responses, keyed by (prompt, model).
+    Entries live in a single dict whose insertion order doubles as the recency
+    order: the first key is the least recently used, the last one the most
+    recent. A ``maxsize`` of zero or less disables storage (``set`` becomes a
+    no-op). Hit and miss counters are kept for diagnostics.
+    """
+    def __init__(self, maxsize: int = 128):
+        self.maxsize = maxsize
+        self._cache = {}  # {hashed key: response}, least recently used first
+        self.hits = 0
+        self.misses = 0
+    def get(self, prompt: str, model: str) -> Optional[Any]:
+        """Retrieve item from cache, updating LRU order.
+        Returns:
+            The cached response, or None on a miss.
+        """
+        cache_key = self._create_key(prompt, model)
+        if cache_key in self._cache:
+            # Re-insert so the key moves to the most-recently-used end
+            response = self._cache.pop(cache_key)
+            self._cache[cache_key] = response
+            self.hits += 1
+            logger.debug(
+                f"Cache HIT: {cache_key[:16]}... (hits={self.hits}, misses={self.misses})"
+            )
+            return response
+        self.misses += 1
+        logger.debug(
+            f"Cache MISS: {cache_key[:16]}... (hits={self.hits}, misses={self.misses})"
+        )
+        return None
+    def set(self, prompt: str, model: str, response: Any):
+        """Store item in cache with LRU eviction when full."""
+        if self.maxsize <= 0:
+            # Nothing may be stored. Without this guard the eviction branch
+            # would try to evict from an empty cache and raise.
+            return
+        cache_key = self._create_key(prompt, model)
+        if cache_key in self._cache:
+            # Existing key: drop it so the re-insert below refreshes its position
+            del self._cache[cache_key]
+        elif len(self._cache) >= self.maxsize:
+            # Full: the first key in insertion order is the least recently used
+            evicted_key = next(iter(self._cache))
+            del self._cache[evicted_key]
+            logger.debug(
+                f"Cache EVICT: {evicted_key[:16]}... (size={len(self._cache)})"
+            )
+        self._cache[cache_key] = response
+        logger.debug(f"Cache SET: {cache_key[:16]}... (size={len(self._cache)})")
+    def clear(self) -> None:
+        """Clear all cache entries and statistics."""
+        self._cache.clear()
+        self.hits = 0
+        self.misses = 0
+        logger.debug("Cache CLEARED")
+    def _create_key(self, prompt: str, model: str) -> str:
+        """Create cache key from prompt and model (MD5 hash for size efficiency)."""
+        # The hash only shortens the key; it is not used for anything security related
+        return hashlib.md5(f"{model}:{prompt}".encode("utf-8")).hexdigest()
+# --- Web Content Fetching ---
+def fetch_webpage_text(url: str) -> str:
+    """Fetches and extracts main text content from a URL.
+    The page is downloaded with a browser-like User-Agent and a 15 second
+    timeout. Script and style elements are removed, then text is taken from
+    the first <main>, else the first <article>, else <body>. Lines are stripped
+    and blank lines dropped.
+    Args:
+        url: Address of the page to fetch.
+    Returns:
+        The cleaned text, or "" when no text could be extracted.
+    Raises:
+        ConnectionError: The HTTP request failed (network error, timeout, or
+            an error status).
+        RuntimeError: The HTML could not be parsed, or another unexpected
+            error occurred while processing the page.
+    """
+    # Local import: only this function needs the exception type
+    from bs4 import FeatureNotFound
+    logger_util = get_logger()  # Use the logger from this module
+    try:
+        logger_util.info(f"Fetching content from URL: {url}")
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+        response = requests.get(url, headers=headers, timeout=15)
+        response.raise_for_status()
+        logger_util.debug(f"Parsing HTML content for {url}")
+        try:
+            soup = BeautifulSoup(response.text, "lxml")
+        except (FeatureNotFound, ImportError):
+            # Beautiful Soup reports a missing lxml with FeatureNotFound, not
+            # ImportError; catching only ImportError meant this fallback never ran.
+            logger_util.warning("lxml not found, using html.parser instead.")
+            soup = BeautifulSoup(response.text, "html.parser")
+        except Exception as e:  # Catch other BeautifulSoup init errors
+            logger_util.error(
+                f"BeautifulSoup initialization failed for {url}: {e}", exc_info=True
+            )
+            raise RuntimeError(f"Failed to parse HTML content for {url}.") from e
+        for script_or_style in soup(["script", "style"]):
+            script_or_style.extract()
+        main_content = soup.find("main") or soup.find("article")
+        if main_content:
+            text = main_content.get_text()
+            logger_util.debug(f"Extracted text from <{main_content.name}> tag.")
+        else:
+            body = soup.find("body")
+            if body:
+                text = body.get_text()
+                logger_util.debug("Extracted text from <body> tag (fallback).")
+            else:
+                text = ""
+                logger_util.warning(f"Could not find <body> tag in {url}")
+        # Simpler text cleaning: join stripped lines
+        lines = (line.strip() for line in text.splitlines())
+        cleaned_text = "\n".join(line for line in lines if line)
+        if not cleaned_text:
+            logger_util.warning(f"Could not extract meaningful text from {url}")
+            return ""
+        logger_util.info(
+            f"Successfully extracted text from {url} (Length: {len(cleaned_text)} chars)"
+        )
+        return cleaned_text
+    except requests.exceptions.RequestException as e:
+        logger_util.error(f"Network error fetching URL {url}: {e}", exc_info=True)
+        raise ConnectionError(f"Could not fetch URL: {e}") from e
+    except Exception as e:
+        logger_util.error(f"Error processing URL {url}: {e}", exc_info=True)
+        if isinstance(e, (ValueError, ConnectionError, RuntimeError)):
+            raise  # already a type callers expect; keep the original traceback
+        raise RuntimeError(
+            f"An unexpected error occurred while processing the URL: {e}"
+        ) from e
+# --- New Synchronous RateLimiter Class ---
+class RateLimiter:
+    """A simple synchronous rate limiter.
+    Enforces a minimum interval of ``1 / requests_per_second`` seconds between
+    consecutive ``wait()`` returns by sleeping in the calling thread. The very
+    first call never sleeps. Instances are not protected by a lock.
+    """
+    def __init__(self, requests_per_second: float):
+        """Create a limiter.
+        Args:
+            requests_per_second: Maximum request rate; must be positive.
+        Raises:
+            ValueError: If ``requests_per_second`` is zero or negative.
+        """
+        if requests_per_second <= 0:
+            raise ValueError("Requests per second must be positive.")
+        self.min_interval_seconds: float = 1.0 / requests_per_second
+        # -inf means "no request yet". time.monotonic() has an undefined
+        # reference point, so 0.0 is not guaranteed to be far enough in the
+        # past and could make the first call block for long intervals.
+        self.last_request_timestamp: float = float("-inf")
+        # Use a lock if this were to be used by multiple threads, but for now assuming single thread access per instance
+    def wait(self) -> None:
+        """Blocks until it's safe to make the next request."""
+        current_time = time.monotonic()  # Use monotonic clock for intervals
+        time_since_last_request = current_time - self.last_request_timestamp
+        if time_since_last_request < self.min_interval_seconds:
+            wait_duration = self.min_interval_seconds - time_since_last_request
+            time.sleep(wait_duration)
+        # Re-read the clock so the interval is measured from the end of the sleep
+        self.last_request_timestamp = time.monotonic()
+# --- Existing Utility Functions (if any) ---
+# def some_other_util_function():
+#     pass
+def strip_html_tags(text: str) -> str:
+    """Return ``text`` with HTML markup removed and surrounding whitespace trimmed.
+    Parsing is delegated to BeautifulSoup's html.parser rather than a regex.
+    Non-string input is not parsed; it is returned as ``str(text)``.
+    """
+    if isinstance(text, str):
+        return BeautifulSoup(text, "html.parser").get_text().strip()
+    # Anything that is not a string is simply stringified
+    return str(text)

app.py CHANGED Viewed

@@ -7,23 +7,23 @@ from datetime import datetime
 import gradio as gr
 import pandas as pd
-from ankigen_core.card_generator import (
     AVAILABLE_MODELS,
     orchestrate_card_generation,
 )  # GENERATION_MODES is internal to card_generator
-from ankigen_core.exporters import (
     export_dataframe_to_apkg,
     export_dataframe_to_csv,
 )  # Anki models (BASIC_MODEL, CLOZE_MODEL) are internal to exporters
-from ankigen_core.llm_interface import (
     OpenAIClientManager,
 )  # structured_output_completion is internal to core modules
-from ankigen_core.ui_logic import update_mode_visibility
-from ankigen_core.utils import (
     ResponseCache,
     get_logger,
 )  # fetch_webpage_text is used by card_generator
-from ankigen_core.auto_config import AutoConfigService
 # --- Initialization ---
 logger = get_logger()
@@ -341,7 +341,7 @@ def create_ankigen_interface(theme=None, css=None, js=None):
                             label="Download Deck", visible=False
                         )
-            # --- Event Handlers --- (Updated to use functions from ankigen_core)
             generation_mode.change(
                 fn=update_mode_visibility,
                 inputs=[

 import gradio as gr
 import pandas as pd
+from ankigen.card_generator import (
     AVAILABLE_MODELS,
     orchestrate_card_generation,
 )  # GENERATION_MODES is internal to card_generator
+from ankigen.exporters import (
     export_dataframe_to_apkg,
     export_dataframe_to_csv,
 )  # Anki models (BASIC_MODEL, CLOZE_MODEL) are internal to exporters
+from ankigen.llm_interface import (
     OpenAIClientManager,
 )  # structured_output_completion is internal to core modules
+from ankigen.ui_logic import update_mode_visibility
+from ankigen.utils import (
     ResponseCache,
     get_logger,
 )  # fetch_webpage_text is used by card_generator
+from ankigen.auto_config import AutoConfigService
 # --- Initialization ---
 logger = get_logger()
                             label="Download Deck", visible=False
                         )
+            # --- Event Handlers --- (Updated to use functions from ankigen)
             generation_mode.change(
                 fn=update_mode_visibility,
                 inputs=[

pyproject.toml CHANGED Viewed

@@ -49,10 +49,13 @@ cli = [
 ]
 [project.scripts]
-ankigen = "ankigen_core.cli:main"
 [tool.setuptools]
 py-modules = ["app"]
 [tool.pytest.ini_options]
 anyio_backend = "asyncio"

 ]
 [project.scripts]
+ankigen = "ankigen.cli:main"
 [tool.setuptools]
 py-modules = ["app"]
+[tool.setuptools.packages.find]
+include = ["ankigen*"]
 [tool.pytest.ini_options]
 anyio_backend = "asyncio"