Spaces:

decodingdatascience
/

Challengebot

Running

App Files Files Community

Challengebot / app3.py

decodingdatascience

Update app3.py

00bebe2 verified 1 day ago

raw

history blame contribute delete

11.5 kB

	import os
	from pathlib import Path
	import gradio as gr

	from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
	from llama_index.llms.openai import OpenAI
	from llama_index.embeddings.openai import OpenAIEmbedding


	# ==========================================
	# Configuration (overridable via env vars)
	# ==========================================
	# Chat model, embedding model and number of chunks retrieved per query.
	MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
	EMBED_MODEL = os.environ.get("OPENAI_EMBED_MODEL", "text-embedding-3-small")
	TOP_K = int(os.environ.get("TOP_K", "3"))

	# Knowledge-base text file; the default is a bare file name (relative path).
	DOC_PATH = Path(os.environ.get("DOC_PATH", "challenge_context.txt"))

	# URL of the DDS logo image (raw GitHub link by default).
	LOGO_URL = os.environ.get(
	    "LOGO_URL",
	    "https://github.com/Decoding-Data-Science/airesidency/blob/main/dds_logo.jpg?raw=true",
	)

	# Guardrail instructions: answer from the document only, otherwise admit
	# the gap and ask for the missing content to be added.
	SYSTEM_GUARDRAILS = " ".join(
	    (
	        "You are Challenge Copilot. Answer ONLY using the provided context from challenge_context.txt.",
	        "If the answer is not in the context, say: 'I don’t know based on the current document.'",
	        "Then ask the user to add the missing official details to challenge_context.txt.",
	    )
	)

	# Page title and subtitle.
	APP_TITLE = "Building AI Application Challenge Copilot"
	APP_SUBTITLE = (
	    "Ask questions about the Building AI Application Challenge "
	    "using the official content you put into challenge_context.txt "
	    "(LlamaIndex + OpenAI)."
	)


	# ==========================================
	# Module-level cache for the built index
	# ==========================================
	# Both start empty and are meant to hold the index and its query engine
	# once they have been built.
	_INDEX = _QUERY_ENGINE = None


	def build_index():
	    """Build the LlamaIndex query engine once and return the cached instance.

	    On the first call this checks that ``OPENAI_API_KEY`` is set, writes a
	    placeholder file at ``DOC_PATH`` if none exists, configures the global
	    LlamaIndex ``Settings`` (LLM, embedding model, chunk size/overlap),
	    indexes ``DOC_PATH`` and stores the resulting index and query engine in
	    the module-level cache. Later calls return the cached query engine.

	    Returns:
	        The query engine produced by ``as_query_engine`` with
	        ``similarity_top_k=TOP_K``.

	    Raises:
	        RuntimeError: If ``OPENAI_API_KEY`` is missing from the environment.
	        FileNotFoundError: If no document could be loaded from ``DOC_PATH``.
	    """
	    global _INDEX, _QUERY_ENGINE
	    if _QUERY_ENGINE is not None:
	        return _QUERY_ENGINE

	    # The OpenAI key must be present in the environment (Space secrets).
	    if not os.getenv("OPENAI_API_KEY"):
	        raise RuntimeError(
	            "OPENAI_API_KEY is missing. Add it in the Space → Settings → Variables and secrets."
	        )

	    # Create a placeholder document if missing so the app can still start.
	    if not DOC_PATH.exists():
	        # DOC_PATH may point into a directory that does not exist yet.
	        DOC_PATH.parent.mkdir(parents=True, exist_ok=True)
	        DOC_PATH.write_text(
	            "Add the official Building AI Application Challenge content here.\n",
	            encoding="utf-8",
	        )

	    # LlamaIndex global settings
	    Settings.llm = OpenAI(model=MODEL, temperature=0.2)
	    Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL)
	    Settings.chunk_size = 800
	    Settings.chunk_overlap = 120

	    # Load only the target file instead of scanning its whole directory and
	    # filtering afterwards; this also works when DOC_PATH is not a .txt file.
	    docs = SimpleDirectoryReader(input_files=[str(DOC_PATH)]).load_data()
	    if not docs:
	        raise FileNotFoundError(
	            f"Could not load {DOC_PATH.name}. Make sure it exists in the repo root (or set DOC_PATH env var)."
	        )

	    _INDEX = VectorStoreIndex.from_documents(docs)
	    _QUERY_ENGINE = _INDEX.as_query_engine(similarity_top_k=TOP_K)
	    return _QUERY_ENGINE


	def format_sources(resp, max_sources=3, max_chars=240):
	    """Format the top retrieved chunks as a numbered list for transparency.

	    Args:
	        resp: Response object; its ``source_nodes`` attribute is read if
	            present. Each entry must expose ``node.metadata`` and
	            ``node.get_content()``; ``score`` is optional.
	        max_sources: Maximum number of entries to include.
	        max_chars: Maximum number of snippet characters shown per entry.

	    Returns:
	        One line per source, ``"<n>. <file> (score=<s>): <snippet>"``, joined
	        by newlines, or ``"No sources returned."`` when there are none. A
	        snippet ends in ``"..."`` only when it was actually truncated.
	    """
	    # A missing or None ``source_nodes`` is treated as "no sources".
	    nodes = getattr(resp, "source_nodes", None) or []
	    lines = []
	    for i, sn in enumerate(nodes[:max_sources], start=1):
	        fn = sn.node.metadata.get("file_name", "unknown")
	        # Collapse newlines so each source stays on a single line.
	        snippet = sn.node.get_content().replace("\n", " ").strip()
	        if len(snippet) > max_chars:
	            snippet = snippet[:max_chars] + "..."
	        score = getattr(sn, "score", None)
	        score_txt = f" (score={score:.3f})" if isinstance(score, (float, int)) else ""
	        lines.append(f"{i}. {fn}{score_txt}: {snippet}")
	    return "\n".join(lines) if lines else "No sources returned."


	def chat(message, history):
	    """Answer one user message; handler passed to Gradio ``ChatInterface``.

	    Args:
	        message: The user's question.
	        history: Conversation history supplied by the caller; it is not
	            used, so every question is answered independently.

	    Returns:
	        The answer text. Unless the ``SHOW_SOURCES`` environment variable is
	        set to something other than ``"true"`` (case-insensitive), a
	        "Sources" section listing the retrieved chunks is appended. For an
	        empty or whitespace-only message a short prompt to enter a question
	        is returned instead.
	    """
	    # Nothing to answer: skip the index build, retrieval and the LLM call.
	    if not str(message or "").strip():
	        return "Please enter a question about the challenge."

	    qe = build_index()

	    prompt = (
	        f"{SYSTEM_GUARDRAILS}\n\n"
	        f"User question: {message}\n"
	        f"Answer using ONLY the context."
	    )

	    resp = qe.query(prompt)
	    answer = str(resp).strip()

	    show_sources = os.getenv("SHOW_SOURCES", "true").lower() == "true"
	    if show_sources:
	        answer += "\n\n---\nSources:\n" + format_sources(resp, max_sources=TOP_K)

	    return answer


	# ======================
	# UI styling and theme
	# ======================
	# Custom CSS classes (dds-*) used by the HTML fragments in the layout.
	CSS = """
	/* Global polish */
	.dds-header { display:flex; align-items:center; gap:16px; }
	.dds-logo img { height:60px; width:auto; border-radius:10px; box-shadow: 0 2px 10px rgba(0,0,0,0.10); }
	.dds-title { margin:0; line-height:1.1; }
	.dds-subtitle { margin:6px 0 0 0; color: #555; }
	.dds-muted { color: #666; font-size: 0.95rem; }
	.dds-card { border: 1px solid rgba(0,0,0,0.08); border-radius: 14px; padding: 14px; background: rgba(255,255,255,0.7); }
	.dds-section-title { margin: 0 0 6px 0; }
	"""

	# Try the Soft theme; if creating it fails for any reason, fall back to
	# None. The theme is applied on Blocks only, never on ChatInterface.
	theme_obj = None
	try:
	    theme_obj = gr.themes.Soft()
	except Exception:
	    theme_obj = None

	# FAQ accordion content as (title, markdown) pairs, rendered in this order.
	# Each body is stripped of surrounding whitespace when it is rendered.
	_FAQ_ENTRIES = [
	    (
	        "FAQ 1 — What should I build for this challenge?",
	        """
	- Build a simple AI application aligned to the challenge tracks (LLM/API, no-code/low-code, sponsor tool track, etc.).
	- Aim for a working demo + proof-of-work you can share.
	- Ask in chat: “Suggest 5 project ideas that fit the official rules in the document.”
	""",
	    ),
	    (
	        "FAQ 2 — Which track/path should I choose?",
	        """
	- Pick based on your level:
	- LLM/API Integration: Python + API + simple RAG patterns
	- No-code/Low-code: fastest to ship, less code
	- Sponsor/tool track: follow the workshop tool (if applicable)
	- Ask in chat: “Given my background (X), which track is best and why?”
	""",
	    ),
	    (
	        "FAQ 3 — What is the minimum deliverable to be eligible?",
	        """
	Typical minimum:
	- A working app link that judges can open
	- A short description (problem + user + how to use)
	- Repo is optional but strongly recommended
	Ask in chat: “What does the official document say about minimum submission requirements?”
	""",
	    ),
	    (
	        "FAQ 4 — How do I submit my project?",
	        """
	Common submission package:
	- App URL (Hugging Face Spaces / Streamlit / etc.)
	- Repo URL (optional but strong)
	- Short write-up + screenshots/video (if required)
	Ask in chat: “What is the official submission format and where is the submission link?”
	""",
	    ),
	    (
	        "FAQ 5 — Where should I deploy so judges can access easily?",
	        """
	Low-friction options:
	- Hugging Face Spaces (Gradio) — easiest for demos
	- Streamlit Community Cloud
	- Vercel (for web apps)
	Ask in chat: “What deployment options are recommended in the official challenge doc?”
	""",
	    ),
	    (
	        "FAQ 6 — What do judges usually look for?",
	        """
	Strong signals:
	- Working demo (no errors, easy to use)
	- Clear problem + target audience
	- Good AI behavior (grounded, safe, consistent)
	- Product thinking (UX, clarity, flow)
	Ask in chat: “What are the judging criteria in the official document?”
	""",
	    ),
	    (
	        "FAQ 7 — What should I post as proof-of-work?",
	        """
	Suggested proof post structure:
	- 1-line problem + who it helps
	- Demo link + screenshot/GIF
	- What you learned + next improvement
	Ask in chat: “Draft a proof-of-work post based on my project idea.”
	""",
	    ),
	    (
	        "FAQ 8 — How do I make my app ‘RAG grounded’ (not hallucinating)?",
	        """
	Best practices:
	- Restrict answers to retrieved context
	- Show sources/snippets (optional but strong)
	- If missing info → say “Not in document” and request adding content
	Ask in chat: “Answer using only the document; if missing, tell me what section to add.”
	""",
	    ),
	    (
	        "FAQ 9 — I can’t find a detail (dates/rules/prizes). What now?",
	        f"""
	- The Copilot can only answer what exists inside {DOC_PATH.name}.
	- If the official detail isn’t in the TXT, add it, commit, and restart the Space.
	Ask in chat: “What exact section should I add to cover [missing detail]?”
	""",
	    ),
	]

	with gr.Blocks(theme=theme_obj, css=CSS, title=APP_TITLE) as demo:
	    # Header row: logo column on the left, title/subtitle column on the right.
	    with gr.Row():
	        with gr.Column(scale=1, min_width=140):
	            gr.HTML(
	                f"""
	<div class="dds-logo">
	<img src="{LOGO_URL}" alt="DDS Logo"/>
	</div>
	"""
	            )
	        with gr.Column(scale=6):
	            gr.HTML(
	                f"""
	<div class="dds-header">
	<div>
	<h2 class="dds-title">{APP_TITLE}</h2>
	<p class="dds-subtitle">{APP_SUBTITLE}</p>
	<p class="dds-muted">
	If something is missing, add official details to <b>{DOC_PATH.name}</b> and restart the Space.
	</p>
	</div>
	</div>
	"""
	            )

	    gr.Markdown("---")

	    # Main row: chat on the left, participant FAQ on the right.
	    with gr.Row():
	        # Section 1: chat with the copilot.
	        with gr.Column(scale=6):
	            gr.HTML(
	                """
	<div class="dds-card">
	<h3 class="dds-section-title">Section 1 — Ask the Copilot</h3>
	<p class="dds-muted">RAG flow: retrieve relevant chunks → generate a grounded answer using your LLM API.</p>
	</div>
	"""
	            )

	            # Deliberately no theme= argument on ChatInterface.
	            gr.ChatInterface(
	                fn=chat,
	                examples=[
	                    "What will I build in this live session?",
	                    "Who is this best for?",
	                    "What are the prerequisites?",
	                    "What is the RAG flow in this project?",
	                    "What should I submit (link + repo + write-up)?",
	                ],
	            )

	        # Section 2: challenge FAQ, one collapsed accordion per entry.
	        with gr.Column(scale=4):
	            gr.HTML(
	                """
	<div class="dds-card">
	<h3 class="dds-section-title">Section 2 — Challenge FAQ</h3>
	<p class="dds-muted">
	Quick guidance for participants. If something is not answered here, ask in the Copilot chat.
	</p>
	</div>
	"""
	            )

	            for _faq_title, _faq_body in _FAQ_ENTRIES:
	                with gr.Accordion(_faq_title, open=False):
	                    gr.Markdown(_faq_body.strip())

	    gr.Markdown("---")
	    # Footer: context file name and the supported environment variables.
	    gr.Markdown(
	        f"""
	Admin notes
	- Context file: `{DOC_PATH.name}`
	- Optional env vars: `OPENAI_MODEL`, `OPENAI_EMBED_MODEL`, `TOP_K`, `SHOW_SOURCES`, `DOC_PATH`, `LOGO_URL`
	""".strip()
	    )

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()