Spaces:

simontingle
/

neutts-backend

Sleeping

Claude

Fix encode_reference unpack: returns single value, not tuple

826855a unverified 21 days ago

4.85 kB

	"""NeuTTS FastAPI backend — runs on HuggingFace Spaces."""

	from __future__ import annotations

	import io
	import os
	import sys
	import tempfile
	import traceback
	from pathlib import Path

	import numpy as np
	import soundfile as sf
	import uvicorn
	from fastapi import FastAPI, File, Form, Header, HTTPException, UploadFile
	from fastapi.responses import Response

	from neutts import NeuTTS

	# ─── Config ───────────────────────────────────────────────────────────────────

	# Shared secret clients must present; an empty value disables the key check.
	API_KEY = os.getenv("NEUTTS_API_KEY", "")
	# Repository id passed to NeuTTS as ``backbone_repo``.
	BACKBONE = os.getenv("NEUTTS_BACKBONE", "neuphonic/neutts-nano-q8-gguf")
	# Device string passed to NeuTTS as ``backbone_device``.
	DEVICE = os.getenv("NEUTTS_DEVICE", "cpu")
	# Repository id passed to NeuTTS as ``codec_repo``.
	CODEC = os.getenv("NEUTTS_CODEC", "neuphonic/neucodec-onnx-decoder")
	# Sample rate handed to soundfile when the generated audio is written out.
	SAMPLE_RATE = 24_000

	# ─── Model loading (at startup) ───────────────────────────────────────────────

	print(f"[backend] Loading NeuTTS: backbone={BACKBONE} device={DEVICE} codec={CODEC}", flush=True)

	# Remains None when loading fails: the app still starts, /health reports
	# model_loaded=False and /generate answers 503 instead of crashing at import.
	_tts: NeuTTS | None = None
	try:
	    _tts = NeuTTS(
	        backbone_repo=BACKBONE,
	        backbone_device=DEVICE,
	        codec_repo=CODEC,
	        codec_device="cpu",  # the codec is pinned to CPU regardless of DEVICE
	    )
	    print("[backend] Model loaded OK", flush=True)
	except Exception as exc:
	    # Deliberately broad: any load failure is reported on stderr and tolerated.
	    print(f"[backend] WARNING: model load failed: {exc}", file=sys.stderr, flush=True)

	# Cache for the most recently loaded Whisper model and the id it was loaded
	# under; both are filled in by the /transcribe endpoint on first use.
	_whisper_model_name = ""
	_whisper_model = None

	# ─── FastAPI app ──────────────────────────────────────────────────────────────

	# Application object that the route decorators in this module register on.
	app = FastAPI(
	    title="NeuTTS backend",
	    version="1.0",
	)


	def _check_key(key: str | None) -> None:
	    """Reject the request unless it carries the configured API key.

	    The check is skipped entirely when ``API_KEY`` is empty.

	    Args:
	        key: API key supplied by the caller (the endpoints pass the value of
	            their ``x_api_key`` header parameter), or ``None`` when absent.

	    Raises:
	        HTTPException: 401 when a key is configured and ``key`` does not match.
	    """
	    import hmac  # local import: only this helper needs it

	    if not API_KEY:
	        return
	    # Constant-time comparison, so response timing does not reveal how much of
	    # the key matched. Encoding to bytes lets compare_digest accept non-ASCII.
	    supplied = (key or "").encode("utf-8")
	    if not hmac.compare_digest(supplied, API_KEY.encode("utf-8")):
	        raise HTTPException(status_code=401, detail="Invalid API key")


	@app.get("/health")
	def health(x_api_key: str | None = Header(default=None)):
	    """Report liveness and the active model configuration.

	    Runs the API-key check on ``x_api_key`` first, then returns a dict with a
	    fixed ``"ok"`` status, whether the NeuTTS model object exists, and the
	    configured backbone, device and codec.
	    """
	    _check_key(x_api_key)
	    model_ready = _tts is not None
	    payload = {"status": "ok", "model_loaded": model_ready}
	    payload.update(backbone=BACKBONE, device=DEVICE, codec=CODEC)
	    return payload


	@app.post("/generate")
	async def generate(
	    text: str = Form(...),
	    ref_text: str = Form(""),
	    temperature: float = Form(1.0),
	    top_k: int = Form(50),
	    ref_audio: UploadFile = File(...),
	    x_api_key: str | None = Header(default=None),
	):
	    """Synthesize ``text`` using the uploaded clip as the voice reference.

	    Args:
	        text: Text to speak; surrounding whitespace is stripped.
	        ref_text: Transcript of the reference clip. A single space is passed
	            to the model when it is blank.
	        temperature: Forwarded to ``NeuTTS.infer`` as ``temperature``.
	        top_k: Forwarded to ``NeuTTS.infer`` as ``top_k``.
	        ref_audio: Reference recording. It is written to a temporary file
	            because ``encode_reference`` is called with a path.
	        x_api_key: API key header, validated by ``_check_key``.

	    Returns:
	        An ``audio/wav`` response containing the generated audio, written by
	        soundfile at ``SAMPLE_RATE``.

	    Raises:
	        HTTPException: 401 for a bad key, 503 when the model is not loaded,
	            500 for any failure while saving, encoding or synthesizing.
	    """
	    _check_key(x_api_key)
	    if _tts is None:
	        raise HTTPException(status_code=503, detail="Model not loaded on backend")

	    suffix = Path(ref_audio.filename or "audio.wav").suffix or ".wav"
	    # Read the upload before creating the temp file, so a failed read leaves
	    # nothing behind on disk.
	    payload = await ref_audio.read()
	    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
	    tmp_path = tmp.name

	    try:
	        # Writing happens inside the try so the finally clause removes the
	        # file even when the write itself fails (delete=False never cleans up).
	        with tmp:
	            tmp.write(payload)

	        # NOTE(review): these calls are synchronous and run on the event loop,
	        # so other requests wait while a clip is generated. Offloading to a
	        # thread would need confirmation that the model is thread-safe.
	        ref_codes = _tts.encode_reference(tmp_path)
	        wav = _tts.infer(
	            text.strip(),
	            ref_codes,
	            ref_text.strip() or " ",
	            temperature=float(temperature),
	            top_k=int(top_k),
	        )
	        buf = io.BytesIO()
	        sf.write(buf, wav.astype(np.float32), SAMPLE_RATE, format="WAV")
	        return Response(content=buf.getvalue(), media_type="audio/wav")
	    except Exception as exc:
	        print(f"[backend] /generate error:\n{traceback.format_exc()}", file=sys.stderr, flush=True)
	        raise HTTPException(status_code=500, detail=str(exc)) from exc
	    finally:
	        Path(tmp_path).unlink(missing_ok=True)


	@app.post("/transcribe")
	async def transcribe(
	    audio: UploadFile = File(...),
	    model_id: str = Form("base"),
	    x_api_key: str | None = Header(default=None),
	):
	    """Transcribe an uploaded audio file with Whisper.

	    The loaded Whisper model is cached in module globals and reloaded only
	    when ``model_id`` differs from the one currently cached.

	    Args:
	        audio: Recording to transcribe. It is written to a temporary file
	            because the model's ``transcribe`` is called with a path.
	        model_id: Value passed to ``whisper.load_model``.
	        x_api_key: API key header, validated by ``_check_key``.

	    Returns:
	        ``{"text": ...}`` with the stripped transcription text.

	    Raises:
	        HTTPException: 401 for a bad key, 503 when the ``whisper`` package
	            cannot be imported, 500 for any failure while saving the upload,
	            loading the model or transcribing.
	    """
	    global _whisper_model, _whisper_model_name
	    _check_key(x_api_key)

	    try:
	        import whisper as _w
	    except ImportError as exc:
	        raise HTTPException(status_code=503, detail="openai-whisper not installed on backend") from exc

	    suffix = Path(audio.filename or "audio.wav").suffix or ".wav"
	    # Read the upload before creating the temp file, so a failed read leaves
	    # nothing behind on disk.
	    payload = await audio.read()
	    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
	    tmp_path = tmp.name

	    try:
	        # Writing happens inside the try so the finally clause removes the
	        # file even when the write itself fails (delete=False never cleans up).
	        with tmp:
	            tmp.write(payload)

	        if _whisper_model is None or _whisper_model_name != model_id:
	            print(f"[backend] loading Whisper '{model_id}'...", flush=True)
	            # NOTE(review): model_id is client-supplied and passed through
	            # unchanged -- confirm load_model cannot be pointed at an
	            # arbitrary local checkpoint path, or restrict it to known names.
	            _whisper_model = _w.load_model(model_id)
	            _whisper_model_name = model_id
	        result = _whisper_model.transcribe(tmp_path)
	        return {"text": result["text"].strip()}
	    except Exception as exc:
	        # Log the traceback the same way /generate does; previously the cause
	        # was only visible to the client via the response detail.
	        print(f"[backend] /transcribe error:\n{traceback.format_exc()}", file=sys.stderr, flush=True)
	        raise HTTPException(status_code=500, detail=str(exc)) from exc
	    finally:
	        Path(tmp_path).unlink(missing_ok=True)


	if __name__ == "__main__":
	    # Listen on all interfaces; the port comes from $PORT, falling back to 7860.
	    listen_port = int(os.getenv("PORT", "7860"))
	    uvicorn.run(app, host="0.0.0.0", port=listen_port)