Spaces:
Paused
Paused
File size: 4,210 Bytes
cb1109f 845c5fd cb1109f 845c5fd cb1109f 845c5fd cb1109f 845c5fd cb1109f 845c5fd cb1109f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import joblib
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import torch
app = FastAPI(title="Comment Validator API")
# =====================================
# 🔹 Chargement des modèles
# =====================================
if torch.cuda.is_available():
device = "cuda"
elif torch.backends.mps.is_available():
device = "mps" # pour ton Mac local
else:
device = "cpu"
print(f"🧠 Using device: {device}")
print("Loading model embedding")
text_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2", device=device)
print("Loading model classifier")
clf = joblib.load("models/classifier.joblib")
print("Loading model encoder")
encoder = joblib.load("models/encoder.joblib")
print("Loading model sentiment-analysis")
sentiment_analyzer = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment", device=device)
print("Loading model toxicity")
toxicity_analyzer = pipeline("text-classification", model="unitary/toxic-bert", return_all_scores=True, device=device)
def analyze_comment(comment: str, category: str, country: str) -> dict:
reasons = []
# --- Analyse du sentiment ---
try:
sentiment = sentiment_analyzer(comment[:512])[0]
label = sentiment["label"]
score = sentiment["score"]
except Exception:
label, score = "unknown", 0.0
if "1" in label or "2" in label:
sentiment_score = -1
reasons.append("Le ton semble négatif ou insatisfait.")
elif "4" in label or "5" in label:
sentiment_score = 1
else:
sentiment_score = 0
# --- Encodage du texte ---
X_text = text_model.encode([comment])
# --- Encodage catégorie/pays ---
df_cat = pd.DataFrame([[category, country]], columns=["category", "country"])
try:
X_cat = encoder.transform(df_cat)
except ValueError:
reasons.append(f"Catégorie ou pays inconnus : {category}, {country}")
n_features = sum(len(cats) for cats in encoder.categories_)
X_cat = np.zeros((1, n_features))
# --- Concaténation ---
X = np.concatenate([X_text, X_cat], axis=1)
# --- Prédiction validité ---
proba = clf.predict_proba(X)[0][1]
prediction = proba >= 0.5
if len(comment.split()) < 3:
reasons.append("Le commentaire est trop court.")
if sentiment_score < 0:
reasons.append("Le ton global est négatif.")
if proba < 0.4:
reasons.append("Le modèle estime une faible probabilité de validité.")
# --- Analyse toxicité ---
try:
tox_scores = toxicity_analyzer(comment[:512])[0] # tronquer pour sécurité
tags = {f"tag_{item['label']}": round(item['score'], 3) for item in tox_scores}
except Exception:
tags = {f"tag_{label}": 0.0 for label in ["toxicity","severe_toxicity","obscene","identity_attack","insult","threat"]}
# --- Résultat final ---
result = {
"is_valid": bool(prediction),
"confidence": round(float(proba), 3),
"sentiment": label,
"sentiment_score": round(float(score), 3),
"reasons": "; ".join(reasons) if reasons else "Aucune anomalie détectée."
}
result.update(tags)
return result
# =====================================
# 🔸 Modèles de requête/réponse
# =====================================
class CommentRequest(BaseModel):
comment: str
category: str
country: str
class BatchRequest(BaseModel):
items: List[CommentRequest]
# =====================================
# 🔹 Routes
# =====================================
@app.post("/predict")
def predict(item: CommentRequest):
"""Analyse un seul commentaire"""
result = analyze_comment(item.comment, item.category, item.country)
return result
@app.post("/batch_predict")
def batch_predict(request: BatchRequest):
"""Analyse plusieurs commentaires à la fois"""
results = []
for item in request.items:
results.append(analyze_comment(item.comment, item.category, item.country))
return {"results": results}
|