miqa / video_analytics_inference.py

Upload video_analytics_inference.py with huggingface_hub

a7301d6 verified 24 days ago

36.3 kB

	import os
	import sys
	import torch
	import argparse
	import logging
	from pathlib import Path
	from typing import List, Dict, Optional, Tuple
	from collections import OrderedDict, defaultdict
	import json
	from datetime import datetime
	from tqdm import tqdm
	import numpy as np
	import cv2

	# Image processing imports
	from PIL import Image, ImageDraw, ImageFont
	import torchvision.transforms as transforms
	import matplotlib.pyplot as plt
	import matplotlib

	matplotlib.use('Agg') # Use non-interactive backend

	# Import your existing model components
	from models.MIQA_base import get_torch_model, get_timm_model
	from models.RA_MIQA import RegionVisionTransformer
	from models.hf_model_registry import HF_REPO_ID, HF_REVISION, MODEL_FILENAMES
	from utils.hf_download_utils import ensure_checkpoint_from_hf

	# Supported file extensions
	SUPPORTED_IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.JPEG', '.png', '.bmp', '.tiff', '.tif'}
	SUPPORTED_VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv', '.webm'}


	class VideoFrameExtractor:
	"""
	Extracts and samples frames from video files intelligently.

	This class handles different sampling strategies to balance between
	thoroughness and computational efficiency.
	"""

	def __init__(self, sampling_strategy: str = 'uniform',
	target_frames: int = 30,
	fps_sample: Optional[float] = None):
	"""
	Initialize the frame extractor.

	Args:
	sampling_strategy: How to sample frames - 'uniform', 'fps', or 'keyframe'
	target_frames: Target number of frames to extract (for uniform sampling)
	fps_sample: Sample rate in frames per second (for fps sampling)
	"""
	self.sampling_strategy = sampling_strategy
	self.target_frames = target_frames
	self.fps_sample = fps_sample

	def extract_frames(self, video_path: str) -> Tuple[List[np.ndarray], List[float], Dict]:
	"""
	Extract frames from video based on sampling strategy.

	Returns:
	Tuple of (frames_list, timestamps_list, video_metadata)
	"""
	cap = cv2.VideoCapture(video_path)

	if not cap.isOpened():
	raise ValueError(f"Cannot open video file: {video_path}")

	# Get video properties
	total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	fps = cap.get(cv2.CAP_PROP_FPS)
	duration = total_frames / fps if fps > 0 else 0
	width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
	height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

	metadata = {
	'total_frames': total_frames,
	'fps': fps,
	'duration': duration,
	'width': width,
	'height': height
	}

	# Determine which frames to sample
	frame_indices = self._get_sample_indices(total_frames, fps)

	frames = []
	timestamps = []

	for idx in frame_indices:
	cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
	ret, frame = cap.read()

	if ret:
	# Convert BGR to RGB (OpenCV uses BGR)
	frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	frames.append(frame_rgb)
	# Calculate timestamp in seconds
	timestamp = idx / fps if fps > 0 else idx
	timestamps.append(timestamp)

	cap.release()

	return frames, timestamps, metadata

	def _get_sample_indices(self, total_frames: int, fps: float) -> List[int]:
	"""
	Determine which frame indices to sample based on strategy.
	"""
	if self.sampling_strategy == 'uniform':
	# Sample frames uniformly across the video
	if total_frames <= self.target_frames:
	return list(range(total_frames))
	else:
	# Calculate step size to get approximately target_frames
	step = total_frames / self.target_frames
	indices = [int(i * step) for i in range(self.target_frames)]
	return indices

	elif self.sampling_strategy == 'fps':
	# Sample at a specific frame rate
	if self.fps_sample is None:
	raise ValueError("fps_sample must be specified for fps sampling strategy")

	frame_interval = max(1, int(fps / self.fps_sample))
	indices = list(range(0, total_frames, frame_interval))
	return indices

	else:
	raise ValueError(f"Unknown sampling strategy: {self.sampling_strategy}")


	def aggregate_scores_by_second(frame_results: List[Dict]) -> List[Dict]:
	"""
	Aggregate frame-level quality scores to per-second averages.

	This function groups all frames that fall within the same second
	and computes their average quality score. This provides a smoothed
	view of quality over time, reducing noise from frame-to-frame variations.

	Args:
	frame_results: List of dictionaries with 'timestamp' and 'quality_score'

	Returns:
	List of dictionaries with per-second aggregated scores
	"""
	# Group frames by their second (floor of timestamp)
	seconds_data = defaultdict(list)

	for frame in frame_results:
	second = int(frame['timestamp']) # Floor to nearest second
	seconds_data[second].append(frame['quality_score'])

	# Calculate average for each second
	per_second_results = []
	for second in sorted(seconds_data.keys()):
	scores = seconds_data[second]
	per_second_results.append({
	'second': second,
	'timestamp': float(second), # Use second as timestamp for plotting
	'quality_score': np.mean(scores),
	'min_score': np.min(scores),
	'max_score': np.max(scores),
	'num_frames': len(scores),
	'std_score': np.std(scores) if len(scores) > 1 else 0.0
	})

	return per_second_results


	class MIQAInference:
	"""
	Inference wrapper for MIQA models supporting both images and videos.
	"""

	def __init__(self, task: str, model_name: str = 'ra_miqa',
	metric_type: str = 'composite', device: Optional[str] = None,
	video_sampling: str = 'uniform', video_target_frames: int = 30):
	"""
	Initialize the MIQA inference system.

	Args:
	task: Task type - 'cls', 'det', or 'ins'
	model_name: Model architecture to use
	metric_type: Training objective - 'composite', 'consistency', or 'accuracy'
	device: Device to run inference on
	video_sampling: Frame sampling strategy for videos
	video_target_frames: Target number of frames to extract from videos
	"""
	self.task = task.lower()
	self.model_name = model_name
	self.metric_type = metric_type
	self.video_target_frames = video_target_frames

	# Setup logging
	self.logger = self._setup_logger()

	# Determine device
	if device is None:
	self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	else:
	self.device = torch.device(device)

	self.logger.info(f"🚀 Initializing MIQA Inference System")
	self.logger.info(f" Task: {self.task.upper()}")
	self.logger.info(f" Model: {self.model_name}")
	self.logger.info(f" Device: {self.device}")

	# Validate configuration
	self._validate_config()

	# Initialize model
	self.model = self._load_model()

	# Setup image preprocessing
	self.transforms1, self.transforms2 = self._get_transforms()

	# Initialize video frame extractor
	self.frame_extractor = VideoFrameExtractor(
	sampling_strategy=video_sampling,
	target_frames=video_target_frames
	)

	self.logger.info("✅ System ready for inference\n")

	def _setup_logger(self) -> logging.Logger:
	"""Configure logging with both file and console output."""
	logger = logging.getLogger('MIQA_Inference')
	logger.setLevel(logging.INFO)

	if logger.hasHandlers():
	return logger

	logger.propagate = False

	# Console handler with clean formatting
	console_handler = logging.StreamHandler(sys.stdout)
	console_handler.setLevel(logging.INFO)
	console_formatter = logging.Formatter('%(message)s')
	console_handler.setFormatter(console_formatter)
	logger.addHandler(console_handler)

	return logger

	def _validate_config(self) -> None:
	"""Validate that the requested configuration is supported."""

	if self.metric_type not in ['composite', 'consistency', 'accuracy']:
	raise ValueError(
	f"Invalid metric_type '{self.metric_type}'. "
	f"Supported: ['composite', 'consistency', 'accuracy']"
	)

	if self.task not in MODEL_FILENAMES[self.metric_type]:
	raise ValueError(
	f"Invalid task '{self.task}'. "
	f"Supported tasks: {list(MODEL_FILENAMES[self.metric_type].keys())}"
	)

	if self.model_name not in MODEL_FILENAMES[self.metric_type][self.task]:
	available = list(MODEL_FILENAMES[self.metric_type][self.task].keys())
	raise ValueError(
	f"Model '{self.model_name}' not available for task '{self.task}'. "
	f"Available models: {available}"
	)

	def _get_checkpoint_path(self) -> str:
	"""Generate the path where model checkpoint should be stored."""
	base_dir = Path('models') / 'checkpoints' / f'{self.metric_type}_metric'
	base_dir.mkdir(parents=True, exist_ok=True)

	filename = MODEL_FILENAMES[self.metric_type][self.task][self.model_name]
	return str(base_dir / filename)

	def _download_weights(self, checkpoint_path: str) -> bool:
	"""
	Download model weights if not present locally.

	Returns:
	True if weights are available (already existed or successfully downloaded)
	"""
	if os.path.exists(checkpoint_path):
	self.logger.info(f"✓ Found cached model weights")
	return True

	self.logger.info(
	f"⏬ Downloading from Hugging Face: repo={HF_REPO_ID}, "
	f"file={Path(checkpoint_path).name}, rev={HF_REVISION}"
	)
	try:
	ensure_checkpoint_from_hf(
	repo_id=HF_REPO_ID,
	filename=Path(checkpoint_path).name,
	local_dir=str(Path(checkpoint_path).parent),
	revision=HF_REVISION,
	)
	self.logger.info("✓ Successfully downloaded model weights")
	return True
	except Exception as e:
	self.logger.error(f"❌ Failed to download model weights from Hugging Face: {e}")
	return False

	def _create_model(self) -> torch.nn.Module:
	"""Create the model architecture."""
	if self.model_name == 'ra_miqa':
	self.logger.info("Building Region-Aware Vision Transformer...")
	model = RegionVisionTransformer(
	base_model_name='vit_small_patch16_224',
	pretrained=False, # We'll load our trained weights
	mmseg_config_path='models/model_configs/fcn_sere-small_finetuned_fp16_8x32_224x224_3600_imagenets919.py',
	checkpoint_path='models/checkpoints/sere_finetuned_vit_small_ep100.pth'
	)
	else:
	try:
	self.logger.info(f"Building {self.model_name} from PyTorch...")
	model = get_torch_model(model_name=self.model_name, pretrained=False, num_classes=1)
	except Exception:
	self.logger.info(f"Building {self.model_name} from timm library...")
	model = get_timm_model(model_name=self.model_name, pretrained=False, num_classes=1)

	return model

	def _load_model(self) -> torch.nn.Module:
	"""Load model with weights."""
	checkpoint_path = self._get_checkpoint_path()

	if not self._download_weights(checkpoint_path):
	raise RuntimeError("Cannot proceed without model weights")

	self.logger.info("🔧 Loading model...")
	model = self._create_model()

	checkpoint = torch.load(checkpoint_path, map_location='cpu')
	state_dict = checkpoint.get('state_dict', checkpoint)

	new_state_dict = OrderedDict()
	for k, v in state_dict.items():
	name = k.replace('module.', '') if k.startswith('module.') else k
	new_state_dict[name] = v

	model.load_state_dict(new_state_dict, strict=True)
	model = model.to(self.device)
	model.eval()

	self.logger.info("✓ Model loaded successfully")
	return model

	def _get_transforms(self) -> [transforms.Compose, transforms.Compose]:
	"""
	Get image preprocessing transforms.

	These transforms normalize images to match the training distribution.
	"""
	IMAGENET_MEAN = (0.485, 0.456, 0.406)
	IMAGENET_STD = (0.229, 0.224, 0.225)
	SIMPLE_MEAN = (0.5, 0.5, 0.5)
	SIMPLE_STD = (0.5, 0.5, 0.5)

	transforms_list1 = [
	transforms.Resize(288),
	transforms.CenterCrop(size=224),
	transforms.ToTensor(),
	transforms.Normalize(mean=SIMPLE_MEAN,
	std=SIMPLE_STD)
	]
	transform_list_2 = [
	transforms.Resize(288),
	transforms.CenterCrop((288, 288)),
	transforms.ToTensor(),
	transforms.Normalize(mean=IMAGENET_MEAN,
	std=IMAGENET_STD)
	]
	return transforms.Compose(transforms_list1), transforms.Compose(transform_list_2)

	def _prepare_frame(self, frame: np.ndarray) -> Tuple[torch.Tensor, torch.Tensor]:
	"""
	Preprocess a video frame for model input.

	Args:
	frame: Numpy array in RGB format

	Returns:
	Tuple of (cropped_tensor, resized_tensor)
	"""
	# Convert numpy array to PIL Image
	img = Image.fromarray(frame)

	# Apply transforms
	img1 = self.transforms1(img).unsqueeze(0)
	img2 = self.transforms2(img).unsqueeze(0)

	return img1, img2

	def _prepare_image(self, image_path: str) -> Tuple[torch.Tensor, torch.Tensor, Image.Image]:
	"""Load and preprocess an image file."""
	img = Image.open(image_path).convert('RGB')
	img1 = self.transforms1(img).unsqueeze(0)
	img2 = self.transforms2(img).unsqueeze(0)
	return img1, img2, img

	@torch.no_grad()
	def predict_single_image(self, image_path: str) -> Dict:
	"""Run inference on a single image."""
	img_cropped, img_resized, original_img = self._prepare_image(image_path)

	img_cropped = img_cropped.to(self.device)
	img_resized = img_resized.to(self.device)

	output = self.model(img_cropped, img_resized)
	score = output.item()

	return {
	'image_path': image_path,
	'image_name': Path(image_path).name,
	'quality_score': score,
	'original_image': original_img,
	'type': 'image'
	}

	@torch.no_grad()
	def predict_video(self, video_path: str, show_progress: bool = True) -> Dict:
	"""
	Run inference on a video file.

	This extracts frames, predicts quality for each, aggregates to per-second
	averages, and returns comprehensive time-series data suitable for visualization.
	"""
	self.logger.info(f"🎬 Processing video: {Path(video_path).name}")

	# Extract frames
	frames, timestamps, metadata = self.frame_extractor.extract_frames(video_path)

	self.logger.info(f" Extracted {len(frames)} frames from {metadata['duration']:.1f}s video")

	# Process each frame
	frame_results = []
	iterator = tqdm(frames, desc="Analyzing frames", disable=not show_progress, ncols=80)

	for frame, timestamp in zip(iterator, timestamps):
	img_cropped, img_resized = self._prepare_frame(frame)

	img_cropped = img_cropped.to(self.device)
	img_resized = img_resized.to(self.device)

	output = self.model(img_cropped, img_resized)
	score = output.item()

	frame_results.append({
	'timestamp': timestamp,
	'quality_score': score,
	'frame': frame # Store for visualization if needed
	})

	# Aggregate frame results by second
	per_second_results = aggregate_scores_by_second(frame_results)

	# Calculate statistics from per-second data for better representation
	second_scores = [r['quality_score'] for r in per_second_results]

	return {
	'video_path': video_path,
	'video_name': Path(video_path).name,
	'type': 'video',
	'metadata': metadata,
	'frame_results': frame_results,
	'per_second_results': per_second_results, # NEW: Added per-second aggregation
	'num_frames_analyzed': len(frame_results),
	'num_seconds': len(per_second_results), # NEW: Number of unique seconds
	'average_quality': np.mean(second_scores),
	'min_quality': np.min(second_scores),
	'max_quality': np.max(second_scores),
	'std_quality': np.std(second_scores)
	}

	def predict(self, input_path: str, show_progress: bool = True) -> List[Dict]:
	"""
	Main prediction interface - handles images, videos, and directories.
	"""
	input_path = Path(input_path)

	# Handle single file
	if input_path.is_file():
	ext = input_path.suffix.lower()

	if ext in SUPPORTED_IMAGE_EXTENSIONS:
	return [self.predict_single_image(str(input_path))]
	elif ext in SUPPORTED_VIDEO_EXTENSIONS:
	return [self.predict_video(str(input_path), show_progress)]
	else:
	raise ValueError(f"Unsupported file extension: {ext}")

	# Handle directory
	elif input_path.is_dir():
	results = []

	# Find all supported files
	image_paths = []
	video_paths = []

	for ext in SUPPORTED_IMAGE_EXTENSIONS:
	image_paths.extend(input_path.glob(f"*{ext}"))

	for ext in SUPPORTED_VIDEO_EXTENSIONS:
	video_paths.extend(input_path.glob(f"*{ext}"))

	image_paths = sorted([str(p) for p in image_paths])
	video_paths = sorted([str(p) for p in video_paths])

	if not image_paths and not video_paths:
	raise ValueError(f"No supported files found in {input_path}")

	self.logger.info(f"📁 Found {len(image_paths)} images and {len(video_paths)} videos")

	# Process images
	if image_paths:
	for img_path in tqdm(image_paths, desc="Processing images", ncols=80):
	try:
	result = self.predict_single_image(img_path)
	results.append(result)
	except Exception as e:
	self.logger.warning(f"⚠️ Failed: {img_path}")

	# Process videos
	if video_paths:
	for vid_path in video_paths:
	try:
	result = self.predict_video(vid_path, show_progress)
	results.append(result)
	except Exception as e:
	self.logger.warning(f"⚠️ Failed: {vid_path}")

	return results
	else:
	raise ValueError(f"Input path does not exist: {input_path}")

	def visualize_video_results(self, video_result: Dict, output_dir: str = 'inference_results',
	granularity: str = 'second') -> None:
	"""
	Create time-series visualization for video quality predictions.

	This generates a line plot showing how quality varies across the video timeline.
	You can choose between frame-level and second-level granularity.

	Args:
	video_result: Dictionary containing video analysis results
	output_dir: Directory to save visualizations
	granularity: Visualization granularity - 'frame' for frame-by-frame,
	'second' for per-second averages, or 'both' for dual plot
	"""
	output_path = Path(output_dir)
	output_path.mkdir(parents=True, exist_ok=True)

	if granularity == 'both':
	# Create side-by-side comparison
	fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 6))

	# Frame-level plot (left)
	frame_results = video_result['frame_results']
	frame_timestamps = [r['timestamp'] for r in frame_results]
	frame_scores = [r['quality_score'] for r in frame_results]

	ax1.plot(frame_timestamps, frame_scores, linewidth=1.5, color='#2E86AB',
	marker='o', markersize=3, markerfacecolor='white', markeredgewidth=1,
	alpha=0.7, label='Frame-level')
	ax1.set_xlabel('Time (seconds)', fontsize=11, fontweight='bold')
	ax1.set_ylabel('Quality Score', fontsize=11, fontweight='bold')
	ax1.set_title('Frame-Level Quality', fontsize=12, fontweight='bold')
	ax1.grid(True, alpha=0.3, linestyle='--')
	ax1.set_ylim(0, 1)
	# Second-level plot (right)
	second_results = video_result['per_second_results']
	second_timestamps = [r['timestamp'] for r in second_results]
	second_scores = [r['quality_score'] for r in second_results]

	ax2.plot(second_timestamps, second_scores, linewidth=2.5, color='#A23B72',
	marker='s', markersize=6, markerfacecolor='white', markeredgewidth=1.5,
	label='Per-second average')

	# Add error bars showing variability within each second
	if second_results and 'std_score' in second_results[0]:
	stds = [r['std_score'] for r in second_results]
	ax2.fill_between(second_timestamps,
	np.array(second_scores) - np.array(stds),
	np.array(second_scores) + np.array(stds),
	alpha=0.2, color='#A23B72')

	ax2.set_xlabel('Time (seconds)', fontsize=11, fontweight='bold')
	ax2.set_ylabel('Quality Score', fontsize=11, fontweight='bold')
	ax2.set_title('Per-Second Averaged Quality', fontsize=12, fontweight='bold')
	ax2.grid(True, alpha=0.3, linestyle='--')
	ax2.set_ylim(0, 1)
	# Add overall average line to both
	avg_score = video_result['average_quality']
	ax1.axhline(y=avg_score, color='#F18F01', linestyle='--',
	linewidth=1.5, alpha=0.7, label=f'Overall avg: {avg_score:.2f}')
	ax2.axhline(y=avg_score, color='#F18F01', linestyle='--',
	linewidth=1.5, alpha=0.7, label=f'Overall avg: {avg_score:.2f}')

	ax1.legend(loc='best', framealpha=0.9)
	ax2.legend(loc='best', framealpha=0.9)

	plt.suptitle(f"Video Quality Analysis: {video_result['video_name']}, {self.task}-oriented MIQA",
	fontsize=14, fontweight='bold', y=1.02)

	suffix = 'comparison'

	else:
	# Single plot based on selected granularity
	plt.figure(figsize=(14, 6))

	if granularity == 'frame':
	frame_results = video_result['frame_results']
	timestamps = [r['timestamp'] for r in frame_results]
	scores = [r['quality_score'] for r in frame_results]
	plot_color = '#2E86AB'
	plot_label = 'Frame-level quality'
	title_suffix = '(Frame-Level)'
	suffix = 'frame'
	marker_size = 4
	else: # second
	second_results = video_result['per_second_results']
	timestamps = [r['timestamp'] for r in second_results]
	scores = [r['quality_score'] for r in second_results]
	plot_color = '#A23B72'
	plot_label = 'Per-second average'
	title_suffix = '(Per-Second Average)'
	suffix = 'second'
	marker_size = 6

	# Main quality plot
	plt.plot(timestamps, scores, linewidth=2, color=plot_color, marker='o',
	markersize=marker_size, markerfacecolor='white', markeredgewidth=1.5,
	label=plot_label)

	# Add shaded region for second-level showing variability
	if granularity == 'second' and second_results and 'std_score' in second_results[0]:
	stds = [r['std_score'] for r in second_results]
	plt.fill_between(timestamps,
	np.array(scores) - np.array(stds),
	np.array(scores) + np.array(stds),
	alpha=0.2, color=plot_color)

	# Add average line
	avg_score = video_result['average_quality']
	plt.axhline(y=avg_score, color='#F18F01', linestyle='--',
	linewidth=1.5, label=f'Average: {avg_score:.2f}')

	# Styling
	plt.xlabel('Time (seconds)', fontsize=12, fontweight='bold')
	plt.ylabel('Quality Score', fontsize=12, fontweight='bold')
	plt.title(f"Video Quality Analysis: {video_result['video_name']} {title_suffix}",
	fontsize=14, fontweight='bold', pad=20)
	plt.grid(True, alpha=0.3, linestyle='--')
	plt.legend(loc='best', framealpha=0.9)

	# Add statistics box
	if granularity == 'both':
	stats_text = (
	f"Duration: {video_result['metadata']['duration']:.1f}s\n"
	f"Frames: {video_result['num_frames_analyzed']} \| "
	f"Seconds: {video_result['num_seconds']}\n"
	f"Score Range: [{video_result['min_quality']:.2f}, {video_result['max_quality']:.2f}]\n"
	f"Std Dev: {video_result['std_quality']:.2f}"
	)
	# Add to the right subplot
	ax2.text(0.02, 0.98, stats_text, transform=ax2.transAxes,
	fontsize=9, verticalalignment='top',
	bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
	else:
	stats_text = (
	f"Duration: {video_result['metadata']['duration']:.1f}s\n"
	f"Frames Analyzed: {video_result['num_frames_analyzed']}\n"
	f"Unique Seconds: {video_result['num_seconds']}\n"
	f"Score Range: [{video_result['min_quality']:.2f}, {video_result['max_quality']:.2f}]\n"
	f"Std Dev: {video_result['std_quality']:.2f}"
	)
	plt.text(0.02, 0.98, stats_text, transform=plt.gca().transAxes,
	fontsize=12, verticalalignment='top',
	bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.6))

	plt.tight_layout()

	# Save figure
	output_file = output_path / f"{Path(video_result['video_name']).stem}_{self.metric_type}_quality_{suffix}.png"
	plt.savefig(output_file, dpi=300, bbox_inches='tight')
	plt.close()

	self.logger.info(f" Saved visualization: {output_file.name}")

	def visualize_results(self, results: List[Dict], output_dir: str = 'inference_results',
	video_granularity: str = 'second') -> None:
	"""
	Create visualizations for all results (images and videos).

	Args:
	results: List of prediction results
	output_dir: Directory to save visualizations
	video_granularity: For videos - 'frame', 'second', or 'both'
	"""
	output_path = Path(output_dir)
	output_path.mkdir(parents=True, exist_ok=True)

	self.logger.info(f"\n🎨 Creating visualizations (granularity: {video_granularity})...")

	for result in results:
	if result['type'] == 'video':
	self.visualize_video_results(result, output_dir, video_granularity)
	elif result['type'] == 'image' and result.get('quality_score') is not None:
	# Use original image visualization logic
	img = result['original_image'].copy()
	draw = ImageDraw.Draw(img)

	score = result['quality_score']
	score_text = f"Quality: {score:.3f}"

	# Simple color coding (adjust range as needed)
	# norm_score = score / 100.0
	norm_score = max(0, score)

	if norm_score < 0.5:
	r, g, b = 255, int(255 * norm_score * 2), 0
	else:
	r, g, b = int(255 * (2 - norm_score * 2)), 255, 0

	color = (r, g, b)

	box_coords = [10, 10, 260, 60]
	draw.rectangle(box_coords, fill=color)

	try:
	font = ImageFont.truetype("arial.ttf", 24)
	except:
	font = ImageFont.load_default()

	draw.text((15, 20), score_text, fill='black', font=font)

	output_file = output_path / f"annotated_{result['image_name']}"
	img.save(output_file)

	self.logger.info(f"✓ Visualizations saved to: {output_dir}/")

	def save_results(self, results: List[Dict], output_path: str = 'predictions.json') -> None:
	"""Save prediction results to JSON file."""
	# Clean results for JSON serialization
	clean_results = []
	for r in results:
	clean_r = {k: v for k, v in r.items()
	if k not in ['original_image', 'frame']}

	# For video results, remove frame data but keep scores
	if clean_r.get('type') == 'video' and 'frame_results' in clean_r:
	clean_r['frame_results'] = [
	{k: v for k, v in fr.items() if k != 'frame'}
	for fr in clean_r['frame_results']
	]

	clean_results.append(clean_r)

	output_path = Path(output_path)
	output_path.parent.mkdir(parents=True, exist_ok=True)

	with open(output_path, 'w') as f:
	json.dump({
	'metadata': {
	'task': self.task,
	'model': self.model_name,
	'metric_type': self.metric_type,
	'timestamp': datetime.now().isoformat(),
	'total_files': len(clean_results)
	},
	'predictions': clean_results
	}, f, indent=2)

	self.logger.info(f"💾 Results saved to: {output_path}")

	def print_summary(self, results: List[Dict]) -> None:
	"""Print formatted summary of prediction results."""
	self.logger.info("\n" + "=" * 80)
	self.logger.info("PREDICTION SUMMARY")
	self.logger.info("=" * 80)

	image_results = [r for r in results if r.get('type') == 'image']
	video_results = [r for r in results if r.get('type') == 'video']

	if image_results:
	valid_images = [r for r in image_results if r.get('quality_score') is not None]
	if valid_images:
	scores = [r['quality_score'] for r in valid_images]
	self.logger.info(f"\n📸 Image Analysis ({len(valid_images)} images)")
	self.logger.info(f" Average quality: {np.mean(scores):.2f}")
	self.logger.info(f" Score range: [{np.min(scores):.2f}, {np.max(scores):.2f}]")

	if video_results:
	self.logger.info(f"\n🎬 Video Analysis ({len(video_results)} videos)")
	for vr in video_results:
	self.logger.info(f"\n {vr['video_name']}:")
	self.logger.info(f" Duration: {vr['metadata']['duration']:.1f}s")
	self.logger.info(f" Frames analyzed: {vr['num_frames_analyzed']}")
	self.logger.info(f" Unique seconds: {vr['num_seconds']}")
	self.logger.info(f" Average quality (per-second): {vr['average_quality']:.2f}")
	self.logger.info(f" Quality range: [{vr['min_quality']:.2f}, {vr['max_quality']:.2f}]")
	self.logger.info(f" Variability (std): {vr['std_quality']:.2f}")

	self.logger.info("\n" + "=" * 80 + "\n")


	def main():
	"""Command-line interface for MIQA inference."""
	parser = argparse.ArgumentParser(
	description='MIQA: Machine-centric Image and Video Quality Assessment',
	formatter_class=argparse.RawDescriptionHelpFormatter,
	epilog="""
	Examples:
	# Analyze video with per-second visualization
	python video_analytics_inference.py --input video.mp4 --task cls --visualize --viz-granularity second

	# Analyze video with both frame and second visualizations
	python video_analytics_inference.py --input video.mp4 --task cls --visualize --viz-granularity both

	# Process directory with frame-level visualization
	python video_analytics_inference.py --input ./assets/demo_video --task det --video-frames 120 --visualize --viz-granularity second --metric-type consistency
	"""
	)

	parser.add_argument('--input', type=str, required=True,
	help='Path to input image/video or directory')
	parser.add_argument('--task', type=str, required=True,
	choices=['cls', 'det', 'ins'],
	help='Task type')
	parser.add_argument('--model', type=str, default='ra_miqa',
	choices=['ra_miqa'],
	help='Model architecture (RA-MIQA only; matches Hub registry)')
	parser.add_argument('--metric-type', type=str, default='composite',
	choices=['composite', 'consistency', 'accuracy'],
	help='Training metric type')
	parser.add_argument('--device', type=str, default=None,
	choices=['cuda', 'cpu'],
	help='Device to run on')
	parser.add_argument('--video-frames', type=int, default=50,
	help='Target number of frames to sample from videos')
	parser.add_argument('--save-results', action='store_true',
	help='Save prediction results to file')
	parser.add_argument('--output-file', type=str, default='predictions.json',
	help='Output file path')
	parser.add_argument('--visualize', action='store_true',
	help='Create visualizations')
	parser.add_argument('--viz-dir', type=str, default='inference_results',
	help='Directory for visualizations')
	parser.add_argument('--viz-granularity', type=str, default='second',
	choices=['frame', 'second', 'both'],
	help='Visualization granularity for videos: frame-level, per-second, or both')
	parser.add_argument('--no-progress', action='store_true',
	help='Disable progress bar')

	args = parser.parse_args()

	try:
	miqa = MIQAInference(
	task=args.task,
	model_name=args.model,
	metric_type=args.metric_type,
	device=args.device,
	video_target_frames=args.video_frames
	)

	results = miqa.predict(args.input, show_progress=not args.no_progress)
	miqa.print_summary(results)

	if args.save_results:
	miqa.save_results(results, args.output_file)

	if args.visualize:
	miqa.visualize_results(results, args.viz_dir, video_granularity=args.viz_granularity)

	except Exception as e:
	print(f"\n❌ Error: {str(e)}", file=sys.stderr)
	import traceback
	traceback.print_exc()
	sys.exit(1)


	if __name__ == '__main__':
	main()