# DamageLensAI / scripts / gradcam.py
# Source: Hugging Face repository upload by junaid17 ("Upload 43 files", commit eef8873, verified).
import cv2
import numpy as np
from PIL import Image
import torch
import torch.nn.functional as F
import logging
logger = logging.getLogger(__name__)
# ------------------------------------------------------------------
# Lightweight hook manager — CPU-only, no logging, direct capture
# ------------------------------------------------------------------
class _GradCAMHook:
    """Capture a layer's forward output and the gradient flowing back into it.

    A forward hook is registered on ``target_layer`` at construction time.
    On every forward pass the layer output is stored in ``activation`` and,
    when that output participates in autograd, a tensor-level hook is attached
    to it so the matching gradient lands in ``gradient`` during backward.

    Call :meth:`remove` when done to detach all hooks from the model.
    """

    __slots__ = ("activation", "gradient", "fwd_handle", "bwd_handle")

    def __init__(self, target_layer):
        self.activation = None
        self.gradient = None
        self.bwd_handle = None
        self.fwd_handle = target_layer.register_forward_hook(self._fwd_hook)

    def _fwd_hook(self, module, inp, out):
        # A new forward pass invalidates whatever the previous pass captured:
        # drop the old tensor hook (otherwise handles pile up, one per
        # forward) and clear the stale gradient so callers never pair a fresh
        # activation with an old gradient.
        if self.bwd_handle is not None:
            self.bwd_handle.remove()
            self.bwd_handle = None
        self.activation = out
        self.gradient = None
        # Tensor-level hook is lighter than full backward hook or retain_grad().
        # register_hook() raises on tensors that do not require grad (forward
        # run under no_grad, or a fully frozen model with a plain input), so
        # only attach it when a gradient can actually arrive.
        if out.requires_grad:
            self.bwd_handle = out.register_hook(self._bwd_hook)

    def _bwd_hook(self, grad):
        self.gradient = grad

    def remove(self):
        """Detach the forward hook and, if present, the tensor gradient hook."""
        self.fwd_handle.remove()
        if self.bwd_handle is not None:
            self.bwd_handle.remove()
            self.bwd_handle = None
def _postprocess_cam(cam_tensor, original_img, output_path, alpha=0.5, beta=0.6):
    """Turn a raw CAM into a heatmap overlay and write it to ``output_path``.

    CPU post-processing shared by both ResNet and Fusion.

    Args:
        cam_tensor: 2D torch tensor [H, W], already ReLU'd. It is detached,
            cast to float32 and moved to CPU here, so a CPU float tensor
            passes through unchanged.
        original_img: PIL image providing the target size (``width`` x
            ``height``) and the background pixels; it is converted with
            ``COLOR_RGB2BGR``, so it is expected to be in RGB mode.
        output_path: Destination file path handed to ``cv2.imwrite``.
        alpha: Blend weight of the original image.
        beta: Blend weight of the JET-coloured heatmap.

    Raises:
        OSError: If OpenCV reports that the overlay could not be written.
    """
    h, w = original_img.height, original_img.width
    # .numpy() below requires a CPU tensor with no autograd history.
    cam_tensor = cam_tensor.detach().float().cpu()
    # Min-max normalise to [0, 1]; a flat (or NaN) map has no usable range
    # and becomes all zeros instead of dividing by zero.
    cam_min = cam_tensor.min()
    cam_max = cam_tensor.max()
    if cam_max > cam_min:
        cam_tensor = (cam_tensor - cam_min) / (cam_max - cam_min)
    else:
        cam_tensor = torch.zeros_like(cam_tensor)
    # Convert to numpy once, then resize with OpenCV (note: dsize is (w, h)).
    cam_np = cam_tensor.numpy()
    cam_np = cv2.resize(cam_np, (w, h), interpolation=cv2.INTER_LINEAR)
    cam_np = np.uint8(255 * cam_np)
    heatmap = cv2.applyColorMap(cam_np, cv2.COLORMAP_JET)
    original_bgr = cv2.cvtColor(np.array(original_img), cv2.COLOR_RGB2BGR)
    overlay = cv2.addWeighted(original_bgr, alpha, heatmap, beta, 0)
    # cv2.imwrite signals most failures (e.g. missing directory) by returning
    # False rather than raising; surface that so callers do not report success
    # for a file that was never written.
    if not cv2.imwrite(output_path, overlay):
        raise OSError(f"Could not write Grad-CAM overlay to: {output_path}")
# ------------------------------------------------------------------
# Optimized ResNet Grad-CAM (CPU)
# ------------------------------------------------------------------
def get_resnet_gradcam(image_path, predictor, output_path):
    """Generate a Grad-CAM overlay for the ResNet predictor and save it.

    The CAM is computed for the top-scoring class of ``predictor.model`` at
    the last block of ``model.model.layer4`` and blended over the input image.

    Args:
        image_path: Path of the image to explain.
        predictor: Object exposing ``model`` (with ``model.layer4``) and
            ``test_transforms`` (PIL image -> tensor).
        output_path: Where the overlay image is written.

    Returns:
        True on success.

    Raises:
        RuntimeError: If any step fails; the original exception is chained.

    Side effects:
        Puts the model in eval mode and clears the parameter gradients that
        the backward pass produced.
    """
    logger.info("Starting ResNet Grad-CAM generation...")
    model = predictor.model
    model.eval()
    target_layer = model.model.layer4[-1]
    hook = _GradCAMHook(target_layer)
    try:
        # Close the file handle promptly; convert() returns an independent
        # image, so it stays valid after the `with` block.
        with Image.open(image_path) as img:
            original_img = img.convert("RGB")
        input_tensor = predictor.test_transforms(original_img).unsqueeze(0)
        # Run on whatever device the model lives on.
        input_tensor = input_tensor.to(next(model.parameters()).device)
        # Grad-CAM needs an autograd graph up to the target layer. Requiring
        # grad on the input guarantees one even if all parameters are frozen,
        # and enable_grad() overrides an ambient torch.no_grad().
        input_tensor.requires_grad_(True)
        with torch.enable_grad():
            output = model(input_tensor)
            score, pred_class_idx = output[0].max(dim=0)
            pred_class_idx = pred_class_idx.item()
            logger.info("Predicted class index: %s", pred_class_idx)
            score.backward()
        if hook.activation is None or hook.gradient is None:
            raise RuntimeError("Failed to capture activations or gradients.")
        # ----- Vectorized Grad-CAM -----
        acts = hook.activation[0].detach().float()       # [C, H, W]
        grads = hook.gradient[0].detach().float()        # [C, H, W]
        weights = grads.mean(dim=(1, 2), keepdim=True)   # [C, 1, 1]
        cam = (weights * acts).sum(dim=0)                # [H, W]
        # Post-processing converts to numpy, which needs a CPU tensor.
        cam = F.relu(cam).cpu()
        _postprocess_cam(cam, original_img, output_path, alpha=0.6, beta=0.4)
        logger.info("ResNet Grad-CAM saved to: %s", output_path)
        return True
    except Exception as e:
        logger.exception("ResNet Grad-CAM generation failed.")
        raise RuntimeError(f"ResNet Grad-CAM failed: {e}") from e
    finally:
        hook.remove()
        # backward() accumulated gradients on every trainable parameter;
        # release them so repeated calls do not keep a model-sized buffer.
        model.zero_grad(set_to_none=True)
# ------------------------------------------------------------------
# Optimized Fusion Grad-CAM (EfficientNet + ConvNeXt) (CPU)
# ------------------------------------------------------------------
def get_fusion_gradcam(image_path, predictor, output_path):
    """Generate a Grad-CAM overlay for the fusion predictor and save it.

    The CAM is computed for the top-scoring class at the last module of
    ``model.eff_features`` and blended over the input image.

    Args:
        image_path: Path of the image to explain.
        predictor: Object exposing ``model`` (called as
            ``model(pixel_eff, pixel_cnx)`` and having ``eff_features``),
            ``eff_normalize`` (PIL image -> tensor) and ``convnext_processor``
            (returns a mapping with ``"pixel_values"``).
        output_path: Where the overlay image is written.

    Returns:
        True on success.

    Raises:
        RuntimeError: If any step fails; the original exception is chained.

    Side effects:
        Puts the model in eval mode and clears the parameter gradients that
        the backward pass produced. An FP16 model is temporarily run in FP32
        and cast back to FP16 before returning.
    """
    logger.info("Starting Fusion Grad-CAM generation...")
    model = predictor.model
    model.eval()
    target_layer = model.eff_features[-1]
    hook = _GradCAMHook(target_layer)
    is_half = False
    try:
        # PyTorch CPU does not support FP16 convolutions well.
        # If the model is HalfTensor, cast it to FP32 for this pass.
        # nn.Module.float() converts in place, i.e. it changes the
        # predictor's own model, so the cast is undone in `finally`.
        is_half = next(model.parameters()).dtype == torch.float16
        if is_half:
            logger.info("FP16 model detected on CPU. Converting to FP32 for compatibility.")
            model.float()
        device = next(model.parameters()).device
        # Close the file handle promptly; convert() returns an independent
        # image, so it stays valid after the `with` block.
        with Image.open(image_path) as img:
            original_img = img.convert("RGB")
        # FloatTensor preprocessing (no .half()), placed on the model's device.
        pixel_eff = predictor.eff_normalize(original_img).unsqueeze(0).to(device)
        pixel_cnx = predictor.convnext_processor(
            images=original_img, return_tensors="pt"
        )["pixel_values"].to(device)
        # Grad-CAM needs an autograd graph up to the target layer. Requiring
        # grad on the EfficientNet input guarantees one even if all parameters
        # are frozen, and enable_grad() overrides an ambient torch.no_grad().
        pixel_eff.requires_grad_(True)
        with torch.enable_grad():
            output = model(pixel_eff, pixel_cnx)
            score, pred_class_idx = output[0].max(dim=0)
            pred_class_idx = pred_class_idx.item()
            logger.info("Predicted class index: %s", pred_class_idx)
            score.backward()
        if hook.activation is None or hook.gradient is None:
            raise RuntimeError("Failed to capture activations or gradients.")
        # ----- Vectorized Grad-CAM -----
        acts = hook.activation[0].detach().float()       # [C, H, W]
        grads = hook.gradient[0].detach().float()        # [C, H, W]
        weights = grads.mean(dim=(1, 2), keepdim=True)   # [C, 1, 1]
        cam = (weights * acts).sum(dim=0)                # [H, W]
        # Post-processing converts to numpy, which needs a CPU tensor.
        cam = F.relu(cam).cpu()
        _postprocess_cam(cam, original_img, output_path, alpha=0.5, beta=0.6)
        logger.info("Fusion Grad-CAM saved to: %s", output_path)
        return True
    except Exception as e:
        logger.exception("Fusion Grad-CAM generation failed.")
        raise RuntimeError(f"Fusion Grad-CAM failed: {e}") from e
    finally:
        hook.remove()
        # backward() accumulated gradients on every trainable parameter;
        # release them before any dtype change so nothing is converted or
        # kept alive needlessly.
        model.zero_grad(set_to_none=True)
        if is_half:
            # Restore the precision the predictor was using.
            # NOTE(review): assumes the model was uniformly FP16 — confirm
            # no layers were deliberately kept in FP32.
            model.half()