dynaflip-base / image_processing_dynaflip.py
jlee-larr's picture
Upload folder using huggingface_hub
988579a verified
"""DynaFLIP image processor."""
from typing import Dict, List, Optional, Union
import numpy as np
from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
from transformers.image_transforms import (
normalize,
resize,
to_channel_dimension_format,
)
from transformers.image_utils import (
ChannelDimension,
ImageInput,
infer_channel_dimension_format,
is_batched,
make_list_of_images,
to_numpy_array,
valid_images,
)
# Default per-channel normalization statistics (three values each, one per
# channel). DynaFLIPImageProcessor falls back to these when no explicit
# image_mean / image_std is supplied.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
class DynaFLIPImageProcessor(BaseImageProcessor):
    """Image processor for DynaFLIP models.

    Optionally resizes, rescales and normalizes input images, converts them to
    the requested channel layout and returns them in a ``BatchFeature`` under
    the ``"pixel_values"`` key.

    Args:
        size: Target image size ``{"height": H, "width": W}``. Defaults to
            ``{"height": 224, "width": 224}``.
        image_mean: Per-channel mean for normalization. Defaults to a copy of
            ``IMAGENET_MEAN``.
        image_std: Per-channel std for normalization. Defaults to a copy of
            ``IMAGENET_STD``.
        do_resize: Whether to resize images.
        do_normalize: Whether to normalize images.
        do_rescale: Whether to multiply pixel values by ``rescale_factor``.
        rescale_factor: Multiplier applied when rescaling. The default of
            ``1 / 255`` maps ``[0, 255]`` pixel values to ``[0, 1]``.
        **kwargs: Forwarded to ``BaseImageProcessor.__init__``.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        size: Optional[Dict[str, int]] = None,
        image_mean: Optional[List[float]] = None,
        image_std: Optional[List[float]] = None,
        do_resize: bool = True,
        do_normalize: bool = True,
        do_rescale: bool = True,
        rescale_factor: float = 1 / 255.0,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.size = size or {"height": 224, "width": 224}
        # Copy the module-level defaults so that mutating an instance's
        # statistics in place cannot alter the shared constants.
        self.image_mean = image_mean or list(IMAGENET_MEAN)
        self.image_std = image_std or list(IMAGENET_STD)
        self.do_resize = do_resize
        self.do_normalize = do_normalize
        self.do_rescale = do_rescale
        self.rescale_factor = rescale_factor

    def preprocess(
        self,
        images: ImageInput,
        size: Optional[Dict[str, int]] = None,
        image_mean: Optional[List[float]] = None,
        image_std: Optional[List[float]] = None,
        do_resize: Optional[bool] = None,
        do_normalize: Optional[bool] = None,
        do_rescale: Optional[bool] = None,
        return_tensors: Optional[str] = None,
        data_format: ChannelDimension = ChannelDimension.FIRST,
        rescale_factor: Optional[float] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
        **kwargs,
    ) -> BatchFeature:
        """Preprocess images for DynaFLIP.

        Every optional argument left as ``None`` falls back to the value
        configured on the processor instance.

        Args:
            images: Single image or batch of images (PIL, numpy, or tensor).
            size: Override target size ``{"height": H, "width": W}``.
            image_mean: Override per-channel normalization mean.
            image_std: Override per-channel normalization std.
            do_resize: Override whether to resize.
            do_normalize: Override whether to normalize.
            do_rescale: Override whether to rescale.
            return_tensors: "pt" for PyTorch tensors, "np" for numpy.
            data_format: Channel layout of the returned images.
            rescale_factor: Override the rescaling multiplier.
            input_data_format: Channel layout of the input images. When
                ``None`` it is inferred separately for each image.
            **kwargs: Accepted for call compatibility and ignored.

        Returns:
            BatchFeature with "pixel_values".

        Raises:
            ValueError: If ``images`` fails ``valid_images``.
        """
        size = size or self.size
        image_mean = image_mean or self.image_mean
        image_std = image_std or self.image_std
        do_resize = do_resize if do_resize is not None else self.do_resize
        do_normalize = do_normalize if do_normalize is not None else self.do_normalize
        do_rescale = do_rescale if do_rescale is not None else self.do_rescale
        rescale_factor = (
            rescale_factor if rescale_factor is not None else self.rescale_factor
        )

        images = make_list_of_images(images)
        if not valid_images(images):
            raise ValueError("Invalid image input.")

        # Loop-invariant; only looked up when resizing is actually requested so
        # a size dict without these keys is harmless when do_resize is False.
        target_size = (size["height"], size["width"]) if do_resize else None

        processed = []
        for image in images:
            image = to_numpy_array(image)
            channel_format = (
                input_data_format
                if input_data_format is not None
                else infer_channel_dimension_format(image)
            )

            if do_resize:
                image = resize(
                    image,
                    size=target_size,
                    input_data_format=channel_format,
                )

            if do_rescale:
                # Multiplying an integer array by a Python float yields
                # float64; cast back to float32 so pixel_values has the same
                # dtype whether or not rescaling is enabled.
                image = (image.astype(np.float64) * rescale_factor).astype(
                    np.float32
                )

            if do_normalize:
                image = normalize(
                    image,
                    mean=image_mean,
                    std=image_std,
                    input_data_format=channel_format,
                )

            image = to_channel_dimension_format(
                image, data_format, input_channel_dim=channel_format
            )
            processed.append(image)

        data = {"pixel_values": processed}
        return BatchFeature(data=data, tensor_type=return_tensors)