jlee-larr
/

dynaflip-base

Zero-Shot Image Classification

feature-extraction

Model card Files Files and versions

dynaflip-base / image_processing_dynaflip.py

jlee-larr's picture

Upload folder using huggingface_hub

988579a verified 5 days ago

history blame contribute delete

4.13 kB

	"""DynaFLIP image processor."""

	from typing import Dict, List, Optional, Union

	import numpy as np
	from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
	from transformers.image_transforms import (
	normalize,
	resize,
	to_channel_dimension_format,
	)
	from transformers.image_utils import (
	ChannelDimension,
	ImageInput,
	infer_channel_dimension_format,
	is_batched,
	make_list_of_images,
	to_numpy_array,
	valid_images,
	)


	# Default per-channel normalization statistics; DynaFLIPImageProcessor falls
	# back to these when no ``image_mean`` / ``image_std`` is supplied.
	IMAGENET_MEAN = [
	    0.485,
	    0.456,
	    0.406,
	]
	IMAGENET_STD = [
	    0.229,
	    0.224,
	    0.225,
	]


	class DynaFLIPImageProcessor(BaseImageProcessor):
	    """Image processor for DynaFLIP models.

	    For each input image this optionally resizes, rescales and normalizes
	    (in that order) and then converts the result to the requested channel
	    layout.

	    Args:
	        size: Target image size {"height": H, "width": W}. Defaults to
	            {"height": 224, "width": 224}.
	        image_mean: Per-channel mean for normalization. Defaults to
	            ``IMAGENET_MEAN``.
	        image_std: Per-channel std for normalization. Defaults to
	            ``IMAGENET_STD``.
	        do_resize: Whether to resize images.
	        do_normalize: Whether to normalize images.
	        do_rescale: Whether to multiply pixel values by ``rescale_factor``
	            (with the default factor this maps [0,255] to [0,1]).
	        rescale_factor: Multiplier applied when ``do_rescale`` is true.
	    """

	    model_input_names = ["pixel_values"]

	    def __init__(
	        self,
	        size: Optional[Dict[str, int]] = None,
	        image_mean: Optional[List[float]] = None,
	        image_std: Optional[List[float]] = None,
	        do_resize: bool = True,
	        do_normalize: bool = True,
	        do_rescale: bool = True,
	        rescale_factor: float = 1 / 255.0,
	        **kwargs,
	    ):
	        """Store the default preprocessing configuration.

	        Extra keyword arguments are forwarded to ``BaseImageProcessor``.
	        """
	        super().__init__(**kwargs)
	        self.size = size or {"height": 224, "width": 224}
	        self.image_mean = image_mean or IMAGENET_MEAN
	        self.image_std = image_std or IMAGENET_STD
	        self.do_resize = do_resize
	        self.do_normalize = do_normalize
	        self.do_rescale = do_rescale
	        self.rescale_factor = rescale_factor

	    def preprocess(
	        self,
	        images: ImageInput,
	        size: Optional[Dict[str, int]] = None,
	        image_mean: Optional[List[float]] = None,
	        image_std: Optional[List[float]] = None,
	        do_resize: Optional[bool] = None,
	        do_normalize: Optional[bool] = None,
	        do_rescale: Optional[bool] = None,
	        return_tensors: Optional[str] = None,
	        data_format: ChannelDimension = ChannelDimension.FIRST,
	        rescale_factor: Optional[float] = None,
	        input_data_format: Optional[Union[str, ChannelDimension]] = None,
	        **kwargs,
	    ) -> BatchFeature:
	        """Preprocess images for DynaFLIP.

	        Every argument left as ``None`` falls back to the value configured
	        on the processor instance.

	        Args:
	            images: Single image or batch of images (PIL, numpy, or tensor).
	            size: Override target size {"height": H, "width": W}.
	            image_mean: Override per-channel normalization mean.
	            image_std: Override per-channel normalization std.
	            do_resize: Override whether to resize.
	            do_normalize: Override whether to normalize.
	            do_rescale: Override whether to rescale.
	            return_tensors: "pt" for PyTorch tensors, "np" for numpy.
	            data_format: Channel layout of the returned images.
	            rescale_factor: Override the rescale multiplier.
	            input_data_format: Channel layout of the input images. When
	                ``None`` it is inferred separately for each image.
	            **kwargs: Accepted for call-compatibility and ignored.

	        Returns:
	            BatchFeature with "pixel_values".

	        Raises:
	            ValueError: If ``images`` is not a valid image input.
	        """
	        size = size or self.size
	        image_mean = image_mean or self.image_mean
	        image_std = image_std or self.image_std
	        do_resize = do_resize if do_resize is not None else self.do_resize
	        do_normalize = do_normalize if do_normalize is not None else self.do_normalize
	        do_rescale = do_rescale if do_rescale is not None else self.do_rescale
	        rescale_factor = (
	            rescale_factor if rescale_factor is not None else self.rescale_factor
	        )

	        images = make_list_of_images(images)
	        if not valid_images(images):
	            raise ValueError("Invalid image input.")

	        # The target size is the same for every image, so build it once.
	        target_size = (size["height"], size["width"]) if do_resize else None

	        processed = []
	        for image in images:
	            image = to_numpy_array(image)
	            # Honour an explicit layout; otherwise infer it from the array.
	            image_format = (
	                input_data_format
	                if input_data_format is not None
	                else infer_channel_dimension_format(image)
	            )

	            if do_resize:
	                image = resize(
	                    image,
	                    size=target_size,
	                    input_data_format=image_format,
	                )

	            if do_rescale:
	                # An integer array times a Python float promotes to float64;
	                # cast explicitly so pixel_values come out as float32 rather
	                # than double precision.
	                image = (image.astype(np.float64) * rescale_factor).astype(
	                    np.float32
	                )

	            if do_normalize:
	                image = normalize(
	                    image,
	                    mean=image_mean,
	                    std=image_std,
	                    input_data_format=image_format,
	                )

	            image = to_channel_dimension_format(
	                image, data_format, input_channel_dim=image_format
	            )
	            processed.append(image)

	        data = {"pixel_values": processed}
	        return BatchFeature(data=data, tensor_type=return_tensors)