ChatGPT-ImageCaptioner2

Build error

App Files Files Community

MattyWhite committed on Dec 26, 2023

Commit

4247594

1 Parent(s): 4de593d

Create testold.txt

Browse files

Files changed (1) hide show

testold.txt +184 -0

testold.txt ADDED Viewed

	@@ -0,0 +1,184 @@

+import os
+from langchain.llms import OpenAI, OpenAIChat
+os.system("pip install -U gradio")
+import sys
+import radio as gr
+cmd22 = "pip install pydantic==1.*"
+cmd0 = "pip -m pip install 'https://github.com/facebookresearch/detectron2.git@5aeb252b194b93dc2879b4ac34bc51a31b5aee13'"
+# cmd0 = "python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'"
+# cmd0 = "python -m pip install 'https://github.com/facebookresearch/detectron2.git'"
+os.system(cmd0)
+os.system(cmd22)
+# clone and install Detic
+os.system(
+    "git clone https://github.com/facebookresearch/Detic.git --recurse-submodules"
+)
+os.chdir("Detic")
+# torch is imported ahead of detectron2 (nothing is installed at this point;
+# the pip commands have already been issued earlier in the script).
+import torch
+# Some basic setup:
+# Import detectron2 and call its logger setup helper once.
+import detectron2
+from detectron2.utils.logger import setup_logger
+setup_logger()
+# Common libraries (sys and os are re-imported here; the repeats are harmless).
+import sys
+import numpy as np
+import os, json, cv2, random
+# Common detectron2 utilities: config, predictor, visualizer and catalogs.
+from detectron2 import model_zoo
+from detectron2.engine import DefaultPredictor
+from detectron2.config import get_cfg
+from detectron2.utils.visualizer import Visualizer
+from detectron2.data import MetadataCatalog, DatasetCatalog
+# Detic libraries
+# The two directories are put at the front of sys.path before the centernet
+# import below. The paths are relative, so they resolve against the current
+# working directory (the script has already chdir'ed into "Detic").
+sys.path.insert(0, "third_party/CenterNet2/projects/CenterNet2/")
+sys.path.insert(0, "third_party/CenterNet2/")
+from centernet.config import add_centernet_config
+from detic.config import add_detic_config
+from detic.modeling.utils import reset_cls_test
+from PIL import Image
+# Build the detector configuration and the shared predictor.
+# Start from get_cfg(), then pass the config through the CenterNet2 and Detic
+# helpers (presumably they register their extra config keys -- confirm)
+# before the model YAML is merged.
+cfg = get_cfg()
+add_centernet_config(cfg)
+add_detic_config(cfg)
+# Select the CPU as the model device.
+# NOTE(review): merge_from_file runs after this assignment -- confirm the
+# YAML does not override MODEL.DEVICE.
+cfg.MODEL.DEVICE = "cpu"
+# Relative path: resolved against the current working directory.
+cfg.merge_from_file("configs/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml")
+# Weights are referenced by URL (presumably fetched when the predictor is
+# built -- confirm against the detectron2 checkpoint loading behaviour).
+cfg.MODEL.WEIGHTS = "https://dl.fbaipublicfiles.com/detic/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.pth"
+cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
+# NOTE(review): "rand" looks like a placeholder for the zero-shot classifier
+# weights; inference() calls reset_cls_test with a per-vocabulary classifier
+# file on every request -- confirm the placeholder is never used as-is.
+cfg.MODEL.ROI_BOX_HEAD.ZEROSHOT_WEIGHT_PATH = "rand"
+cfg.MODEL.ROI_HEADS.ONE_CLASS_PER_PROPOSAL = (
+    True  # For better visualization purpose. Set to False for all classes.
+)
+# Module-level predictor, created once and used by inference().
+predictor = DefaultPredictor(cfg)
+BUILDIN_CLASSIFIER = {
+    "lvis": "datasets/metadata/lvis_v1_clip_a+cname.npy",
+    "objects365": "datasets/metadata/o365_clip_a+cnamefix.npy",
+    "openimages": "datasets/metadata/oid_clip_a+cname.npy",
+    "coco": "datasets/metadata/coco_clip_a+cname.npy",
+}
+BUILDIN_METADATA_PATH = {
+    "lvis": "lvis_v1_val",
+    "objects365": "objects365_v2_val",
+    "openimages": "oid_val_expanded",
+    "coco": "coco_2017_val",
+}
+session_token = os.environ.get("SessionToken")
+def generate_caption(object_list_str, api_key, temperature):
+    query = f"You are an intelligent image captioner. I will hand you the objects and their position, and you should give me a detailed description that IS BOTH SUPER CONCISE AND SHORT for the photo. In this photo we have the following objects\n{object_list_str}"
+    # query = f"You are an intelligent image captioner. I will hand you the objects and their position, and you should give me a detailed description for the photo. In this photo we have the following objects\n{object_list_str}"
+    llm = OpenAIChat(
+        model_name="gpt-3.5-turbo", openai_api_key=api_key, temperature=temperature
+    )
+    # not gpt-4 yet!
+    try:
+        caption = llm(query)
+        caption = caption.strip()
+    except:
+        caption = "Sorry, something went wrong!"
+    return caption
+def inference(img, vocabulary, api_key, temperature):
+    metadata = MetadataCatalog.get(BUILDIN_METADATA_PATH[vocabulary])
+    classifier = BUILDIN_CLASSIFIER[vocabulary]
+    num_classes = len(metadata.thing_classes)
+    reset_cls_test(predictor.model, classifier, num_classes)
+    im = cv2.imread(img)
+    outputs = predictor(im)
+    v = Visualizer(im[:, :, ::-1], metadata)
+    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
+    detected_objects = []
+    object_list_str = []
+    box_locations = outputs["instances"].pred_boxes
+    box_loc_screen = box_locations.tensor.cpu().numpy()
+    for i, box_coord in enumerate(box_loc_screen):
+        x0, y0, x1, y1 = box_coord
+        width = x1 - x0
+        height = y1 - y0
+        predicted_label = metadata.thing_classes[outputs["instances"].pred_classes[i]]
+        detected_objects.append(
+            {
+                "prediction": predicted_label,
+                "x": int(x0),
+                "y": int(y0),
+                "w": int(width),
+                "h": int(height),
+            }
+        )
+        object_list_str.append(
+            f"{predicted_label} - X:({int(x0)} Y: {int(y0)} Width {int(width)} Height: {int(height)})"
+        )
+    if api_key is not None:
+        gpt_response = generate_caption(object_list_str, api_key, temperature)
+    else:
+        gpt_response = "Please paste your OpenAI key to use"
+    return (
+        Image.fromarray(np.uint8(out.get_image())).convert("RGB"),
+        gpt_response,
+    )
+with gr.Blocks() as demo:
+    with gr.Column():
+        gr.Markdown("# Image Captioning using Detic and ChatGPT with LangChain 🦜️🔗")
+        gr.Markdown(
+            "Use Detic to detect objects in an image and then use `gpt-3.5-turbo` to describe the image."
+        )
+    with gr.Row():
+        with gr.Column():
+            inp = gr.Image(label="Input Image", type="filepath")
+            with gr.Column():
+                openai_api_key_textbox = gr.Textbox(
+                    placeholder="Paste your OpenAI API key (sk-...)",
+                    show_label=False,
+                    lines=1,
+                    type="password",
+                )
+                temperature = gr.Slider(0, 1, 0.1, label="Temperature")
+                vocab = gr.Dropdown(
+                    ["lvis", "objects365", "openimages", "coco"],
+                    label="Detic Vocabulary",
+                    value="lvis",
+                )
+            btn_detic = gr.Button("Run Detic and ChatGPT")
+        with gr.Column():
+            output_desc = gr.Textbox(label="Description Description", lines=5)
+            outviz = gr.Image(label="Visualization", type="pil")
+    btn_detic.click(
+        fn=inference,
+        inputs=[inp, vocab, openai_api_key_textbox, temperature],
+        outputs=[outviz, output_desc],
+    )
+demo.launch(debug=False)