MattyWhite committed on
Commit
4247594
·
1 Parent(s): 4de593d

Create testold.txt

Browse files
Files changed (1) hide show
  1. testold.txt +184 -0
testold.txt ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Runtime bootstrap: dependencies are installed with shell commands at start-up,
# before the modules that need them are imported. Each command is best-effort;
# its exit status is not checked.
import os
from langchain.llms import OpenAI, OpenAIChat

os.system("pip install -U gradio")
import sys
import gradio as gr  # the package installed above is `gradio`, not `radio`

cmd22 = "pip install pydantic==1.*"

# detectron2 is installed from GitHub, pinned to a specific commit.
# `python -m pip` is the valid invocation (pip itself has no `-m` option), and
# the `git+` scheme is what tells pip to treat the URL as a git repository.
cmd0 = "python -m pip install 'git+https://github.com/facebookresearch/detectron2.git@5aeb252b194b93dc2879b4ac34bc51a31b5aee13'"

os.system(cmd0)
os.system(cmd22)

# clone and install Detic
os.system(
    "git clone https://github.com/facebookresearch/Detic.git --recurse-submodules"
)
# The relative paths used later in this script (configs/, datasets/,
# third_party/) are resolved from inside the cloned checkout.
os.chdir("Detic")
# Third-party imports; these packages are expected to be installed by now.
import torch

# detectron2 itself, with its logger switched on
import detectron2
from detectron2.utils.logger import setup_logger

setup_logger()

# general-purpose libraries
import sys
import numpy as np
import os
import json
import cv2
import random

# detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

# Detic libraries: the CenterNet2 directories (relative to the current working
# directory) go to the front of the import path; the one inserted last ends up
# first.
for _centernet_dir in (
    "third_party/CenterNet2/projects/CenterNet2/",
    "third_party/CenterNet2/",
):
    sys.path.insert(0, _centernet_dir)
from centernet.config import add_centernet_config
from detic.config import add_detic_config
from detic.modeling.utils import reset_cls_test

from PIL import Image
# Assemble the Detic configuration, point it at the pretrained weights and
# create the predictor used by every request.
cfg = get_cfg()
add_centernet_config(cfg)
add_detic_config(cfg)
cfg.MODEL.DEVICE = "cpu"
cfg.merge_from_file("configs/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml")
cfg.MODEL.WEIGHTS = "https://dl.fbaipublicfiles.com/detic/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.pth"
cfg.MODEL.ROI_BOX_HEAD.ZEROSHOT_WEIGHT_PATH = "rand"
# One class per proposal gives a cleaner visualization; set to False for all classes.
cfg.MODEL.ROI_HEADS.ONE_CLASS_PER_PROPOSAL = True
# Score threshold applied to this model's detections.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
predictor = DefaultPredictor(cfg)
# Vocabulary name -> classifier weight file (relative path).
BUILDIN_CLASSIFIER = dict(
    lvis="datasets/metadata/lvis_v1_clip_a+cname.npy",
    objects365="datasets/metadata/o365_clip_a+cnamefix.npy",
    openimages="datasets/metadata/oid_clip_a+cname.npy",
    coco="datasets/metadata/coco_clip_a+cname.npy",
)

# Vocabulary name -> dataset name looked up in MetadataCatalog.
BUILDIN_METADATA_PATH = dict(
    lvis="lvis_v1_val",
    objects365="objects365_v2_val",
    openimages="oid_val_expanded",
    coco="coco_2017_val",
)

# Optional value read from the environment; None when the variable is unset.
session_token = os.getenv("SessionToken")
def generate_caption(object_list_str, api_key, temperature):
    """Ask gpt-3.5-turbo for a short caption describing the detected objects.

    Args:
        object_list_str: Either a ready-made string or an iterable of
            per-object description lines. Iterables are joined with newlines
            so the prompt lists one object per line instead of embedding a
            Python list repr.
        api_key: OpenAI API key handed to the LangChain chat wrapper.
        temperature: Sampling temperature handed to the model.

    Returns:
        The stripped caption text, or "Sorry, something went wrong!" when the
        model call raises.
    """
    import logging

    if isinstance(object_list_str, str):
        objects_text = object_list_str
    else:
        objects_text = "\n".join(str(entry) for entry in object_list_str)

    query = f"You are an intelligent image captioner. I will hand you the objects and their position, and you should give me a detailed description that IS BOTH SUPER CONCISE AND SHORT for the photo. In this photo we have the following objects\n{objects_text}"

    # not gpt-4 yet!
    llm = OpenAIChat(
        model_name="gpt-3.5-turbo", openai_api_key=api_key, temperature=temperature
    )

    try:
        caption = llm(query).strip()
    except Exception:
        # Keep the user-facing fallback, but record the failure instead of
        # swallowing it; KeyboardInterrupt/SystemExit are no longer caught.
        logging.getLogger(__name__).exception("Caption generation failed")
        caption = "Sorry, something went wrong!"

    return caption
def inference(img, vocabulary, api_key, temperature):
    """Run Detic on an image and caption the detections with ChatGPT.

    Args:
        img: Path of the image file to analyse (the UI's image input is
            configured with type="filepath").
        vocabulary: Key into BUILDIN_METADATA_PATH / BUILDIN_CLASSIFIER
            selecting the detection vocabulary.
        api_key: OpenAI API key; when empty or missing, no caption is
            requested and a hint is returned instead.
        temperature: Sampling temperature forwarded to generate_caption.

    Returns:
        A tuple (visualization, text): an RGB PIL image with the predictions
        drawn on it, and the caption or a hint message. The visualization is
        None when no readable image was supplied.
    """
    if not img:
        return None, "Please upload an image first."

    im = cv2.imread(img)
    if im is None:
        # imread signals an unreadable or missing file by returning None.
        return None, "Sorry, the image could not be read."

    metadata = MetadataCatalog.get(BUILDIN_METADATA_PATH[vocabulary])
    classifier = BUILDIN_CLASSIFIER[vocabulary]
    num_classes = len(metadata.thing_classes)
    reset_cls_test(predictor.model, classifier, num_classes)

    outputs = predictor(im)
    instances = outputs["instances"].to("cpu")

    # OpenCV loads BGR; the visualizer is given the channels reversed.
    v = Visualizer(im[:, :, ::-1], metadata)
    out = v.draw_instance_predictions(instances)

    boxes = instances.pred_boxes.tensor.numpy()
    class_ids = instances.pred_classes.tolist()

    # One text line per detection: label plus top-left corner and box size.
    object_list_str = []
    for (x0, y0, x1, y1), class_id in zip(boxes, class_ids):
        width = x1 - x0
        height = y1 - y0
        predicted_label = metadata.thing_classes[class_id]
        object_list_str.append(
            f"{predicted_label} - X:({int(x0)} Y: {int(y0)} Width {int(width)} Height: {int(height)})"
        )

    # An empty textbox yields an empty string, not None, so test truthiness.
    key = api_key.strip() if isinstance(api_key, str) else api_key
    if key:
        gpt_response = generate_caption(object_list_str, key, temperature)
    else:
        gpt_response = "Please paste your OpenAI key to use"

    return (
        Image.fromarray(np.uint8(out.get_image())).convert("RGB"),
        gpt_response,
    )
# Gradio UI: image, API key, temperature and vocabulary inputs on top; the
# generated description and the annotated image below.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("# Image Captioning using Detic and ChatGPT with LangChain 🦜️🔗")
        gr.Markdown(
            "Use Detic to detect objects in an image and then use `gpt-3.5-turbo` to describe the image."
        )

    with gr.Row():
        with gr.Column():
            # type="filepath": the callback receives a path, which it reads with OpenCV.
            inp = gr.Image(label="Input Image", type="filepath")
        with gr.Column():
            openai_api_key_textbox = gr.Textbox(
                placeholder="Paste your OpenAI API key (sk-...)",
                show_label=False,
                lines=1,
                type="password",
            )
            temperature = gr.Slider(minimum=0, maximum=1, value=0.1, label="Temperature")
            vocab = gr.Dropdown(
                ["lvis", "objects365", "openimages", "coco"],
                label="Detic Vocabulary",
                value="lvis",
            )

            btn_detic = gr.Button("Run Detic and ChatGPT")
    with gr.Column():
        # Label previously read "Description Description" (duplicated word).
        output_desc = gr.Textbox(label="Description", lines=5)
        outviz = gr.Image(label="Visualization", type="pil")

    # inference returns (image, text), matching the order of `outputs`.
    btn_detic.click(
        fn=inference,
        inputs=[inp, vocab, openai_api_key_textbox, temperature],
        outputs=[outviz, output_desc],
    )


demo.launch(debug=False)