import torch
import transformers
from ..registry import ModelAttribute, model_zoo
# ===============================
# Register BLIP-2 models
# define data gen function
def data_gen():
    """Build one fixed dummy batch for the BLIP-2 models in this file.

    The image tensor is random; the text tensors are hard-coded. According to
    the original note, they were generated from the following snippet (kept
    for reference only, it is not executed here):

        from PIL import Image
        import requests
        from transformers import Blip2Processor, Blip2Model
        import torch
        processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        image = Image.open(requests.get(url, stream=True).raw)
        prompt = "Question: how many cats are there? Answer:"
        inputs = processor(images=image, text=prompt, return_tensors="pt").to(device, torch.float16)

    Returns:
        dict: ``pixel_values`` (random float32, shape ``(1, 3, 224, 224)``),
        ``input_ids`` and ``attention_mask`` (int64, shape ``(1, 11)``), and
        ``labels`` (int64, shape ``(1, 2)``).
    """
    prompt_token_ids = [2, 45641, 35, 141, 171, 10017, 32, 89, 116, 31652, 35]

    # Values are created in the same order as the keys; the random image
    # comes first, so RNG consumption is a single torch.rand call.
    batch = {
        "pixel_values": torch.rand(1, 3, 224, 224, dtype=torch.float32),
        "input_ids": torch.tensor([prompt_token_ids], dtype=torch.int64),
        # Every prompt token is attended to: one `1` per token id.
        "attention_mask": torch.tensor([[1] * len(prompt_token_ids)], dtype=torch.int64),
        "labels": torch.tensor([[34, 56]], dtype=torch.int64),
    }
    return batch
# Output transform: the model output is passed through untouched.
def output_transform_fn(x):
    """Return ``x`` unchanged (identity transform)."""
    return x


# Loss function: read the loss straight off the model output.
def loss_fn_blip2_model(x):
    """Return the ``loss`` attribute of ``x``."""
    return x.loss
# Shared BLIP-2 configuration, read by the model factories registered below.
# Start from the library defaults and override a handful of fields.
config = transformers.Blip2Config()
config.vision_config.patch_size = 14

# Use a single hidden layer in each of the text, Q-Former and vision
# sub-configs (presumably to keep the registered models small -- confirm).
for _sub_config in (config.text_config, config.qformer_config, config.vision_config):
    _sub_config.num_hidden_layers = 1
del _sub_config  # do not leave the loop variable in the module namespace

# Set the dropout probabilities of the Q-Former and text sub-configs to zero.
config.qformer_config.attention_probs_dropout_prob = 0
config.qformer_config.hidden_dropout_prob = 0
config.text_config.dropout = 0
# Register the BLIP-2 variants.
# This entry registers transformers.Blip2Model under the name
# "transformers_blip2", using the module-level config, data generator,
# output transform and loss function defined earlier in this file.
model_zoo.register(
name="transformers_blip2",
# A factory, not an instance: the model is only built when the lambda is
# called, and it reads the module-level `config` at that point.
model_fn=lambda: transformers.Blip2Model(config),
data_gen_fn=data_gen,
output_transform_fn=output_transform_fn,
loss_fn=loss_fn_blip2_model,
# NOTE(review): the meaning of has_control_flow is defined by ModelAttribute
# in ..registry, which is not visible here -- confirm against that module.
model_attribute=ModelAttribute(has_control_flow=True),
)
name="transformers_blip2_conditional_gerneration",
model_fn=lambda: transformers.Blip2ForConditionalGeneration(config),