yujiepan/clip-vit-tiny-random-patch14-336

This is a tiny, randomly initialized CLIP model intended for debugging only. It was never trained, so its outputs are not meaningful.

Usage

from transformers import CLIPProcessor, CLIPModel, CLIPConfig
from PIL import Image
import requests
import torch

# Load the tiny random CLIP checkpoint and score one image against one text.
model_id = "yujiepan/clip-vit-tiny-random-patch14-336"
# Use the GPU when one is present; otherwise fall back to the CPU so the
# example still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained(model_id).to(device)
processor = CLIPProcessor.from_pretrained(model_id)

url = "https://assets-c4akfrf5b4d3f4b7.z01.azurefd.net/assets/2024/04/BMDataViz_661fb89f3845e.png"
# A timeout keeps a stalled download from hanging forever, and
# raise_for_status() reports HTTP errors before PIL tries to decode the body.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)
text = "A description of the image"
inputs = processor(text=[text], images=image, return_tensors="pt", padding=True).to(device)
with torch.no_grad():
    outputs = model(**inputs)
logits_per_image = outputs.logits_per_image  # shape: [num_images, num_texts]
logits_per_text = outputs.logits_per_text  # shape: [num_texts, num_images]
probs = logits_per_image.softmax(dim=1)  # shape: [num_images, num_texts]
print(probs)

Code used to create this model

from transformers import CLIPProcessor, CLIPModel, CLIPConfig
from PIL import Image
import requests
import torch

# Build a tiny, randomly initialized CLIP model from the reference config,
# smoke-test one forward pass, then upload the model and processor to the Hub.
model_name = "openai/clip-vit-large-patch14-336"
repo_id = "yujiepan/clip-vit-tiny-random-patch14-336"

# The text and vision towers are shrunk with the same set of overrides.
tiny_tower_overrides = {
    "hidden_size": 8,
    "projection_dim": 8,
    "intermediate_size": 16,
    "num_hidden_layers": 2,
    "num_attention_heads": 2,
}
config = CLIPConfig.from_pretrained(model_name).to_dict()
config["projection_dim"] = 8
for tower_key in ("text_config", "vision_config"):
    config[tower_key].update(tiny_tower_overrides)
config = CLIPConfig.from_dict(config)

# Use the GPU when one is present; otherwise fall back to the CPU so the
# script does not crash on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Constructing from a config (not from_pretrained) leaves the weights random;
# .half() converts them to FP16 before upload.
model = CLIPModel(config).half().to(device)
processor = CLIPProcessor.from_pretrained(model_name)

url = "https://assets-c4akfrf5b4d3f4b7.z01.azurefd.net/assets/2024/04/BMDataViz_661fb89f3845e.png"
# A timeout keeps a stalled download from hanging forever, and
# raise_for_status() reports HTTP errors before PIL tries to decode the body.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)
text = "A description of the image"
inputs = processor(text=[text], images=image, return_tensors="pt", padding=True).to(device)
with torch.no_grad():
    outputs = model(**inputs)
logits_per_image = outputs.logits_per_image  # shape: [num_images, num_texts]
logits_per_text = outputs.logits_per_text  # shape: [num_texts, num_images]
probs = logits_per_image.softmax(dim=1)  # shape: [num_images, num_texts]
print(probs)

model.push_to_hub(repo_id)
processor.push_to_hub(repo_id)
Downloads last month
11,320
Safetensors
Model size
408k params
Tensor type
FP16
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Collection including yujiepan/clip-vit-tiny-random-patch14-336