# DINOv2 image-embedding similarity demo.
import warnings

import torch
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms

# DINOv2's hub loader probes for optional xFormers; its absence is harmless
# for plain-attention inference, so suppress only that specific warning.
warnings.filterwarnings("ignore", message="xFormers is not available.*", category=UserWarning)
|
|
|
|
# Choose the compute device up front, then fetch the pretrained DINOv2
# ViT-B/14 backbone from torch.hub and switch it to inference mode.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitb14')
model.eval()
model = model.to(device)
|
|
|
|
# Image preprocessing.
# FIX: DINOv2 backbones are trained on ImageNet-normalized inputs; the
# original pipeline stopped at ToTensor() (raw [0, 1] pixel values), which
# skews the embeddings. The mean/std below are the standard ImageNet
# statistics used by the official DINOv2 evaluation transforms.
transform = transforms.Compose([
    transforms.Resize((518, 518)),  # 518 = 37 * 14-pixel patches for ViT-B/14
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
|
|
|
|
def get_embedding(image_path):
    """Compute an L2-normalized DINOv2 embedding for the image at *image_path*.

    Returns a (1, D) float tensor on the CPU, suitable for cosine-similarity
    comparison against other embeddings produced by this function.
    """
    rgb = Image.open(image_path).convert("RGB")
    batch = transform(rgb).unsqueeze(0).to(device)

    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        features = model(batch)

    # Unit-length rows make dot products equal to cosine similarity.
    return F.normalize(features, p=2, dim=1).cpu()
|
|
|
|
# Example: compare embeddings of two product images.
# NOTE(review): Windows-style relative paths — assumes the script is run from
# the directory containing data_images; confirm against deployment layout.
emb1 = get_embedding(r"data_images\B0B39FFJHF\03.jpg")
emb2 = get_embedding(r"data_images\B09RWY127Q\03.jpg")

# Cosine similarity between the two embeddings.
# BUG FIX: the original called F.pdist, which computes pairwise *Euclidean*
# distance (p=2 default), while the comment and the L2-normalization in
# get_embedding show cosine similarity was intended. For unit vectors the two
# are monotonically related (d^2 = 2 - 2*cos), but the printed number was a
# distance in [0, 2], not a similarity in [-1, 1].
similarity = F.cosine_similarity(emb1, emb2)

print("Cosine similarity:", similarity.item())