# listing-radar/dino_image_matching.py
# Compute DINOv2 image embeddings and compare two images by cosine similarity.

import warnings
import torch
from PIL import Image
from torchvision import transforms
import torch.nn.functional as F
# Optional dependency warnings from DINOv2 internals are non-critical.
warnings.filterwarnings("ignore", message="xFormers is not available.*", category=UserWarning)

# Load the DINOv2 ViT-B/14 backbone from torch.hub (downloads on first use).
model = torch.hub.load(
'facebookresearch/dinov2',
'dinov2_vitb14'
)
model.eval()

# Run on GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Image preprocessing. DINOv2's reference transforms resize and then
# normalize with ImageNet statistics; the original code omitted the
# Normalize step, feeding raw [0, 1] pixels to a model trained on
# normalized inputs, which degrades embedding quality.
transform = transforms.Compose([
transforms.Resize((518, 518)), # DINOv2 recommended size
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet stats
])
def get_embedding(image_path):
    """Return an L2-normalized DINOv2 embedding (1 x D, on CPU) for the image file."""
    rgb = Image.open(image_path).convert("RGB")
    # Preprocess and add a batch dimension so the model sees shape (1, C, H, W).
    batch = transform(rgb).unsqueeze(0).to(device)
    with torch.no_grad():
        features = model(batch)
    # Unit-length vectors make cosine similarity a plain dot product.
    return F.normalize(features, p=2, dim=1).cpu()
# Example: compare two product images.
emb1 = get_embedding(r"data_images\B0B39FFJHF\03.jpg")
emb2 = get_embedding(r"data_images\B09RWY127Q\03.jpg")

# Cosine similarity between the two (already L2-normalized) embeddings.
# BUG FIX: the previous code called F.pdist, which computes the *Euclidean
# distance* between rows (range [0, 2] for unit vectors), not cosine
# similarity, despite the comment and intent. cosine_similarity returns a
# value in [-1, 1], where 1 means identical direction.
similarity = F.cosine_similarity(emb1, emb2, dim=1)
print("Cosine similarity:", similarity.item())