From 0fe92f182fc5007e1eecae54950bb126d2610028 Mon Sep 17 00:00:00 2001 From: ambag12 Date: Fri, 8 May 2026 20:24:26 +0500 Subject: [PATCH] qdrant injestion x get query has been completed wait for Scrap results --- .gitignore | 4 +- .../__pycache__/db_setup.cpython-313.pyc | Bin 2327 -> 0 bytes dev_backend/__pycache__/main.cpython-313.pyc | Bin 1183 -> 0 bytes .../__pycache__/views.cpython-313.pyc | Bin 1234 -> 0 bytes dev_backend/vector_db_router/models.py | 16 ++-- dev_backend/vector_db_router/plugins.py | 90 ++++++++++++++++++ dev_backend/vector_db_router/serializers.py | 4 +- dev_backend/vector_db_router/views.py | 70 ++++++++++++-- model_export/dino_image_matching.py | 3 +- 9 files changed, 168 insertions(+), 19 deletions(-) delete mode 100644 dev_backend/__pycache__/db_setup.cpython-313.pyc delete mode 100644 dev_backend/__pycache__/main.cpython-313.pyc delete mode 100644 dev_backend/mysql_process/__pycache__/views.cpython-313.pyc create mode 100644 dev_backend/vector_db_router/plugins.py diff --git a/.gitignore b/.gitignore index efe7b25..13ffc51 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ *pyc** **pycache** *agent/** -**downloaded_images** \ No newline at end of file +**downloaded_images** +**model_export** +**cpython** \ No newline at end of file diff --git a/dev_backend/__pycache__/db_setup.cpython-313.pyc b/dev_backend/__pycache__/db_setup.cpython-313.pyc deleted file mode 100644 index cdc274ac3fe2e87e92347ad41e5ee15458c4f2af..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2327 zcma)7&2Jk;6rcU@di}Ast8RmP@6oZD7*@+fcr72sW|;Z7vh6yIAuM(y%pAu`rE#?kaX12!tjZ zYfeL~>Q}rN5(X}tI-Fmb#@d+fVAwgj#fGQBLEUll=T6b!0Ip%nV5bWsrPH%Yb;h}B z9`rV@&d~6gOX~U4+K76hL}LwnF0ZN=&XxxJwfD}d!#+NDwlwV94djRO$MfomtVF{G zfmn#8S22~R93Rzm$NAVM1{Ei5$E9M;CN7l?-PKvtW%lL$0hNOU9xCN)0Q*n|s%3ON zhY(ba%Bw;zZ=nt}f{u#Q=r)%DR1$pm&9|Sor=Y?nVSSprEer*su;T=h(NJK||M3%j zKN&I4arCzE3L58*3gcW>9GXo}x^As6mox3MZnCTOz17px^RyqLfD90(h)IKPrHbt- zH_MYCsA-^V+8$9-*-HwCT`oiWa#hRC8M)IH$E{csT1hu_k~169Vd6;!o(7i7SAfNw zF{U}#ovL-$W@ys{cD1WsqG^pyNhfUD3Xj7=d5Y%H6Q$$szP{g%FDWPI23L78(7qB& z%|}-y6i+XhpPBR4eYtH}ZhLlPd_%k^w>@{dVT0?De3bhl(2c%q2JkI+NW3Ef_#u@K z35z_J56Fvx#BfLe^%goMaQEnZ0^r|tj`E=N1|ycyI?Cwx#t0f^zTQv;RqPckZiL%x zM;1RC^4EQ;EdkPk-$)C-6!9%K-@ffj(qenRm!(BuXc$UN#N|nQ5Q5f(gA*t7?xtVpHx)}U-@1~j-{@l-qx z#pFP2C?BCH~28M|GU9o`|rN5Eu}u3 zJM$pcI&aUNd>Cn6j$&{v+)sgg68+o^Xfe#?6XIe-VmO(~9~Kt(bNQ^i*d;N1SO7iLi$ox!i8b5hwZ4tk zI_ilpHqt|xA=|SlQoS7*AR`FDHYy<&TeHRxeP(bx-SHmNo^i?Nm9I5b}! z7>~0(6HJwU!MMR^H?#@T#MY$7kAO-*8vI1a0p`UB^O*Y{tpP+0UgduHmrR-X7Zs!p>Zzl)N9f`bx>!dC>!|Bb)LBQb!qNj2`BZrv zPcF>foLz}`tSIq?OE)jw`ea$@T*r5JbS*2dt%M>AyKe5f)xR9dY{3nOhS6V2Fu~77 R|4s$D_O+zUb&z8a(LcT~FSq~z diff --git a/dev_backend/__pycache__/main.cpython-313.pyc b/dev_backend/__pycache__/main.cpython-313.pyc deleted file mode 100644 index a33d8a7309aa7f4b9d8fa18b169696504c5522fb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1183 zcmbtT&rj4q6rQ$A+udci%MT%341svqgIhJh1cN4+h|)!aTaKGF-R*#l-L}lkF0fuC zG2sdagBKGxp*J|e(LVvjL>xlUcre~DoK2kB?gAP-IfuUQd-J{5d0%EuQmF)BtN!Je z`&|U!8z-$5Z-SFo0st?82}~gmP!t6u6hR@@lAgtaa)w)|C|sy)^>4{o{BLFQT(c%D z%0uk@eElo-^O?lFVyH+p;wawWQa)iMQPOBf?F}Bwr;Id8H*i~CGcqV+bfAs~m-C%Q z7wT@}OAPn)gJOCB3`IW%jHv^_R7};34~wX`*ju>X+&UME-Dbi}>VnxmEU}7Yh(CeYHISK>o^{qvk|1d1vp!Um=qum z10TbCz7qznPqG3{dV%d&PC%evW7QoqkXW-0vVCHeJs0|fCV2`&jNQOz1qnM0EfiD< zMD;e^3Cg&x=m>Ja>Qx@CD)MN&k>ju~>y-sO>(NvrSRo3^tQ}L!A%fHHJdF{12G^5% zIq*DKChSKvUSl9YmQ%|1Q3)?#Dh8OwLYq9HGCKnF9jZ7k^6d(wie18u9o4d?T@P9o zRi>FUcIYCBS%vtmrF|@cJV*%4RZhLhm6%5{`qrutJ;dTCT=L}%7(VCZHn8 z5lnmsgGb=;A-H@5ZXAMJhhX$GNWMsYl~env?)`M{&o)UBpG(IH*8daed=EO8N7m*y hgWaLAT~6=s11ZYyb`8F{z4L6R!tx)$1R7)~{0*voErS36 diff --git a/dev_backend/mysql_process/__pycache__/views.cpython-313.pyc b/dev_backend/mysql_process/__pycache__/views.cpython-313.pyc deleted file mode 100644 index 86d7527857f0b94e8ee961703e5b6273c05f1d94..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1234 zcmY*Y&u<%55Pompde>fO?YNXQv>}+13YkJo!Bv&kw2InPVU5y~Evk?$S)1MGu(00U z%x*$maVS3|4oJ`o2X1gnB@RV!;LmWB3#=L?65`OCkvSmI1M@bHMMv8A=9`)K=FK;= zLtWPhN^Sdzdms_=hX6;C8!5vBRK6u9F=>Ms6lhuKQbRHrFhd5}$Uw$WKuPTkV^mO$ zEM!wGH*$ssnvsWmiZhJ~LkB(8l}5oRLNV3V#-ve#Qkk@r)8zapH5_XlTYY&1{iM>L7=pNz;&;=itK%TUys@&%qcHFzem7SQ zA#gVBI7-x`yK`IIjss{rYc{rVEGlZB<2Uqhd5M&-=th znR{MPz8!$)V2P?7_%T-R{*4b2Bt^*fX4JL=hbM}|W6Sj_G88abk{eSEioh(2p~9eu zI!p=!#w(0EyseYcZgnt6ghmWy=K`TVNa{wUbNx0kxLuj zS>1B^$I*P)OD08|EHCw>EW=4mFo9om5tqm09vNsXQ-4%E_5J$~_5R%Do#K^#_R2qx z^2djSqN8wXoBeY2SNikyox-Jl_R_xthUO>H3?BcC*60^nk$jE&H`+P&-7~MUA2a9J zPpn2)D$gcL+zavXAc{)Vj)mNWF35;LG$UmJL@OXG7V(cJS!WH0EZz)3{LGLSwkfR5 z^6U?Iw>Qu4#PgOIGdBQHN1{J9g`f*tX_RXoMspS&c0jYjunp;mCk&}c*?2&Pc!t1p z;$*ho#c}lbWjKRS3|F*)3l>f39+}xE^<7fmC*^(e#-F6RN6zk%SAN&0zG~jn1`3fT zzMTDZ_I`es&I}}#)_>Ed?tk*b?d|67nT7s|g&qCkfT8)IaH2n5d2pfs=C$3kOa1Ak Vox=5D1|362r04$ str: + """ + Download an image from URL and save it in data/temp/ folder. + + Args: + url (str): Image URL + filename (str, optional): Custom filename. If None, extracted from URL. + + Returns: + str: Full path to the downloaded image + """ + try: + # Get project root directory (where your main script is) + root_dir = Path(os.path.dirname(os.path.abspath(__file__))).parent + + # Create data/temp folder structure + temp_dir = root_dir / "data" / "temp" + temp_dir.mkdir(parents=True, exist_ok=True) + + # Generate filename if not provided + if not filename: + parsed_url = urlparse(url) + filename = os.path.basename(parsed_url.path) + if not filename or "." not in filename: + # Fallback filename + ext = filename.split('.')[-1] if '.' in filename else 'jpg' + filename = f"image_{hash(url) % 100000}.{ext}" + + # Ensure filename has extension + if '.' not in filename: + filename += ".jpg" + + file_path = temp_dir / filename + + # Download the image + response = requests.get(url, stream=True, timeout=30) + response.raise_for_status() + + # Save image + with open(file_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + + print(f"✅ Image downloaded: {file_path}") + return str(file_path) + + except Exception as e: + print(f"❌ Failed to download image: {e}") + raise + +def read_image(image_path: str) -> Image.Image: + """ + Read an image from the given path and return a PIL Image object. + + Args: + image_path (str): Path to the image file + + Returns: + PIL.Image.Image: Loaded image + + Raises: + FileNotFoundError: If image doesn't exist + Exception: For other image loading errors + """ + try: + if not os.path.exists(image_path): + raise FileNotFoundError(f"Image not found at path: {image_path}") + + # Open the image + image = Image.open(image_path) + + # Convert to RGB (important for DINOv2 and most models) + if image.mode != "RGB": + image = image.convert("RGB") + + print(f"✅ Image loaded successfully: {image_path} | Size: {image.size}") + return image + + except FileNotFoundError as e: + print(f"❌ File not found: {e}") + raise + except Exception as e: + print(f"❌ Failed to read image: {e}") + raise \ No newline at end of file diff --git a/dev_backend/vector_db_router/serializers.py b/dev_backend/vector_db_router/serializers.py index dd43e20..7682200 100644 --- a/dev_backend/vector_db_router/serializers.py +++ b/dev_backend/vector_db_router/serializers.py @@ -10,7 +10,9 @@ class CreateCollectionSerializer(BaseModel): class QueryCollectionSerializer(BaseModel): collection_name: str - query_vector: List[float] + url: str + score_threshold: float = 0.3 # Euclidean distance — lower = more similar. 0.3 = very tight match + limit: int = 10 class UpdateCollectionSerializer(BaseModel): collection_name: str diff --git a/dev_backend/vector_db_router/views.py b/dev_backend/vector_db_router/views.py index b87275e..12ebfb7 100644 --- a/dev_backend/vector_db_router/views.py +++ b/dev_backend/vector_db_router/views.py @@ -2,6 +2,7 @@ from db_setup import get_qdrant_client from typing import Annotated from fastapi import Depends, HTTPException, APIRouter from qdrant_client import AsyncQdrantClient +from .plugins import download_image,read_image from fastapi.responses import JSONResponse from .serializers import ( CreateCollectionSerializer, @@ -9,7 +10,7 @@ from .serializers import ( UpdateCollectionSerializer, DeleteCollectionSerializer ) -from model_export.dino_image_matching import get_vectors +from model_export.dino_image_matching import get_vectors,get_embedding from .models import CollectionHandler import os app_router = APIRouter() @@ -132,16 +133,65 @@ async def query_collection_endpoint( body: QueryCollectionSerializer ): try: - handler = CollectionHandler( - collection_name=body.collection_name, - vector=body.query_vector, - vector_size=len(body.query_vector), - payload={}, - id=0 - ) - result = await handler.search(body.query_vector) - return JSONResponse({"results": str(result)}) + result = [] + if isinstance(body.url, str): + # Handle semicolon-separated URLs by taking the first one + target_url = body.url.split(';')[0].strip() if ';' in body.url else body.url + log.info(f"Querying collection {body.collection_name} with URL: {target_url}") + downloaded_image_path = download_image(target_url) + query_vector = get_embedding(downloaded_image_path) + # get_embedding already returns a flat list of 768 floats + + handler = CollectionHandler( + collection_name=body.collection_name, + vector=query_vector, + vector_size=len(query_vector), + payload={}, + id=0, + client=q + ) + search_result = await handler.search( + query_vector, + score_threshold=body.score_threshold, + limit=body.limit + ) + if search_result: + result = [ + {"id": p.id, "score": p.score, "payload": p.payload} + for p in search_result + ] + else: + result = [] # No match within threshold + + elif isinstance(body.url, list): + result = [] + for url in body.url: + downloaded_image_path = download_image(url) + query_vector = get_embedding(downloaded_image_path) + # get_embedding already returns a flat list of 768 floats + + handler = CollectionHandler( + collection_name=body.collection_name, + vector=query_vector, + vector_size=len(query_vector), + payload={}, + id=0, + client=q + ) + search_result = await handler.search( + query_vector, + score_threshold=body.score_threshold, + limit=body.limit + ) + if search_result: + result.append([ + {"id": p.id, "score": p.score, "payload": p.payload} + for p in search_result + ]) + + return JSONResponse({"results": result}) except Exception as e: + log.error(f"Query failed: {e}") raise HTTPException(status_code=500, detail=str(e)) @app_router.put("/update") diff --git a/model_export/dino_image_matching.py b/model_export/dino_image_matching.py index 54c369f..70edcc2 100644 --- a/model_export/dino_image_matching.py +++ b/model_export/dino_image_matching.py @@ -46,7 +46,8 @@ def get_embedding(image_path): # Normalize embedding (important for cosine similarity) embedding = F.normalize(embedding, p=2, dim=1) - return embedding.cpu() + # Return flat list (squeeze batch dim) + return embedding.squeeze(0).cpu().tolist() def get_vectors(image_input, item): try: