184 lines
6.1 KiB
Python
184 lines
6.1 KiB
Python
import logging
|
|
from typing import List, Optional
|
|
|
|
import httpx
|
|
from fastapi import APIRouter, HTTPException, Query
|
|
|
|
from .serializers import (
|
|
AmitTemuScrapeRequest,
|
|
SavedProduct,
|
|
TemuScrapeRequest,
|
|
TemuScrapeResponse,
|
|
TemuSearchResponse,
|
|
TemuProduct,
|
|
)
|
|
from .service import TemuScraperService
|
|
from .storage import get_storage
|
|
|
|
# Router on which every endpoint defined in this module is registered.
app_router = APIRouter()

# Importing this module configures logging: INFO level, with each line
# formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by the handlers below.
log = logging.getLogger(__name__)
|
|
|
|
|
|
def _products_from_items(items: list) -> List[TemuProduct]:
    """Validate each raw item into a ``TemuProduct`` and return them in input order."""
    return list(map(TemuProduct.model_validate, items))
|
|
|
|
|
|
def _sovereigntaylor_response(
    request: TemuScrapeRequest,
    items: list,
    meta: dict,
    saved_run_id: int,
    saved_at: str,
) -> TemuScrapeResponse:
    """
    Assemble the response returned after a sovereigntaylor scrape was saved.

    Args:
        request: The scrape request; its search terms, product URLs, category
            and max_results are echoed back in the response.
        items: Raw scraped items, validated into products before being returned.
        meta: Scrape metadata; ``"scraper"`` (defaulting to ``"sovereigntaylor"``)
            and ``"actor_id"`` are read from it.
        saved_run_id: Identifier of the saved run.
        saved_at: Timestamp at which the run was saved.

    Returns:
        A ``TemuScrapeResponse`` carrying the validated products and their count.
    """
    validated = _products_from_items(items)
    # Collect the fields first so the constructor call stays a single line.
    fields = {
        "scraper": meta.get("scraper", "sovereigntaylor"),
        "actor_id": meta.get("actor_id"),
        "saved_run_id": saved_run_id,
        "saved_at": saved_at,
        "search_terms": request.search_terms,
        "product_urls": request.product_urls,
        "category": request.category,
        "max_results": request.max_results,
        "product_count": len(validated),
        "products": validated,
    }
    return TemuScrapeResponse(**fields)
|
|
|
|
|
|
def _amit123_response(
    request: AmitTemuScrapeRequest,
    items: list,
    meta: dict,
    saved_run_id: int,
    saved_at: str,
) -> TemuScrapeResponse:
    """
    Assemble the response returned after an amit123 scrape was saved.

    Args:
        request: The scrape request; its search queries, currency and
            max_results are echoed back in the response.
        items: Raw scraped items, validated into products before being returned.
        meta: Scrape metadata; ``"scraper"`` (defaulting to ``"amit123"``) and
            ``"actor_id"`` are read from it.
        saved_run_id: Identifier of the saved run.
        saved_at: Timestamp at which the run was saved.

    Returns:
        A ``TemuScrapeResponse`` carrying the validated products and their count.
    """
    validated = _products_from_items(items)
    # Collect the fields first so the constructor call stays a single line.
    fields = {
        "scraper": meta.get("scraper", "amit123"),
        "actor_id": meta.get("actor_id"),
        "saved_run_id": saved_run_id,
        "saved_at": saved_at,
        "search_queries": request.search_queries,
        "currency": request.currency,
        "max_results": request.max_results,
        "product_count": len(validated),
        "products": validated,
    }
    return TemuScrapeResponse(**fields)
|
|
|
|
|
|
def _search_saved(
    scraper: str,
    q: str,
    limit: int,
    run_id: Optional[int],
    since: Optional[str],
) -> TemuSearchResponse:
    """
    Search the saved products of one scraper and wrap the hits in a response.

    Args:
        scraper: Name of the scraper whose saved results are searched.
        q: Search query, forwarded to storage as ``query``.
        limit: Result limit forwarded to storage.
        run_id: Optional run filter forwarded to storage.
        since: Optional timestamp filter forwarded to storage.

    Returns:
        A ``TemuSearchResponse`` echoing the search parameters, with
        ``match_count`` set to the number of rows storage returned and
        ``total_saved_for_scraper`` set to the total storage reported.
    """
    rows, saved_total = get_storage().search_products(
        scraper=scraper,
        query=q,
        limit=limit,
        run_id=run_id,
        since=since,
    )
    hit_count = len(rows)
    saved_products = [SavedProduct.model_validate(row) for row in rows]
    return TemuSearchResponse(
        query=q,
        scraper=scraper,
        match_count=hit_count,
        total_saved_for_scraper=saved_total,
        run_id=run_id,
        since=since,
        results=saved_products,
    )
|
|
|
|
|
|
async def _handle_scrape(coro):
|
|
try:
|
|
return await coro
|
|
except ValueError as e:
|
|
log.error(f"Validation or config error: {e}")
|
|
raise HTTPException(status_code=400, detail=str(e))
|
|
except httpx.HTTPStatusError as e:
|
|
log.error(f"Apify returned {e.response.status_code}: {e.response.text}")
|
|
raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
|
|
except httpx.RequestError as e:
|
|
log.error(f"Network error calling Apify: {e}")
|
|
raise HTTPException(status_code=502, detail=f"Upstream request failed: {e}")
|
|
except Exception as e:
|
|
log.error(f"Unexpected error: {e}")
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@app_router.post("/scrape", response_model=TemuScrapeResponse)
async def scrape_and_save_sovereigntaylor(body: TemuScrapeRequest):
    """
    Run the sovereigntaylor scraper, save the results, and return them.

    The scraped items are stored under a new run id together with the request
    payload; the response reports that run id and its save timestamp.

    Expensive — use GET /scrape to search saved results instead of re-scraping.
    """

    async def _scrape_then_save():
        # Scrape first; storage is only touched once the scrape succeeded.
        items, scrape_meta = await TemuScraperService().scrape_sovereigntaylor(body)
        run_id, saved_at, _ = get_storage().save_scrape(
            scraper="sovereigntaylor",
            actor_id=scrape_meta.get("actor_id"),
            request_payload=body.model_dump(),
            items=items,
        )
        log.info(f"Saved {len(items)} products as run_id={run_id} at {saved_at}")
        return _sovereigntaylor_response(body, items, scrape_meta, run_id, saved_at)

    # _handle_scrape converts any failure of the pipeline into an HTTPException.
    return await _handle_scrape(_scrape_then_save())
|
|
|
|
|
|
@app_router.get("/scrape", response_model=TemuSearchResponse)
async def search_saved_sovereigntaylor(
    q: str = Query(..., description="Search saved sovereignigtaylor results (FTS index)"),
    limit: int = Query(50, ge=1, le=500),
    run_id: Optional[int] = Query(None, description="Filter to a specific saved scrape run"),
    since: Optional[str] = Query(None, description="Only results saved at or after this ISO timestamp"),
):
    """
    Search locally saved sovereigntaylor scrape results. Does not call Apify.

    A blank or whitespace-only ``q`` is rejected with HTTP 400.
    """
    query_has_text = bool(q.strip())
    if query_has_text:
        return _search_saved(
            scraper="sovereigntaylor",
            q=q,
            limit=limit,
            run_id=run_id,
            since=since,
        )
    raise HTTPException(status_code=400, detail="Query parameter q is required")
|
|
|
|
|
|
@app_router.post("/scrape/amit123", response_model=TemuScrapeResponse)
async def scrape_and_save_amit123(body: AmitTemuScrapeRequest):
    """
    Run the amit123 scraper, save the results, and return them.

    The scraped items are stored under a new run id together with the request
    payload; the response reports that run id and its save timestamp.

    Expensive — use GET /scrape/amit123 to search saved results.
    """

    async def _scrape_then_save():
        # Scrape first; storage is only touched once the scrape succeeded.
        items, scrape_meta = await TemuScraperService().scrape_amit123(body)
        run_id, saved_at, _ = get_storage().save_scrape(
            scraper="amit123",
            actor_id=scrape_meta.get("actor_id"),
            request_payload=body.model_dump(),
            items=items,
        )
        log.info(f"Saved {len(items)} products as run_id={run_id} at {saved_at}")
        return _amit123_response(body, items, scrape_meta, run_id, saved_at)

    # _handle_scrape converts any failure of the pipeline into an HTTPException.
    return await _handle_scrape(_scrape_then_save())
|
|
|
|
|
|
@app_router.get("/scrape/amit123", response_model=TemuSearchResponse)
async def search_saved_amit123(
    q: str = Query(..., description="Search saved amit123 results (FTS index)"),
    limit: int = Query(50, ge=1, le=500),
    run_id: Optional[int] = Query(None, description="Filter to a specific saved scrape run"),
    since: Optional[str] = Query(None, description="Only results saved at or after this ISO timestamp"),
):
    """
    Search locally saved amit123 scrape results. Does not call Apify.

    A blank or whitespace-only ``q`` is rejected with HTTP 400.
    """
    query_has_text = bool(q.strip())
    if query_has_text:
        return _search_saved(
            scraper="amit123",
            q=q,
            limit=limit,
            run_id=run_id,
            since=since,
        )
    raise HTTPException(status_code=400, detail="Query parameter q is required")
|