"""FastAPI routes for running Temu scrapers via Apify and searching saved results.

POST endpoints run a scraper, persist the returned items through the storage
layer and return them. GET endpoints only query what was previously saved.
"""

import logging
from typing import Awaitable, List, Optional, Tuple

import httpx
from fastapi import APIRouter, HTTPException, Query

from .serializers import (
    AmitTemuScrapeRequest,
    SavedProduct,
    TemuScrapeRequest,
    TemuScrapeResponse,
    TemuSearchResponse,
    TemuProduct,
)
from .service import TemuScraperService
from .storage import get_storage

app_router = APIRouter()

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
log = logging.getLogger(__name__)


def _products_from_items(items: list) -> List[TemuProduct]:
    """Validate each raw scraper item into a ``TemuProduct``."""
    return [TemuProduct.model_validate(item) for item in items]


def _sovereigntaylor_response(
    request: TemuScrapeRequest,
    items: list,
    meta: dict,
    saved_run_id: int,
    saved_at: str,
) -> TemuScrapeResponse:
    """Build the response for a sovereigntaylor scrape run.

    Echoes the request's search parameters next to the validated products and
    the identifiers of the saved run.
    """
    products = _products_from_items(items)
    return TemuScrapeResponse(
        scraper=meta.get("scraper", "sovereigntaylor"),
        actor_id=meta.get("actor_id"),
        saved_run_id=saved_run_id,
        saved_at=saved_at,
        search_terms=request.search_terms,
        product_urls=request.product_urls,
        category=request.category,
        max_results=request.max_results,
        product_count=len(products),
        products=products,
    )


def _amit123_response(
    request: AmitTemuScrapeRequest,
    items: list,
    meta: dict,
    saved_run_id: int,
    saved_at: str,
) -> TemuScrapeResponse:
    """Build the response for an amit123 scrape run.

    Echoes the request's search parameters next to the validated products and
    the identifiers of the saved run.
    """
    products = _products_from_items(items)
    return TemuScrapeResponse(
        scraper=meta.get("scraper", "amit123"),
        actor_id=meta.get("actor_id"),
        saved_run_id=saved_run_id,
        saved_at=saved_at,
        search_queries=request.search_queries,
        currency=request.currency,
        max_results=request.max_results,
        product_count=len(products),
        products=products,
    )


def _save_run(scraper: str, body, items: list, meta: dict) -> Tuple[int, str]:
    """Persist one scrape run and return ``(run_id, saved_at)``.

    ``body`` is the request model; its ``model_dump()`` is stored as the
    request payload alongside the scraped items.
    """
    # save_scrape returns a third value that no caller in this module uses.
    run_id, saved_at, _ = get_storage().save_scrape(
        scraper=scraper,
        actor_id=meta.get("actor_id"),
        request_payload=body.model_dump(),
        items=items,
    )
    log.info("Saved %d products as run_id=%s at %s", len(items), run_id, saved_at)
    return run_id, saved_at


def _search_saved(
    scraper: str,
    q: str,
    limit: int,
    run_id: Optional[int],
    since: Optional[str],
) -> TemuSearchResponse:
    """Search saved products for ``scraper`` and wrap them in a search response.

    Raises:
        HTTPException: 400 when ``q`` is empty or whitespace-only.
    """
    # Query(...) only guarantees the parameter is present, not that it is non-blank.
    if not q.strip():
        raise HTTPException(status_code=400, detail="Query parameter q is required")

    results, total = get_storage().search_products(
        scraper=scraper,
        query=q,
        limit=limit,
        run_id=run_id,
        since=since,
    )
    return TemuSearchResponse(
        query=q,
        scraper=scraper,
        match_count=len(results),
        total_saved_for_scraper=total,
        run_id=run_id,
        since=since,
        results=[SavedProduct.model_validate(r) for r in results],
    )


async def _handle_scrape(coro: Awaitable[TemuScrapeResponse]) -> TemuScrapeResponse:
    """Await ``coro`` and translate failures into ``HTTPException``.

    Mapping:
        * ``HTTPException``         -> re-raised unchanged
        * ``ValueError``            -> 400
        * ``httpx.HTTPStatusError`` -> upstream status code and body
        * ``httpx.RequestError``    -> 502
        * any other ``Exception``   -> 500
    """
    try:
        return await coro
    except HTTPException:
        # Already an HTTP-level error; without this clause the catch-all
        # below would rewrite it as a 500.
        raise
    except ValueError as e:
        log.error("Validation or config error: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except httpx.HTTPStatusError as e:
        log.error("Apify returned %s: %s", e.response.status_code, e.response.text)
        raise HTTPException(
            status_code=e.response.status_code,
            detail=e.response.text,
        ) from e
    except httpx.RequestError as e:
        log.error("Network error calling Apify: %s", e)
        raise HTTPException(status_code=502, detail=f"Upstream request failed: {e}") from e
    except Exception as e:
        # log.exception keeps the traceback, which is the only clue for an
        # unexpected failure.
        log.exception("Unexpected error: %s", e)
        # NOTE(review): str(e) is returned to the client and may expose
        # internal details; kept for backward compatibility.
        raise HTTPException(status_code=500, detail=str(e)) from e


@app_router.post("/scrape", response_model=TemuScrapeResponse)
async def scrape_and_save_sovereigntaylor(body: TemuScrapeRequest):
    """
    Run Apify sovereigntaylor scraper, save results with timestamp, and index them.
    Expensive — use GET /scrape to search saved results instead of re-scraping.
    """

    async def run():
        # Constructed inside the guarded coroutine so that a ValueError from
        # the constructor is mapped by _handle_scrape as well.
        service = TemuScraperService()
        items, meta = await service.scrape_sovereigntaylor(body)
        run_id, saved_at = _save_run("sovereigntaylor", body, items, meta)
        return _sovereigntaylor_response(body, items, meta, run_id, saved_at)

    return await _handle_scrape(run())


@app_router.get("/scrape", response_model=TemuSearchResponse)
async def search_saved_sovereigntaylor(
    q: str = Query(..., description="Search saved sovereigntaylor results (FTS index)"),
    limit: int = Query(50, ge=1, le=500),
    run_id: Optional[int] = Query(None, description="Filter to a specific saved scrape run"),
    since: Optional[str] = Query(None, description="Only results saved at or after this ISO timestamp"),
):
    """Search locally saved scrape results. Does not call Apify."""
    return _search_saved("sovereigntaylor", q, limit, run_id, since)


@app_router.post("/scrape/amit123", response_model=TemuScrapeResponse)
async def scrape_and_save_amit123(body: AmitTemuScrapeRequest):
    """
    Run Apify amit123 scraper, save results with timestamp, and index them.
    Expensive — use GET /scrape/amit123 to search saved results.
    """

    async def run():
        # Constructed inside the guarded coroutine so that a ValueError from
        # the constructor is mapped by _handle_scrape as well.
        service = TemuScraperService()
        items, meta = await service.scrape_amit123(body)
        run_id, saved_at = _save_run("amit123", body, items, meta)
        return _amit123_response(body, items, meta, run_id, saved_at)

    return await _handle_scrape(run())


@app_router.get("/scrape/amit123", response_model=TemuSearchResponse)
async def search_saved_amit123(
    q: str = Query(..., description="Search saved amit123 results (FTS index)"),
    limit: int = Query(50, ge=1, le=500),
    run_id: Optional[int] = Query(None, description="Filter to a specific saved scrape run"),
    since: Optional[str] = Query(None, description="Only results saved at or after this ISO timestamp"),
):
    """Search locally saved scrape results. Does not call Apify."""
    return _search_saved("amit123", q, limit, run_id, since)