listing-radar/temu_scraper_api/views.py

184 lines
6.1 KiB
Python

import logging
from typing import List, Optional
import httpx
from fastapi import APIRouter, HTTPException, Query
from .serializers import (
AmitTemuScrapeRequest,
SavedProduct,
TemuScrapeRequest,
TemuScrapeResponse,
TemuSearchResponse,
TemuProduct,
)
from .service import TemuScraperService
from .storage import get_storage
# Router that the endpoint decorators in this module register their routes on.
app_router = APIRouter()
# NOTE(review): basicConfig runs at import time and configures the root logger
# (only if it has no handlers yet) — confirm this module, rather than the
# application entry point, is meant to own logging setup.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# Module-level logger used by the handlers in this file.
log = logging.getLogger(__name__)
def _products_from_items(items: list) -> List[TemuProduct]:
    """Validate every raw item into a ``TemuProduct``, keeping the input order.

    Any error raised by ``TemuProduct.model_validate`` propagates to the caller.
    """
    validated: List[TemuProduct] = []
    for raw_item in items:
        validated.append(TemuProduct.model_validate(raw_item))
    return validated
def _sovereigntaylor_response(
    request: TemuScrapeRequest,
    items: list,
    meta: dict,
    saved_run_id: int,
    saved_at: str,
) -> TemuScrapeResponse:
    """Assemble the ``TemuScrapeResponse`` for a saved sovereigntaylor run.

    Args:
        request: The scrape request; its search terms, product URLs, category
            and max_results are echoed back in the response.
        items: Raw scraped items, validated into ``TemuProduct`` objects.
        meta: Run metadata; ``scraper`` (default ``"sovereigntaylor"``) and
            ``actor_id`` are read from it.
        saved_run_id: Identifier of the saved run.
        saved_at: Timestamp string of the saved run.

    Returns:
        The response with the validated products and their count.
    """
    validated_products = _products_from_items(items)
    response_fields = {
        "scraper": meta.get("scraper", "sovereigntaylor"),
        "actor_id": meta.get("actor_id"),
        "saved_run_id": saved_run_id,
        "saved_at": saved_at,
        "search_terms": request.search_terms,
        "product_urls": request.product_urls,
        "category": request.category,
        "max_results": request.max_results,
        "product_count": len(validated_products),
        "products": validated_products,
    }
    return TemuScrapeResponse(**response_fields)
def _amit123_response(
    request: AmitTemuScrapeRequest,
    items: list,
    meta: dict,
    saved_run_id: int,
    saved_at: str,
) -> TemuScrapeResponse:
    """Assemble the ``TemuScrapeResponse`` for a saved amit123 run.

    Args:
        request: The scrape request; its search queries, currency and
            max_results are echoed back in the response.
        items: Raw scraped items, validated into ``TemuProduct`` objects.
        meta: Run metadata; ``scraper`` (default ``"amit123"``) and
            ``actor_id`` are read from it.
        saved_run_id: Identifier of the saved run.
        saved_at: Timestamp string of the saved run.

    Returns:
        The response with the validated products and their count.
    """
    validated_products = _products_from_items(items)
    response_fields = {
        "scraper": meta.get("scraper", "amit123"),
        "actor_id": meta.get("actor_id"),
        "saved_run_id": saved_run_id,
        "saved_at": saved_at,
        "search_queries": request.search_queries,
        "currency": request.currency,
        "max_results": request.max_results,
        "product_count": len(validated_products),
        "products": validated_products,
    }
    return TemuScrapeResponse(**response_fields)
def _search_saved(
    scraper: str,
    q: str,
    limit: int,
    run_id: Optional[int],
    since: Optional[str],
) -> TemuSearchResponse:
    """Search the saved products of one scraper and wrap them in a response.

    Args:
        scraper: Scraper name whose saved products are searched.
        q: Search query, forwarded to storage as ``query``.
        limit: Limit forwarded to storage.
        run_id: Optional run filter, forwarded to storage and echoed back.
        since: Optional timestamp filter, forwarded to storage and echoed back.

    Returns:
        A ``TemuSearchResponse`` whose ``match_count`` is the number of rows
        returned by storage and whose ``total_saved_for_scraper`` is the total
        storage reported alongside them.
    """
    rows, total_saved = get_storage().search_products(
        scraper=scraper,
        query=q,
        limit=limit,
        run_id=run_id,
        since=since,
    )
    saved_products = [SavedProduct.model_validate(row) for row in rows]
    return TemuSearchResponse(
        query=q,
        scraper=scraper,
        match_count=len(rows),
        total_saved_for_scraper=total_saved,
        run_id=run_id,
        since=since,
        results=saved_products,
    )
async def _handle_scrape(coro):
    """Await a scrape coroutine and translate its failures into HTTP errors.

    Args:
        coro: Awaitable that performs the scrape work.

    Returns:
        Whatever ``coro`` returns.

    Raises:
        HTTPException: 400 for ``ValueError``; the upstream status code and
            body for ``httpx.HTTPStatusError``; 502 for ``httpx.RequestError``;
            500 for any other exception. An ``HTTPException`` raised by
            ``coro`` itself is propagated unchanged.
    """
    try:
        return await coro
    except HTTPException:
        # Already an HTTP-level error: let it through instead of having the
        # catch-all below rewrite its status code to 500.
        raise
    except ValueError as e:
        log.error("Validation or config error: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except httpx.HTTPStatusError as e:
        log.error("Apify returned %s: %s", e.response.status_code, e.response.text)
        raise HTTPException(status_code=e.response.status_code, detail=e.response.text) from e
    except httpx.RequestError as e:
        log.error("Network error calling Apify: %s", e)
        raise HTTPException(status_code=502, detail=f"Upstream request failed: {e}") from e
    except Exception as e:
        # log.exception records the traceback, which is what is needed to
        # diagnose an error nobody anticipated.
        log.exception("Unexpected error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@app_router.post("/scrape", response_model=TemuScrapeResponse)
async def scrape_and_save_sovereigntaylor(body: TemuScrapeRequest):
    """
    Run Apify sovereigntaylor scraper, save results with timestamp, and index them.
    Expensive — use GET /scrape to search saved results instead of re-scraping.
    """
    # Scraping and saving are wrapped in one coroutine so that _handle_scrape
    # can translate a failure in either step into an HTTP error.
    async def run():
        service = TemuScraperService()
        items, meta = await service.scrape_sovereigntaylor(body)
        storage = get_storage()
        # save_scrape returns a 3-tuple; the third element is not used here.
        run_id, saved_at, _ = storage.save_scrape(
            scraper="sovereigntaylor",
            actor_id=meta.get("actor_id"),
            request_payload=body.model_dump(),
            items=items,
        )
        log.info("Saved %d products as run_id=%s at %s", len(items), run_id, saved_at)
        return _sovereigntaylor_response(body, items, meta, run_id, saved_at)

    return await _handle_scrape(run())
@app_router.get("/scrape", response_model=TemuSearchResponse)
async def search_saved_sovereigntaylor(
    q: str = Query(..., description="Search saved sovereigntaylor results (FTS index)"),
    limit: int = Query(50, ge=1, le=500),
    run_id: Optional[int] = Query(None, description="Filter to a specific saved scrape run"),
    since: Optional[str] = Query(None, description="Only results saved at or after this ISO timestamp"),
):
    """Search locally saved scrape results. Does not call Apify."""
    # Query(...) only makes q mandatory; an empty or whitespace-only value
    # still arrives here, so it is rejected explicitly.
    if not q.strip():
        raise HTTPException(status_code=400, detail="Query parameter q is required")
    return _search_saved("sovereigntaylor", q, limit, run_id, since)
@app_router.post("/scrape/amit123", response_model=TemuScrapeResponse)
async def scrape_and_save_amit123(body: AmitTemuScrapeRequest):
    """
    Run Apify amit123 scraper, save results with timestamp, and index them.
    Expensive — use GET /scrape/amit123 to search saved results.
    """

    async def _scrape_then_persist():
        # Step 1: run the scraper through the service.
        items, run_meta = await TemuScraperService().scrape_amit123(body)

        # Step 2: save the scraped items; the third returned value is unused.
        run_id, saved_at, _unused = get_storage().save_scrape(
            scraper="amit123",
            actor_id=run_meta.get("actor_id"),
            request_payload=body.model_dump(),
            items=items,
        )
        log.info(f"Saved {len(items)} products as run_id={run_id} at {saved_at}")

        # Step 3: shape the API response from the request, items and run info.
        return _amit123_response(body, items, run_meta, run_id, saved_at)

    pending = _scrape_then_persist()
    return await _handle_scrape(pending)
@app_router.get("/scrape/amit123", response_model=TemuSearchResponse)
async def search_saved_amit123(
    q: str = Query(..., description="Search saved amit123 results (FTS index)"),
    limit: int = Query(50, ge=1, le=500),
    run_id: Optional[int] = Query(None, description="Filter to a specific saved scrape run"),
    since: Optional[str] = Query(None, description="Only results saved at or after this ISO timestamp"),
):
    """Search locally saved scrape results. Does not call Apify."""
    has_search_text = bool(q.strip())
    if has_search_text:
        return _search_saved("amit123", q, limit, run_id, since)
    # Only an empty or whitespace-only q reaches this point.
    raise HTTPException(status_code=400, detail="Query parameter q is required")