listing-radar/temu_scraper_api/service.py

83 lines
2.5 KiB
Python

import os
from typing import Any, Dict, List, Literal, Optional, Tuple
import httpx
from dotenv import load_dotenv
from .serializers import AmitTemuScrapeRequest, TemuScrapeRequest
load_dotenv()
# Short keys callers use to select one of the supported Temu scraper actors.
ScraperKey = Literal["sovereigntaylor", "amit123"]

# Registry of supported actors, keyed by ScraperKey.  Each entry carries:
#   "id"   - the actor identifier written as "<owner>/<actor-name>"
#   "slug" - the same identifier with "~" in place of "/", which is the form
#            interpolated into the run URL path
ACTORS: Dict[ScraperKey, Dict[str, str]] = {}

ACTORS["sovereigntaylor"] = {
    "id": "sovereigntaylor/temu-product-scraper",
    "slug": "sovereigntaylor~temu-product-scraper",
}

ACTORS["amit123"] = {
    "id": "amit123/temu-products-scraper",
    "slug": "amit123~temu-products-scraper",
}
class TemuScraperService:
    """Runs Temu scrapers on Apify.

    Every public ``scrape*`` coroutine posts the request's actor input to
    the actor's ``run-sync-get-dataset-items`` endpoint and returns a
    ``(items, meta)`` tuple: the decoded JSON list from the response and a
    small dict describing which actor produced it.
    """

    def __init__(
        self,
        api_token: Optional[str] = None,
        timeout: float = 600.0,
    ):
        """Store the credentials and HTTP timeout used for every run.

        Args:
            api_token: Apify API token. When omitted (or empty), the
                ``APIFY_API_TOKEN`` environment variable is used instead.
            timeout: Timeout, in seconds, handed to ``httpx.AsyncClient``.

        Raises:
            ValueError: If no token was passed and ``APIFY_API_TOKEN`` is
                unset or empty.
        """
        self.api_token = api_token or os.getenv("APIFY_API_TOKEN")
        self.timeout = timeout
        if not self.api_token:
            raise ValueError("APIFY_API_TOKEN is not set in environment")

    def _run_sync_url(self, scraper: ScraperKey) -> str:
        """Return the ``run-sync-get-dataset-items`` URL for ``scraper``.

        Raises:
            KeyError: If ``scraper`` is not a key of ``ACTORS``.
        """
        slug = ACTORS[scraper]["slug"]
        return (
            f"https://api.apify.com/v2/acts/{slug}/run-sync-get-dataset-items"
        )

    async def _run_actor(
        self,
        scraper: ScraperKey,
        run_input: Dict[str, Any],
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
        """Run one actor synchronously and return its dataset items.

        Args:
            scraper: Key of the actor to run (must exist in ``ACTORS``).
            run_input: JSON-serialisable actor input, sent as the POST body.

        Returns:
            ``(items, meta)`` where ``items`` is the list decoded from the
            response body and ``meta`` holds ``scraper``, ``actor_id`` and
            ``item_count``.

        Raises:
            httpx.HTTPStatusError: If Apify answers with a 4xx/5xx status.
            ValueError: If the response body is not a JSON list.
        """
        url = self._run_sync_url(scraper)
        # Authenticate with a bearer header rather than a ``?token=`` query
        # parameter: the query string is part of the URL, so the secret would
        # otherwise end up in proxy/access logs and in the message of the
        # HTTPStatusError raised below.
        headers = {"Authorization": f"Bearer {self.api_token}"}

        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(url, headers=headers, json=run_input)

        # The body has been fully read by ``post``, so the response remains
        # usable after the client is closed.
        response.raise_for_status()
        items = response.json()
        if not isinstance(items, list):
            raise ValueError(
                f"Unexpected Apify response type: {type(items).__name__}"
            )

        meta = {
            "scraper": scraper,
            "actor_id": ACTORS[scraper]["id"],
            "item_count": len(items),
        }
        return items, meta

    async def scrape_sovereigntaylor(
        self, request: TemuScrapeRequest
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
        """Run the ``sovereigntaylor`` actor with ``request.to_actor_input()``."""
        return await self._run_actor("sovereigntaylor", request.to_actor_input())

    async def scrape_amit123(
        self, request: AmitTemuScrapeRequest
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
        """Run the ``amit123`` actor with ``request.to_actor_input()``."""
        return await self._run_actor("amit123", request.to_actor_input())

    async def scrape(
        self, request: TemuScrapeRequest
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
        """Backward-compatible alias for the sovereigntaylor scraper."""
        return await self.scrape_sovereigntaylor(request)