83 lines
2.5 KiB
Python
83 lines
2.5 KiB
Python
import os
|
|
from typing import Any, Dict, List, Literal, Optional, Tuple
|
|
|
|
import httpx
|
|
from dotenv import load_dotenv
|
|
|
|
from .serializers import AmitTemuScrapeRequest, TemuScrapeRequest
|
|
|
|
load_dotenv()
|
|
|
|
# Closed set of scraper identifiers accepted by this module; each value is a
# key of ACTORS below.
ScraperKey = Literal["sovereigntaylor", "amit123"]

# Registry of the Apify actors this module can run, keyed by ScraperKey.
#   "id"   - actor identifier in "owner/name" form (reported back in run metadata)
#   "slug" - the same identifier with "~" instead of "/", the form that is
#            interpolated into the Apify API URL path
ACTORS: Dict[ScraperKey, Dict[str, str]] = dict(
    sovereigntaylor=dict(
        id="sovereigntaylor/temu-product-scraper",
        slug="sovereigntaylor~temu-product-scraper",
    ),
    amit123=dict(
        id="amit123/temu-products-scraper",
        slug="amit123~temu-products-scraper",
    ),
)
|
|
|
|
|
|
class TemuScraperService:
    """Runs Temu scrapers on Apify.

    Each public ``scrape_*`` coroutine converts a request object into actor
    input via its ``to_actor_input()`` method, runs the matching Apify actor
    through the synchronous ``run-sync-get-dataset-items`` endpoint, and
    returns the dataset items together with a small metadata dict.
    """

    def __init__(
        self,
        api_token: Optional[str] = None,
        timeout: float = 600.0,
    ):
        """Store credentials and the HTTP timeout.

        Args:
            api_token: Apify API token. When omitted (or empty), the
                ``APIFY_API_TOKEN`` environment variable is used instead.
            timeout: Timeout handed to ``httpx.AsyncClient`` for each actor
                run request.

        Raises:
            ValueError: If no token was passed and ``APIFY_API_TOKEN`` is
                unset or empty.
        """
        self.api_token = api_token or os.getenv("APIFY_API_TOKEN")
        self.timeout = timeout

        if not self.api_token:
            raise ValueError("APIFY_API_TOKEN is not set in environment")

    def _run_sync_url(self, scraper: ScraperKey) -> str:
        """Return the run-sync-get-dataset-items URL for ``scraper``.

        Raises:
            KeyError: If ``scraper`` is not a key of ``ACTORS``.
        """
        slug = ACTORS[scraper]["slug"]
        return (
            f"https://api.apify.com/v2/acts/{slug}/run-sync-get-dataset-items"
        )

    async def _run_actor(
        self,
        scraper: ScraperKey,
        run_input: Dict[str, Any],
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
        """Run one actor synchronously and return its dataset items.

        Args:
            scraper: Key of the actor in ``ACTORS``.
            run_input: JSON-serializable actor input, sent as the POST body.

        Returns:
            ``(items, meta)`` where ``items`` is the decoded JSON list from
            the response and ``meta`` has the keys ``"scraper"``,
            ``"actor_id"`` and ``"item_count"``.

        Raises:
            KeyError: If ``scraper`` is not a key of ``ACTORS``.
            httpx.HTTPStatusError: If Apify answers with a 4xx/5xx status.
            ValueError: If the response body decodes to something other
                than a list.
        """
        url = self._run_sync_url(scraper)
        # Authenticate with a bearer header instead of a ``?token=`` query
        # parameter: httpx includes the full request URL in the message of
        # the HTTPStatusError raised below, so a query-string token would
        # end up in logs and tracebacks whenever a run fails.
        headers = {"Authorization": f"Bearer {self.api_token}"}

        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(url, headers=headers, json=run_input)
            response.raise_for_status()
            items = response.json()

        if not isinstance(items, list):
            raise ValueError(f"Unexpected Apify response type: {type(items).__name__}")

        meta = {
            "scraper": scraper,
            "actor_id": ACTORS[scraper]["id"],
            "item_count": len(items),
        }
        return items, meta

    async def scrape_sovereigntaylor(
        self, request: TemuScrapeRequest
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
        """Run the ``sovereigntaylor`` actor with ``request.to_actor_input()``.

        Returns the ``(items, meta)`` pair produced by ``_run_actor``.
        """
        return await self._run_actor("sovereigntaylor", request.to_actor_input())

    async def scrape_amit123(
        self, request: AmitTemuScrapeRequest
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
        """Run the ``amit123`` actor with ``request.to_actor_input()``.

        Returns the ``(items, meta)`` pair produced by ``_run_actor``.
        """
        return await self._run_actor("amit123", request.to_actor_input())

    async def scrape(
        self, request: TemuScrapeRequest
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
        """Backward-compatible alias for the sovereigntaylor scraper."""
        return await self.scrape_sovereigntaylor(request)
|