175 lines
5.5 KiB
Python
175 lines
5.5 KiB
Python
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
from typing import List, Literal, Optional, Any, Dict
|
|
|
|
# Closed set of currency codes accepted by the `currency` request field.
CurrencyCode = Literal["USD", "EUR"]
|
|
|
|
|
|
# Proxy settings nested inside an actor input payload. Attributes are
# snake_case in Python; the serialization aliases supply the camelCase keys
# produced when the model is dumped with ``by_alias=True``.
class ProxyConfiguration(BaseModel):
    # Dumped as "useApifyProxy"; defaults to True.
    use_apify_proxy: bool = Field(
        default=True,
        serialization_alias="useApifyProxy",
    )

    # Dumped as "apifyProxyGroups"; defaults to the single group
    # "RESIDENTIAL". May be None, in which case a dump with
    # ``exclude_none=True`` leaves the key out.
    apify_proxy_groups: Optional[List[str]] = Field(
        serialization_alias="apifyProxyGroups",
        default=["RESIDENTIAL"],
    )
|
|
|
|
|
|
# Request parameters for a Temu scrape. At least one of ``search_terms``,
# ``product_urls`` or ``category`` must carry a non-blank value; this is
# enforced by ``require_scrape_target`` after field validation.
class TemuScrapeRequest(BaseModel):
    search_terms: List[str] = Field(
        default_factory=list,
        description="Keywords to search on Temu",
        serialization_alias="searchTerms",
    )
    product_urls: List[str] = Field(
        default_factory=list,
        description="Direct Temu product page URLs",
        serialization_alias="productUrls",
    )
    category: str = Field(
        default="",
        description="Category slug (e.g. electronics, women-clothing)",
    )
    max_price: int = Field(
        default=0,
        ge=0,
        description="Max price in USD; 0 means no filter",
        serialization_alias="maxPrice",
    )
    max_results: int = Field(
        default=50,
        ge=0,
        description="Max products to scrape; 0 for unlimited",
        serialization_alias="maxResults",
    )
    # When omitted, ``to_actor_input`` falls back to a default
    # ProxyConfiguration().
    proxy_configuration: Optional[ProxyConfiguration] = Field(
        default=None,
        serialization_alias="proxyConfiguration",
    )

    @model_validator(mode="after")
    def require_scrape_target(self) -> "TemuScrapeRequest":
        """Reject requests that name nothing to scrape.

        Whitespace-only strings are treated as absent, so a request such as
        ``category="  "`` or ``search_terms=[""]`` is rejected instead of
        being forwarded as an effectively empty query.

        Returns:
            The validated model instance, unchanged.

        Raises:
            ValueError: If none of ``search_terms``, ``product_urls`` or
                ``category`` contains a non-blank value. Pydantic reports
                this to the caller as a validation error.
        """
        has_terms = any(term.strip() for term in self.search_terms)
        has_urls = any(url.strip() for url in self.product_urls)
        has_category = bool(self.category.strip())
        if not (has_terms or has_urls or has_category):
            raise ValueError(
                "Provide at least one of search_terms, product_urls, or category"
            )
        return self

    def to_actor_input(self) -> Dict[str, Any]:
        """Build the camelCase input dict for the actor.

        Returns:
            A dict with the keys ``searchTerms``, ``productUrls``,
            ``category``, ``maxPrice``, ``maxResults`` and
            ``proxyConfiguration``. The proxy entry is the alias-keyed dump
            of ``proxy_configuration`` (or of a default
            ``ProxyConfiguration()`` when none was supplied) with None
            values dropped.
        """
        # A missing proxy block is replaced by the model defaults so the
        # payload always carries a "proxyConfiguration" entry.
        proxy = self.proxy_configuration or ProxyConfiguration()
        return {
            "searchTerms": self.search_terms,
            "productUrls": self.product_urls,
            "category": self.category,
            "maxPrice": self.max_price,
            "maxResults": self.max_results,
            "proxyConfiguration": proxy.model_dump(
                by_alias=True,
                exclude_none=True,
            ),
        }
|
|
|
|
|
|
class AmitTemuScrapeRequest(BaseModel):
    """Input for amit123/temu-products-scraper."""

    # Required; the list must hold at least one entry (min_length=1).
    search_queries: List[str] = Field(
        ...,
        min_length=1,
        serialization_alias="searchQueries",
        description="Search keywords on Temu",
    )

    # Restricted to the CurrencyCode literals; defaults to "USD".
    currency: CurrencyCode = Field(
        description="Price currency (USD or EUR)",
        default="USD",
    )

    # Bounded to the inclusive range 20..200; defaults to 40.
    max_results: int = Field(
        default=40,
        ge=20,
        le=200,
        serialization_alias="maxResults",
        description="Max products per search query (20-200)",
    )

    def to_actor_input(self) -> Dict[str, Any]:
        """Return the actor input dict keyed by camelCase names.

        The dict contains ``searchQueries``, ``currency`` and ``maxResults``
        taken directly from the corresponding model attributes.
        """
        actor_input: Dict[str, Any] = {}
        actor_input["searchQueries"] = self.search_queries
        actor_input["currency"] = self.currency
        actor_input["maxResults"] = self.max_results
        return actor_input
|
|
|
|
|
|
# One product variant record. Every declared field is optional.
class TemuProductVariant(BaseModel):
    # populate_by_name: aliased fields accept either the alias or the Python
    # field name on input. extra="allow": undeclared keys are kept on the
    # instance rather than rejected or dropped.
    model_config = ConfigDict(extra="allow", populate_by_name=True)

    sku_id: Optional[str] = Field(default=None, alias="skuId")
    title: Optional[str] = None
    price: Optional[float] = None
    original_price: Optional[float] = Field(
        default=None,
        alias="originalPrice",
    )
    image: Optional[str] = None
    available: Optional[bool] = None
    attributes: Optional[Dict[str, Any]] = None
|
|
|
|
|
|
# One scraped product record. Every declared field is optional or defaults
# to an empty list.
class TemuProduct(BaseModel):
    # populate_by_name: aliased fields accept either the camelCase alias or
    # the snake_case field name on input. extra="allow": undeclared keys are
    # kept on the instance rather than rejected or dropped.
    model_config = ConfigDict(extra="allow", populate_by_name=True)

    title: Optional[str] = None
    price: Optional[float] = None
    original_price: Optional[float] = Field(
        default=None,
        alias="originalPrice",
    )
    # Note that discount and sold are declared as strings, not numbers.
    discount: Optional[str] = None
    sold: Optional[str] = None
    rating: Optional[float] = None
    reviews: Optional[int] = None
    category: Optional[str] = None
    images: List[str] = Field(default_factory=list)
    # Variants are stored as plain dicts; no per-variant model is applied.
    variants: List[Dict[str, Any]] = Field(default_factory=list)
    store: Optional[str] = None
    shipping_info: Optional[str] = Field(default=None, alias="shippingInfo")
    url: Optional[str] = None
    product_id: Optional[str] = Field(default=None, alias="productId")
    brand: Optional[str] = None
    currency: Optional[str] = None
    search_term: Optional[str] = Field(default=None, alias="searchTerm")
    # Kept as a string; no datetime parsing is done by this model.
    scraped_at: Optional[str] = Field(default=None, alias="scrapedAt")
|
|
|
|
|
|
# A saved product row. The first four fields are required; everything else
# is optional or has a default.
class SavedProduct(BaseModel):
    # Undeclared keys are kept on the instance rather than rejected or dropped.
    model_config = ConfigDict(extra="allow")

    # Required fields.
    id: int
    run_id: int
    scraper: str
    saved_at: str

    # Optional summary fields.
    title: Optional[str] = Field(default=None)
    url: Optional[str] = Field(default=None)
    product_id: Optional[str] = Field(default=None)
    search_term: Optional[str] = Field(default=None)

    # Product payload as a plain dict; defaults to an empty dict.
    product: Dict[str, Any] = Field(default_factory=dict)
    rank: Optional[float] = Field(default=None)
|
|
|
|
|
|
# Response body for a scrape. Every field has a default, so the model can be
# built from any subset of values.
class TemuScrapeResponse(BaseModel):
    # Scraper name; defaults to "sovereigntaylor".
    scraper: str = Field(default="sovereigntaylor")
    actor_id: Optional[str] = Field(default=None)

    # NOTE(review): saved_run_id is an int while run_id further down is a
    # str, so they look like two different identifiers -- confirm with callers.
    saved_run_id: Optional[int] = Field(default=None)
    saved_at: Optional[str] = Field(default=None)

    # The model carries both search_terms and search_queries, which match the
    # field names of the two request models in this file.
    search_terms: List[str] = Field(default_factory=list)
    search_queries: List[str] = Field(default_factory=list)
    product_urls: List[str] = Field(default_factory=list)
    category: str = Field(default="")
    currency: Optional[str] = Field(default=None)
    max_results: int = Field(default=0)

    # Result counters and identifiers.
    product_count: int = Field(default=0)
    run_id: Optional[str] = Field(default=None)
    dataset_id: Optional[str] = Field(default=None)
    dataset_url: Optional[str] = Field(default=None)
    products: List[TemuProduct] = Field(default_factory=list)
|
|
|
|
|
|
# Response body for a search over saved products. ``query`` and ``scraper``
# are required; the remaining fields have defaults.
class TemuSearchResponse(BaseModel):
    # Required fields.
    query: str
    scraper: str

    # Counters; both default to 0.
    match_count: int = Field(default=0)
    total_saved_for_scraper: int = Field(default=0)

    # Optional values; both default to None.
    run_id: Optional[int] = Field(default=None)
    since: Optional[str] = Field(default=None)

    # Matching rows; defaults to an empty list.
    results: List[SavedProduct] = Field(default_factory=list)
|