"""Smoke test for Temu scrapers (POST=Apify+save, GET=search cache).

Usage:
    python test_temu_scraper.py scrape "wireless earbuds" 10
    python test_temu_scraper.py search "wireless" 20
    python test_temu_scraper.py scrape "women dress" 40 amit123
    python test_temu_scraper.py search "dress" 10 amit123

Positional arguments (all optional): mode, query, count, scraper.
Defaults are: scrape, "wireless earbuds", 5, sovereigntaylor.

Requires APIFY_API_TOKEN in .env for scrape mode only.
"""

import asyncio
import os
import sys

from dotenv import load_dotenv

# Called before the project imports below; presumably so that the
# temu_scraper_api modules can see .env values at import time -- confirm
# before reordering.
load_dotenv()

from temu_scraper_api.serializers import AmitTemuScrapeRequest, TemuScrapeRequest  # noqa: E402
from temu_scraper_api.service import TemuScraperService  # noqa: E402
from temu_scraper_api.storage import get_storage  # noqa: E402

# Values accepted on the command line by main().
_KNOWN_MODES = ("scrape", "search")
_KNOWN_SCRAPERS = ("sovereigntaylor", "amit123")

# How many entries search() and _print_items() list before summarising the rest.
_PREVIEW_LIMIT = 10


async def scrape(query: str, max_results: int, scraper: str) -> None:
    """Run one scrape for *query*, persist the result and print a preview.

    Args:
        query: Search phrase handed to the scraper as a one-element list.
        max_results: Requested number of results. For ``amit123`` it is
            raised to at least 20 before the request is built.
        scraper: ``"amit123"`` selects ``service.scrape_amit123``; any other
            value selects ``service.scrape_sovereigntaylor``. The value is
            also stored verbatim as the run's scraper label.

    Exits the process with status 1 when ``APIFY_API_TOKEN`` is not set.
    """
    if not os.getenv("APIFY_API_TOKEN"):
        print("[FAIL] APIFY_API_TOKEN not set in .env")
        sys.exit(1)

    storage = get_storage()
    storage.init_db()
    service = TemuScraperService()

    if scraper == "amit123":
        # NOTE(review): the floor of 20 is presumably a minimum imposed by
        # the amit123 actor -- confirm against the actor's input schema.
        max_results = max(max_results, 20)
        request = AmitTemuScrapeRequest(search_queries=[query], max_results=max_results)
        print(f"[amit123] Scraping '{query}' (max_results={max_results})...\n")
        items, meta = await service.scrape_amit123(request)
    else:
        request = TemuScrapeRequest(search_terms=[query], max_results=max_results)
        print(f"[sovereigntaylor] Scraping '{query}' (max_results={max_results})...\n")
        items, meta = await service.scrape_sovereigntaylor(request)

    run_id, saved_at, count = storage.save_scrape(
        scraper=scraper,
        actor_id=meta.get("actor_id"),
        request_payload=request.model_dump(),
        items=items,
    )
    print(f"Actor: {meta.get('actor_id')}")
    print(f"Saved run_id={run_id} at {saved_at} ({count} products)\n")
    _print_items(items)


def search(query: str, limit: int, scraper: str) -> None:
    """Query previously saved products and print up to the first ten matches.

    Args:
        query: Text forwarded to ``storage.search_products``.
        limit: Maximum number of rows requested from storage.
        scraper: Scraper label whose saved products are searched.
    """
    storage = get_storage()
    storage.init_db()
    results, total = storage.search_products(scraper=scraper, query=query, limit=limit)
    print(f"[{scraper}] Search '{query}' -> {len(results)} matches ({total} total saved)\n")

    for i, row in enumerate(results[:_PREVIEW_LIMIT], 1):
        product = row["product"]
        # Prefer the row-level title, then fall back to the raw product.
        title = row.get("title") or product.get("title", "N/A")
        price = product.get("price") or product.get("price_info.price_str", "N/A")
        print(f"{i}. [{row['saved_at']}] run={row['run_id']} {title}")
        print(f" price={price} rank={row.get('rank')}")
        print(f" {row.get('url')}\n")

    if len(results) > _PREVIEW_LIMIT:
        print(f"... and {len(results) - _PREVIEW_LIMIT} more")


def _print_items(items: list) -> None:
    """Print title, price and URL for up to the first ten scraped items."""
    for i, item in enumerate(items[:_PREVIEW_LIMIT], 1):
        title = item.get("title", "N/A")
        price = item.get("price") or item.get("price_info.price_str", "N/A")
        url = item.get("url") or item.get("link_url", "")
        print(f"{i}. {title}")
        print(f" price={price}")
        print(f" {url}\n")

    if len(items) > _PREVIEW_LIMIT:
        print(f"... and {len(items) - _PREVIEW_LIMIT} more")


async def main() -> None:
    """Parse positional CLI arguments and dispatch to scrape() or search().

    Invalid input exits with status 1 and a ``[FAIL]`` message instead of
    being silently coerced: a mistyped mode must not start a real scrape, and
    a mistyped scraper name must not be stored as a run label.
    """
    argv = sys.argv
    mode = argv[1] if len(argv) > 1 else "scrape"
    query = argv[2] if len(argv) > 2 else "wireless earbuds"
    raw_limit = argv[3] if len(argv) > 3 else "5"
    scraper = argv[4] if len(argv) > 4 else "sovereigntaylor"

    if mode not in _KNOWN_MODES:
        print(f"[FAIL] Unknown mode '{mode}' (expected one of: {', '.join(_KNOWN_MODES)})")
        sys.exit(1)

    try:
        limit = int(raw_limit)
    except ValueError:
        print(f"[FAIL] Result count must be an integer, got '{raw_limit}'")
        sys.exit(1)

    if scraper not in _KNOWN_SCRAPERS:
        print(f"[FAIL] Unknown scraper '{scraper}' (expected one of: {', '.join(_KNOWN_SCRAPERS)})")
        sys.exit(1)

    if mode == "search":
        search(query, limit, scraper)
    else:
        await scrape(query, limit, scraper)


if __name__ == "__main__":
    asyncio.run(main())