rename file
parent 19dddd7551
commit 2170cdfc68

@@ -0,0 +1,109 @@
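# Scrapes Amazon search results for each configured marketplace with Selenium
# and saves per-keyword organic ranking data to timestamped JSON files.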
import json
import os
import pickle
import time
from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Marketplace definitions and per-marketplace cookie-file names.
with open("marketplaces.json", "r", encoding="utf-8") as f:
    data = json.load(f)

with open("cookies.json", "r", encoding="utf-8") as f:
    cookies_ref = json.load(f)

marketplaces = data["marketplaces"]
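# Assumed shape of the two config files (illustrative, inferred from the
# lookups in this script; the real files may differ):
#   marketplaces.json: {"marketplaces": {"US": {"url": "amazon.com"}, ...}}
#   cookies.json:      {"US": {"cookies_name": "cookies_us.pkl"}, ...}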


def get_driver():
    """Create a headless Chrome driver with basic anti-automation flags."""
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-blink-features=AutomationControlled")
    driver = webdriver.Chrome(options=options)
    return driver


def save_cookies(driver, path):
    """Persist the driver's current cookies to disk with pickle."""
    with open(path, "wb") as f:
        pickle.dump(driver.get_cookies(), f)


def save_ranking(rankings, file_path):
    """Write the collected ranking records to a JSON file."""
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(rankings, f, ensure_ascii=False, indent=4)


def load_cookies(driver, path):
    """Load pickled cookies and add them to the current driver session."""
    with open(path, "rb") as f:
        cookies = pickle.load(f)
    for cookie in cookies:
        # Chrome can reject unexpected sameSite values, so drop the key.
        cookie.pop("sameSite", None)
        driver.add_cookie(cookie)
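# Note: Selenium only accepts add_cookie() for the domain that is currently
# loaded, which is why get_amazon_ranks() below visits the URL first, then
# loads the cookies, then reloads the page.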


def check_sponsored(item):
    """Return 1 if the result card carries a "Sponsored" label, else 0."""
    try:
        # Check if any element inside contains the text "Sponsored".
        sponsored_labels = item.find_elements(By.XPATH, './/*[contains(text(), "Sponsored")]')
        for label in sponsored_labels:
            if label.text.strip().lower() == "sponsored":
                return 1
    except Exception:
        pass
    return 0


def check_consist_utopia(title):
    """Return 1 if the listing title contains the brand name "Utopia"."""
    return 1 if "Utopia" in title else 0


def get_amazon_ranks(url, marketplace, keyword):
    """Scrape one marketplace's search results and save organic ranks."""
    print('[INFO] Getting Amazon ranks for:', marketplace, keyword)
    url = f"https://www.{url}/s?k={keyword.replace(' ', '+')}"
    driver.get(url)
    count = 1
    ranks = []

    COOKIE_FILE = cookies_ref[marketplace]['cookies_name']
    print(COOKIE_FILE)

    # Reuse saved cookies if available; otherwise visit fresh and save them.
    if os.path.exists(COOKIE_FILE):
        load_cookies(driver, COOKIE_FILE)
        driver.get(url)
    else:
        print("No cookie file found, visiting fresh")
        driver.get(url)
        sleep(5)  # Give time to solve a CAPTCHA manually (if needed)
        save_cookies(driver, COOKIE_FILE)

    sleep(3)  # Wait for JS to load
    items = driver.find_elements(By.XPATH, '//div[contains(@class,"s-result-item") and @data-asin]')
    for item in items:
        asin = item.get_attribute("data-asin")
        try:
            sponsored = check_sponsored(item)
            title = item.find_element(By.XPATH, './/h2//span').text
            if title == 'Results':
                continue
            # Record only organic (non-sponsored) results.
            if sponsored == 0:
                ranks.append({
                    'rank': count,
                    'title': title,
                    'marketplace': marketplace,
                    'keyword': keyword,
                    'sponsored': 0,
                    'asin': asin,
                    'is_utopia': check_consist_utopia(title),
                })
                count += 1
        except Exception:
            continue

    file_path = f"{int(time.time() * 1000)}-{marketplace}-{keyword}.json"
    save_ranking(ranks, file_path)
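# Each saved record looks like (illustrative values only):
#   {"rank": 1, "title": "...", "marketplace": "US", "keyword": "pillows",
#    "sponsored": 0, "asin": "...", "is_utopia": 0}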


# Run the scrape once per configured marketplace, sharing a single driver.
driver = get_driver()
for marketplace, details in marketplaces.items():
    url = details['url']
    get_amazon_ranks(url, marketplace, 'pillows')
driver.quit()