add profile

main
saif 2025-08-12 13:12:14 +05:00
parent c6a2442023
commit c56d666619
2 changed files with 35 additions and 1013 deletions

File diff suppressed because it is too large Load Diff

View File

@ -3,6 +3,9 @@ import pickle
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from time import sleep
import json
import time
@ -21,17 +24,31 @@ with open("cookies.json", "r", encoding="utf-8") as f:
# Or if it's a Python dict already:
marketplaces = data["marketplaces"]
# Root directory for crawler output. The shared-mount location is kept here
# for reference; the local relative 'data' directory is the one in effect.
# BASE_PATH = '/mnt/AmazonReports/Amazon/keyword_ranking'
BASE_PATH = 'data'
# NOTE(review): presumably the maximum number of result pages visited per
# keyword -- confirm against the crawl loop.
MAX_PAGE = 10
def get_driver():
    """Build and return a Chrome WebDriver configured for the crawler.

    Chrome is started with a persistent on-disk user profile and with
    several settings meant to hide the usual automation markers: the
    ``AutomationControlled`` Blink feature, the ``enable-automation``
    switch, the automation extension, and ``navigator.webdriver``.

    Exactly one browser is launched. The function does not quit it; that
    is left to the caller.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    options = Options()
    # Headless mode is currently switched off; uncomment to run without a
    # visible browser window.
    # options.add_argument("--headless")
    options.add_argument("--disable-blink-features=AutomationControlled")  # Removes automation flag
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    # Present a fixed desktop Chrome user agent string.
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
    options.add_argument("--start-maximized")
    # Reuse the "Default" profile stored under this user-data directory.
    options.add_argument("user-data-dir=/home/ec2-user/keyword_ranking_crawler/chrome_path")
    options.add_argument("profile-directory=Default")

    # ChromeDriverManager().install() supplies the chromedriver path for the Service.
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options,
    )

    # Remove navigator.webdriver: the script is registered to be evaluated
    # on every new document.
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
    })
    return driver
def save_cookies(driver, path):
@ -156,6 +173,7 @@ driver = get_driver()
for keyword in keywords:
for marketplace, details in marketplaces.items():
if marketplace == 'AMAZON_USA':
url = details['url']
ratingPrefix = details['ratingPrefix']
count =1