add profile

main
saif 2025-08-12 13:12:14 +05:00
parent c6a2442023
commit c56d666619
2 changed files with 35 additions and 1013 deletions

File diff suppressed because it is too large. [Load Diff]

View File

@@ -3,6 +3,9 @@ import pickle
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from time import sleep from time import sleep
import json import json
import time import time
@@ -21,17 +24,31 @@ with open("cookies.json", "r", encoding="utf-8") as f:
# Or if it's a Python dict already: # Or if it's a Python dict already:
marketplaces = data["marketplaces"] marketplaces = data["marketplaces"]
BASE_PATH= '/mnt/AmazonReports/Amazon/keyword_ranking' #BASE_PATH= '/mnt/AmazonReports/Amazon/keyword_ranking'
#BASE_PATH= 'data' BASE_PATH= 'data'
MAX_PAGE = 10 MAX_PAGE = 10
def get_driver(): def get_driver():
options = Options() options = Options()
options.add_argument("--headless") #options.add_argument("--headless")
options.add_argument("--disable-blink-features=AutomationControlled") options.add_argument("--disable-blink-features=AutomationControlled") # Removes automation flag
driver = webdriver.Chrome(options=options) options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
options.add_argument("--start-maximized")
options.add_argument("user-data-dir=/home/ec2-user/keyword_ranking_crawler/chrome_path")
options.add_argument("profile-directory=Default")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
# Remove navigator.webdriver
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})
return driver return driver
def save_cookies(driver, path): def save_cookies(driver, path):
@@ -156,6 +173,7 @@ driver = get_driver()
for keyword in keywords: for keyword in keywords:
for marketplace, details in marketplaces.items(): for marketplace, details in marketplaces.items():
if marketplace == 'AMAZON_USA':
url = details['url'] url = details['url']
ratingPrefix = details['ratingPrefix'] ratingPrefix = details['ratingPrefix']
count =1 count =1