added selenium config

main
saif 2025-08-19 11:18:05 +05:00
parent beddc579c4
commit 65eae04ac0
1 changed file with 26 additions and 2 deletions

View File

@ -53,13 +53,37 @@ BASE_PATH= config[ACTIVE_ENV]["data_path"]
MAX_PAGE = 2 MAX_PAGE = 2
def get_driver():
    """Create a Chrome WebDriver configured to look less like automation.

    Builds the Chrome options (headless when ``ACTIVE_ENV`` is ``"prod"``),
    starts the browser, and registers a script through the DevTools protocol
    that makes ``navigator.webdriver`` read as ``undefined`` on every new
    document.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    options = Options()

    # NOTE(review): the diff view this was recovered from lost its
    # indentation; only --headless is assumed to be prod-only -- confirm.
    if ACTIVE_ENV == "prod":
        options.add_argument("--headless")

    for flag in (
        "--disable-blink-features=AutomationControlled",
        "--disable-infobars",
        "--disable-extensions",
        "--start-maximized",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--window-size=1920,1080",
    ):
        options.add_argument(flag)

    # Set a realistic user agent
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/115.0.0.0 Safari/537.36"
    )

    # Experimental options to hide automation
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)
    # NOTE(review): value 2 presumably blocks cookies, which would leave
    # save_cookies() with nothing to persist -- confirm this is intended.
    options.add_experimental_option("prefs", {
        "profile.default_content_setting_values.cookies": 2
    })

    driver = webdriver.Chrome(options=options)

    # Registered before any page script runs, so pages never see the
    # default navigator.webdriver value.
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            });
        """
    })
    return driver
def save_cookies(driver, path): def save_cookies(driver, path):
@ -143,7 +167,7 @@ def get_amazon_ranks(url, marketplace, ratingPrefix, keyword, page, count):
sleep(1) # Give time to solve CAPTCHA manually (if needed) sleep(1) # Give time to solve CAPTCHA manually (if needed)
save_cookies(driver, COOKIE_FILE) save_cookies(driver, COOKIE_FILE)
sleep(2) # Wait for JS to load sleep(1) # Wait for JS to load
items = driver.find_elements(By.XPATH, '//div[contains(@class,"s-result-item") and @data-asin]') items = driver.find_elements(By.XPATH, '//div[contains(@class,"s-result-item") and @data-asin]')
for idx, item in enumerate(items, start=1): for idx, item in enumerate(items, start=1):
try: try: