# Post-patch form of get_driver() reconstructed from the scrapper.py diff.
# The same patch also touches get_amazon_ranks(): the "Wait for JS to load"
# pause that follows save_cookies(driver, COOKIE_FILE) drops from sleep(2)
# to sleep(1).
def get_driver():
    """Create a Chrome WebDriver configured to hide common automation markers.

    The browser runs headless only when ``ACTIVE_ENV`` is ``"prod"``. A fixed
    desktop user agent is sent, the ``enable-automation`` switch and the
    automation extension are turned off, and a script is registered that
    makes ``navigator.webdriver`` read as ``undefined`` on every new document.

    Returns:
        The started ``webdriver.Chrome`` instance.

    Raises:
        Any exception raised by ``webdriver.Chrome`` or ``execute_cdp_cmd``.
        If the CDP call fails, the already-started browser is quit before
        the exception propagates, so no Chrome process is left behind.
    """
    options = Options()
    if ACTIVE_ENV == "prod":
        options.add_argument("--headless")

    # Plain command-line switches, applied in the original order.
    for flag in (
        "--disable-blink-features=AutomationControlled",
        "--disable-infobars",
        "--disable-extensions",
        "--start-maximized",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--window-size=1920,1080",
    ):
        options.add_argument(flag)

    # Set a realistic user agent.
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/115.0.0.0 Safari/537.36"
    )

    # Experimental options to hide automation.
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)
    options.add_experimental_option(
        "prefs",
        {
            # Chrome content-setting values: 1 = allow, 2 = block.
            # The patch set this to 2, which blocks every cookie and so
            # defeats the save_cookies() step that get_amazon_ranks() runs
            # after the CAPTCHA pause. Cookies are therefore allowed here.
            "profile.default_content_setting_values.cookies": 1,
        },
    )

    driver = webdriver.Chrome(options=options)
    try:
        # Must be registered before any navigation so that it runs ahead of
        # the page's own scripts on each new document.
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {
                "source": """
                    Object.defineProperty(navigator, 'webdriver', {
                        get: () => undefined
                    });
                """
            },
        )
    except Exception:
        # Do not leak the browser process when stealth setup fails.
        driver.quit()
        raise
    return driver