add profile

2025-08-12 13:12:14 +05:00 · 2025-08-12 13:12:14 +05:00 · c56d666619
parent c6a2442023
commit c56d666619
2 changed files with 35 additions and 1013 deletions
--- a/keywords.json
+++ b/keywords.json
--- a/scrapper.py
+++ b/scrapper.py
@@ -3,6 +3,9 @@ import pickle
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from webdriver_manager.chrome import ChromeDriverManager
+
 from time import sleep
 import json
 import time
@@ -21,17 +24,31 @@ with open("cookies.json", "r", encoding="utf-8") as f:
 # Or if it's a Python dict already:
 marketplaces = data["marketplaces"]

-BASE_PATH= '/mnt/AmazonReports/Amazon/keyword_ranking'
-#BASE_PATH= 'data'
+#BASE_PATH= '/mnt/AmazonReports/Amazon/keyword_ranking'
+BASE_PATH= 'data'
 MAX_PAGE = 10



 def get_driver():
    options = Options()
-    options.add_argument("--headless")
-    options.add_argument("--disable-blink-features=AutomationControlled")
-    driver = webdriver.Chrome(options=options)
+    #options.add_argument("--headless")
+    options.add_argument("--disable-blink-features=AutomationControlled")  # Removes automation flag
+    options.add_experimental_option("excludeSwitches", ["enable-automation"])
+    options.add_experimental_option('useAutomationExtension', False)
+    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
+    options.add_argument("--start-maximized")
+    options.add_argument("user-data-dir=/home/ec2-user/keyword_ranking_crawler/chrome_path")
+    options.add_argument("profile-directory=Default")
+    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+    # Remove navigator.webdriver
+    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
+    "source": """
+        Object.defineProperty(navigator, 'webdriver', {
+            get: () => undefined
+        })
+    """
+})
    return driver

 def save_cookies(driver, path):
@@ -156,6 +173,7 @@ driver = get_driver()

 for keyword in keywords:
    for marketplace, details in marketplaces.items():
+        if marketplace == 'AMAZON_USA':
            url = details['url']
            ratingPrefix = details['ratingPrefix']
            count =1