diff --git a/scrapper.py b/scrapper.py index 6078268..4f046fb 100644 --- a/scrapper.py +++ b/scrapper.py @@ -1,13 +1,10 @@ import os import pickle -import glob -import shutil from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By from selenium.webdriver.chrome.service import Service from webdriver_manager.chrome import ChromeDriverManager -import tempfile from time import sleep @@ -15,7 +12,6 @@ import json import time import re -chrome_profile_path = "/home/ec2-user/keyword_ranking_crawler/chrome_path_copy" with open("marketplaces.json", "r", encoding="utf-8") as f: data = json.load(f) @@ -29,6 +25,7 @@ with open("cookies.json", "r", encoding="utf-8") as f: # Or if it's a Python dict already: marketplaces = data["marketplaces"] +chrome_profile_path = '/home/ec2-user/keyword_ranking_crawler/chrome_path' BASE_PATH= '/mnt/AmazonReports/Amazon/keyword_ranking' #BASE_PATH= 'data' MAX_PAGE = 10 @@ -36,19 +33,6 @@ MAX_PAGE = 10 def get_driver(): - # Create a unique temp folder for this run - temp_profile = tempfile.mkdtemp() - # Copy profile but skip lock files - for item in os.listdir(chrome_profile_path): - s = os.path.join(chrome_profile_path, item) - d = os.path.join(temp_profile, item) - if item.startswith("Singleton"): # Skip lock files - continue - if os.path.isdir(s): - shutil.copytree(s, d, dirs_exist_ok=True) - else: - shutil.copy2(s, d) - options = Options() #options.add_argument("--headless") options.add_argument("--disable-blink-features=AutomationControlled") # Removes automation flag @@ -56,8 +40,8 @@ def get_driver(): options.add_experimental_option('useAutomationExtension', False) options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36") options.add_argument("--start-maximized") - options.add_argument(f"user-data-dir={temp_profile}") options.add_argument("profile-directory=Default") + options.add_argument(f"user-data-dir={chrome_profile_path}") driver = webdriver.Chrome( service=Service(ChromeDriverManager().install()),options=options) # Remove navigator.webdriver driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {