main
saif 2025-08-12 14:13:41 +05:00
parent bf29a7dd87
commit 4dd819d876
1 changed file with 2 additions and 18 deletions

View File

@ -1,13 +1,10 @@
import os
import pickle
import glob
import shutil
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import tempfile
from time import sleep
@ -15,7 +12,6 @@ import json
import time
import re
chrome_profile_path = "/home/ec2-user/keyword_ranking_crawler/chrome_path_copy"
# Parse marketplaces.json (read as UTF-8) into `data`.
with open("marketplaces.json", "r", encoding="utf-8") as f:
data = json.load(f)
@ -29,6 +25,7 @@ with open("cookies.json", "r", encoding="utf-8") as f:
# Or if it's a Python dict already:
# Marketplace entries are taken from the "marketplaces" key of the parsed JSON.
# NOTE(review): presumably `data` is still the marketplaces.json payload here;
# the lines between the two loads are not visible -- confirm.
marketplaces = data["marketplaces"]
# Chrome user-data directory; get_driver() passes it to Chrome as user-data-dir.
chrome_profile_path = '/home/ec2-user/keyword_ranking_crawler/chrome_path'
# NOTE(review): looks like the root directory for crawl output; its usage is
# not visible in this section -- confirm.
BASE_PATH= '/mnt/AmazonReports/Amazon/keyword_ranking'
# Alternative relative location, currently disabled.
#BASE_PATH= 'data'
# NOTE(review): presumably the maximum number of result pages crawled per
# keyword -- confirm against the crawl loop.
MAX_PAGE = 10
@ -36,19 +33,6 @@ MAX_PAGE = 10
def get_driver():
# Create a unique temp folder for this run
temp_profile = tempfile.mkdtemp()
# Copy profile but skip lock files
for item in os.listdir(chrome_profile_path):
s = os.path.join(chrome_profile_path, item)
d = os.path.join(temp_profile, item)
if item.startswith("Singleton"): # Skip lock files
continue
if os.path.isdir(s):
shutil.copytree(s, d, dirs_exist_ok=True)
else:
shutil.copy2(s, d)
options = Options()
#options.add_argument("--headless")
options.add_argument("--disable-blink-features=AutomationControlled") # Removes automation flag
@ -56,8 +40,8 @@ def get_driver():
options.add_experimental_option('useAutomationExtension', False)
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
options.add_argument("--start-maximized")
options.add_argument(f"user-data-dir={temp_profile}")
options.add_argument("profile-directory=Default")
options.add_argument(f"user-data-dir={chrome_profile_path}")
driver = webdriver.Chrome( service=Service(ChromeDriverManager().install()),options=options)
# Remove navigator.webdriver
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {