main
saif 2025-08-12 14:13:41 +05:00
parent bf29a7dd87
commit 4dd819d876
1 changed file with 2 additions and 18 deletions

View File

@ -1,13 +1,10 @@
import os import os
import pickle import pickle
import glob
import shutil
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager from webdriver_manager.chrome import ChromeDriverManager
import tempfile
from time import sleep from time import sleep
@ -15,7 +12,6 @@ import json
import time import time
import re import re
chrome_profile_path = "/home/ec2-user/keyword_ranking_crawler/chrome_path_copy"
with open("marketplaces.json", "r", encoding="utf-8") as f: with open("marketplaces.json", "r", encoding="utf-8") as f:
data = json.load(f) data = json.load(f)
@ -29,6 +25,7 @@ with open("cookies.json", "r", encoding="utf-8") as f:
# Or if it's a Python dict already: # Or if it's a Python dict already:
marketplaces = data["marketplaces"] marketplaces = data["marketplaces"]
chrome_profile_path = '/home/ec2-user/keyword_ranking_crawler/chrome_path'
BASE_PATH= '/mnt/AmazonReports/Amazon/keyword_ranking' BASE_PATH= '/mnt/AmazonReports/Amazon/keyword_ranking'
#BASE_PATH= 'data' #BASE_PATH= 'data'
MAX_PAGE = 10 MAX_PAGE = 10
@ -36,19 +33,6 @@ MAX_PAGE = 10
def get_driver(): def get_driver():
# Create a unique temp folder for this run
temp_profile = tempfile.mkdtemp()
# Copy profile but skip lock files
for item in os.listdir(chrome_profile_path):
s = os.path.join(chrome_profile_path, item)
d = os.path.join(temp_profile, item)
if item.startswith("Singleton"): # Skip lock files
continue
if os.path.isdir(s):
shutil.copytree(s, d, dirs_exist_ok=True)
else:
shutil.copy2(s, d)
options = Options() options = Options()
#options.add_argument("--headless") #options.add_argument("--headless")
options.add_argument("--disable-blink-features=AutomationControlled") # Removes automation flag options.add_argument("--disable-blink-features=AutomationControlled") # Removes automation flag
@ -56,8 +40,8 @@ def get_driver():
options.add_experimental_option('useAutomationExtension', False) options.add_experimental_option('useAutomationExtension', False)
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36") options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
options.add_argument("--start-maximized") options.add_argument("--start-maximized")
options.add_argument(f"user-data-dir={temp_profile}")
options.add_argument("profile-directory=Default") options.add_argument("profile-directory=Default")
options.add_argument(f"user-data-dir={chrome_profile_path}")
driver = webdriver.Chrome( service=Service(ChromeDriverManager().install()),options=options) driver = webdriver.Chrome( service=Service(ChromeDriverManager().install()),options=options)
# Remove navigator.webdriver # Remove navigator.webdriver
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {