fixes
parent
bf29a7dd87
commit
4dd819d876
20
scrapper.py
20
scrapper.py
|
@ -1,13 +1,10 @@
|
|||
import os
|
||||
import pickle
|
||||
import glob
|
||||
import shutil
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
import tempfile
|
||||
|
||||
|
||||
from time import sleep
|
||||
|
@ -15,7 +12,6 @@ import json
|
|||
import time
|
||||
import re
|
||||
|
||||
chrome_profile_path = "/home/ec2-user/keyword_ranking_crawler/chrome_path_copy"
|
||||
|
||||
with open("marketplaces.json", "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
@ -29,6 +25,7 @@ with open("cookies.json", "r", encoding="utf-8") as f:
|
|||
# Or if it's a Python dict already:
|
||||
marketplaces = data["marketplaces"]
|
||||
|
||||
chrome_profile_path = '/home/ec2-user/keyword_ranking_crawler/chrome_path'
|
||||
BASE_PATH= '/mnt/AmazonReports/Amazon/keyword_ranking'
|
||||
#BASE_PATH= 'data'
|
||||
MAX_PAGE = 10
|
||||
|
@ -36,19 +33,6 @@ MAX_PAGE = 10
|
|||
|
||||
|
||||
def get_driver():
|
||||
# Create a unique temp folder for this run
|
||||
temp_profile = tempfile.mkdtemp()
|
||||
# Copy profile but skip lock files
|
||||
for item in os.listdir(chrome_profile_path):
|
||||
s = os.path.join(chrome_profile_path, item)
|
||||
d = os.path.join(temp_profile, item)
|
||||
if item.startswith("Singleton"): # Skip lock files
|
||||
continue
|
||||
if os.path.isdir(s):
|
||||
shutil.copytree(s, d, dirs_exist_ok=True)
|
||||
else:
|
||||
shutil.copy2(s, d)
|
||||
|
||||
options = Options()
|
||||
#options.add_argument("--headless")
|
||||
options.add_argument("--disable-blink-features=AutomationControlled") # Removes automation flag
|
||||
|
@ -56,8 +40,8 @@ def get_driver():
|
|||
options.add_experimental_option('useAutomationExtension', False)
|
||||
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
|
||||
options.add_argument("--start-maximized")
|
||||
options.add_argument(f"user-data-dir={temp_profile}")
|
||||
options.add_argument("profile-directory=Default")
|
||||
options.add_argument(f"user-data-dir={chrome_profile_path}")
|
||||
driver = webdriver.Chrome( service=Service(ChromeDriverManager().install()),options=options)
|
||||
# Remove navigator.webdriver
|
||||
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
|
||||
|
|
Loading…
Reference in New Issue