remove locks
parent
03adb32bf0
commit
bf29a7dd87
24
scrapper.py
24
scrapper.py
|
@ -7,6 +7,8 @@ from selenium.webdriver.chrome.options import Options
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
from selenium.webdriver.chrome.service import Service
|
from selenium.webdriver.chrome.service import Service
|
||||||
from webdriver_manager.chrome import ChromeDriverManager
|
from webdriver_manager.chrome import ChromeDriverManager
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
|
||||||
from time import sleep
|
from time import sleep
|
||||||
import json
|
import json
|
||||||
|
@ -15,13 +17,6 @@ import re
|
||||||
|
|
||||||
chrome_profile_path = "/home/ec2-user/keyword_ranking_crawler/chrome_path_copy"
|
chrome_profile_path = "/home/ec2-user/keyword_ranking_crawler/chrome_path_copy"
|
||||||
|
|
||||||
# Remove Chrome's lock files
|
|
||||||
for lock_file in glob.glob(os.path.join(chrome_profile_path, "Singleton*")):
|
|
||||||
try:
|
|
||||||
os.remove(lock_file)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
with open("marketplaces.json", "r", encoding="utf-8") as f:
|
with open("marketplaces.json", "r", encoding="utf-8") as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
|
|
||||||
|
@ -41,6 +36,19 @@ MAX_PAGE = 10
|
||||||
|
|
||||||
|
|
||||||
def get_driver():
|
def get_driver():
|
||||||
|
# Create a unique temp folder for this run
|
||||||
|
temp_profile = tempfile.mkdtemp()
|
||||||
|
# Copy profile but skip lock files
|
||||||
|
for item in os.listdir(chrome_profile_path):
|
||||||
|
s = os.path.join(chrome_profile_path, item)
|
||||||
|
d = os.path.join(temp_profile, item)
|
||||||
|
if item.startswith("Singleton"): # Skip lock files
|
||||||
|
continue
|
||||||
|
if os.path.isdir(s):
|
||||||
|
shutil.copytree(s, d, dirs_exist_ok=True)
|
||||||
|
else:
|
||||||
|
shutil.copy2(s, d)
|
||||||
|
|
||||||
options = Options()
|
options = Options()
|
||||||
#options.add_argument("--headless")
|
#options.add_argument("--headless")
|
||||||
options.add_argument("--disable-blink-features=AutomationControlled") # Removes automation flag
|
options.add_argument("--disable-blink-features=AutomationControlled") # Removes automation flag
|
||||||
|
@ -48,7 +56,7 @@ def get_driver():
|
||||||
options.add_experimental_option('useAutomationExtension', False)
|
options.add_experimental_option('useAutomationExtension', False)
|
||||||
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
|
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
|
||||||
options.add_argument("--start-maximized")
|
options.add_argument("--start-maximized")
|
||||||
options.add_argument(f"user-data-dir={chrome_profile_path}")
|
options.add_argument(f"user-data-dir={temp_profile}")
|
||||||
options.add_argument("profile-directory=Default")
|
options.add_argument("profile-directory=Default")
|
||||||
driver = webdriver.Chrome( service=Service(ChromeDriverManager().install()),options=options)
|
driver = webdriver.Chrome( service=Service(ChromeDriverManager().install()),options=options)
|
||||||
# Remove navigator.webdriver
|
# Remove navigator.webdriver
|
||||||
|
|
Loading…
Reference in New Issue