initial commit

main
saif 2025-08-04 17:57:25 +05:00
commit 19dddd7551
19 changed files with 392 additions and 0 deletions

58
cookies.json Normal file
View File

@ -0,0 +1,58 @@
{
"AMAZON_USA": {
"marketplace": "AMAZON_USA",
"cookies_name": "cookies/amazon_us_cookies.pkl"
},
"AMAZON_CA": {
"marketplace": "AMAZON_CA",
"cookies_name":"cookies/amazon_ca_cookies.pkl"
},
"AMAZON_SE": {
"marketplace": "AMAZON_SE",
"cookies_name": "cookies/amazon_se_cookies.pkl"
},
"AMAZON_ES": {
"marketplace": "AMAZON_ES",
"cookies_name": "cookies/amazon_es_cookies.pkl"
},
"AMAZON_FR": {
"marketplace": "AMAZON_FR",
"cookies_name": "cookies/amazon_fr_cookies.pkl"
},
"AMAZON_IT": {
"marketplace": "AMAZON_IT",
"cookies_name": "cookies/amazon_it_cookies.pkl"
},
"AMAZON_JP": {
"marketplace": "AMAZON_JP",
"cookies_name": "cookies/amazon_jb_cookies.pkl"
},
"AMAZON_UK": {
"marketplace": "AMAZON_UK",
"cookies_name": "cookies/amazon_uk_cookies.pkl"
},
"AMAZON_DE": {
"marketplace": "AMAZON_DE",
"cookies_name": "cookies/amazon_de_cookies.pkl"
},
"AMAZON_MX": {
"marketplace": "AMAZON_MX",
"cookies_name": "cookies/amazon_mx_cookies.pkl"
},
"AMAZON_AU": {
"marketplace": "AMAZON_AU",
"cookies_name": "cookies/amazon_au_cookies.pkl"
},
"AMAZON_TR": {
"marketplace": "AMAZON_TR",
"cookies_name": "cookies/amazon_tr_cookies.pkl"
},
"AMAZON_PL": {
"marketplace": "AMAZON_PL",
"cookies_name": "cookies/amazon_pl_cookies.pkl"
},
"AMAZON_NL": {
"marketplace": "AMAZON_NL",
"cookies_name": "cookies/amazon_nl_cookies.pkl"
}
}

Binary file not shown.

View File

@ -0,0 +1 @@
<EFBFBD>]<5D>.

View File

@ -0,0 +1 @@
<EFBFBD>]<5D>.

View File

@ -0,0 +1 @@
<EFBFBD>]<5D>.

View File

@ -0,0 +1 @@
<EFBFBD>]<5D>.

View File

@ -0,0 +1 @@
<EFBFBD>]<5D>.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
<EFBFBD>]<5D>.

View File

@ -0,0 +1 @@
<EFBFBD>]<5D>.

View File

@ -0,0 +1 @@
<EFBFBD>]<5D>.

View File

@ -0,0 +1 @@
<EFBFBD>]<5D>.

View File

@ -0,0 +1 @@
<EFBFBD>]<5D>.

145
marketplaces.json Normal file
View File

@ -0,0 +1,145 @@
{
"marketplaces": {
"AMAZON_USA": {
"url": "amazon.com",
"datePrefix": " on ",
"ratingPrefix": " out ",
"marketplaceId": "ATVPDKIKX0DER",
"sellerId": "A3AQP8TDYVYCGL",
"monsSelDirMcid": "amzn1.merchant.d.ACNTLSC3KHN32CA2ZGAMI6CIQOYA",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"spApiRegion": "na"
},
"AMAZON_CA": {
"url": "amazon.ca",
"datePrefix": " on ",
"ratingPrefix": " out ",
"marketplaceId": "A2EUQ1WTGCTBG2",
"sellerId": "A3AQP8TDYVYCGL",
"monsSelDirMcid": "amzn1.merchant.d.ACNTLSC3KHN32CA2ZGAMI6CIQOYA",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"existsInEurope" : false
},
"AMAZON_ES": {
"url": "amazon.es",
"datePrefix": " el ",
"ratingPrefix": " out ",
"marketplaceId": "A1RKKUPIHCS9HS",
"sellerId": "A2PQ31EAG2KKDB",
"monsSelDirMcid": "amzn1.merchant.d.ABJPA72DQNTVVLRKE5THKMTM2S7Q",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"existsInEurope" : true
},
"AMAZON_MX": {
"url": "amazon.com.mx",
"datePrefix": " on ",
"ratingPrefix": " out ",
"marketplaceId": "A1AM78C64UM0Y8",
"sellerId": "A3AQP8TDYVYCGL",
"monsSelDirMcid": "amzn1.merchant.d.ACNTLSC3KHN32CA2ZGAMI6CIQOYA",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"existsInEurope" : false
},
"AMAZON_DE": {
"url": "amazon.de",
"datePrefix": " on ",
"ratingPrefix": " out ",
"marketplaceId": "A1PA6795UKMFR9",
"sellerId": "A2PQ31EAG2KKDB",
"monsSelDirMcid": "amzn1.merchant.d.ABJPA72DQNTVVLRKE5THKMTM2S7Q",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"existsInEurope" : true
},
"AMAZON_UK": {
"url": "amazon.co.uk",
"datePrefix": " on ",
"ratingPrefix": " out ",
"marketplaceId": "A1F83G8C2ARO7P",
"sellerId": "A2PQ31EAG2KKDB",
"monsSelDirMcid": "amzn1.merchant.d.ABJPA72DQNTVVLRKE5THKMTM2S7Q",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"spApiRegion": "eu"
},
"AMAZON_FR": {
"url": "amazon.fr",
"datePrefix": " le ",
"ratingPrefix": " sur ",
"marketplaceId": "A13V1IB3VIYZZH",
"sellerId": "A2PQ31EAG2KKDB",
"monsSelDirMcid": "amzn1.merchant.d.ABJPA72DQNTVVLRKE5THKMTM2S7Q",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"existsInEurope" : true
},
"AMAZON_IT": {
"url": "amazon.it",
"datePrefix": " il ",
"ratingPrefix": " out ",
"marketplaceId": "APJ6JRA9NG5V4",
"sellerId": "A2PQ31EAG2KKDB",
"monsSelDirMcid": "amzn1.merchant.d.ABJPA72DQNTVVLRKE5THKMTM2S7Q",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"existsInEurope" : true
},
"AMAZON_JP": {
"url": "amazon.co.jp",
"datePrefix": " on ",
"ratingPrefix": " out ",
"marketplaceId": "A1VC38T7YXB528",
"sellerId": "ASXGWNT2IP97D",
"monsSelDirMcid": "amzn1.merchant.d.ABJPA72DQNTVVLRKE5THKMTM2S7Q",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"existsInEurope" : true
},
"AMAZON_AU": {
"url": "amazon.com.au",
"datePrefix": " on ",
"ratingPrefix": " out ",
"marketplaceId": "A39IBJ37TRP1C6",
"sellerId": "A10ATVJQO4YLOJ",
"monsSelDirMcid": "amzn1.merchant.d.AC2O7FOGAVY6N2ZGFWJDEKXDAUAA",
"monsSelDirPaid": "amzn1.pa.d.ACBRNFLIYBKMXXHCO4MLCNV2OV3A",
"existsInEurope" : false
},
"AMAZON_NL": {
"url": "amazon.nl",
"datePrefix": " op ",
"ratingPrefix": " van ",
"marketplaceId": "A1805IZSGTT6HS",
"sellerId": "A2PQ31EAG2KKDB",
"monsSelDirMcid": "amzn1.merchant.d.ABJPA72DQNTVVLRKE5THKMTM2S7Q",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"existsInEurope" : true
},
"AMAZON_SE": {
"url": "amazon.se",
"datePrefix": " den ",
"ratingPrefix": " out ",
"marketplaceId": "A2NODRKZP88ZB9",
"sellerId": "A2PQ31EAG2KKDB",
"monsSelDirMcid": "amzn1.merchant.d.ABJPA72DQNTVVLRKE5THKMTM2S7Q",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"existsInEurope" : true
},
"AMAZON_PL": {
"url": "amazon.pl",
"datePrefix": " dnia ",
"ratingPrefix": " out ",
"marketplaceId": "A1C3SOZRARQ6R3",
"sellerId": "A2PQ31EAG2KKDB",
"monsSelDirMcid": "amzn1.merchant.d.ABJPA72DQNTVVLRKE5THKMTM2S7Q",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"existsInEurope" : true
},
"AMAZON_TR": {
"url": "amazon.com.tr",
"datePrefix": " ",
"dateSuffix": " tarihinde değerlendirildi",
"ratingPrefix": " out ",
"marketplaceId": "A33AVAJ2PDY3EV",
"sellerId": "A2PQ31EAG2KKDB",
"monsSelDirMcid": "amzn1.merchant.d.ABJPA72DQNTVVLRKE5THKMTM2S7Q",
"monsSelDirPaid": "amzn1.pa.d.ADY3WQHX65LDGRB7ZIY7L3H67AMA",
"existsInEurope" : true
}
}
}

109
scapper.py Normal file
View File

@ -0,0 +1,109 @@
import os
import pickle
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from time import sleep
import json
import time
# Load both JSON configuration files (paths are relative to the current
# working directory): marketplaces.json holds the per-marketplace settings,
# cookies.json maps each marketplace key to its cookie pickle path.
with open("marketplaces.json", encoding="utf-8") as f:
    data = json.loads(f.read())
with open("cookies.json", encoding="utf-8") as f:
    cookies_ref = json.loads(f.read())
# Only the "marketplaces" mapping of marketplaces.json is used below.
marketplaces = data["marketplaces"]
def get_driver():
    """Start a Chrome WebDriver for scraping.

    Returns:
        A ``webdriver.Chrome`` instance launched with the ``--headless`` and
        ``--disable-blink-features=AutomationControlled`` arguments.
    """
    chrome_options = Options()
    for flag in ("--headless", "--disable-blink-features=AutomationControlled"):
        chrome_options.add_argument(flag)
    return webdriver.Chrome(options=chrome_options)
def save_cookies(driver, path):
    """Pickle the driver's current cookies to ``path``.

    Args:
        driver: Object exposing ``get_cookies()`` (the Selenium driver here).
        path: Destination file. Missing parent directories are created.

    Raises:
        Whatever ``driver.get_cookies()`` raises; in that case no file is
        created or truncated.
    """
    # Fetch first: opening the file before a failing get_cookies() would leave
    # an empty pickle behind, which later makes pickle.load() raise EOFError.
    cookies = driver.get_cookies()
    parent = os.path.dirname(path)
    if parent:
        # The paths in cookies.json live under "cookies/"; make sure it exists.
        os.makedirs(parent, exist_ok=True)
    with open(path, "wb") as f:
        pickle.dump(cookies, f)
def save_ranking(rankings, file_path):
    """Write ``rankings`` to ``file_path`` as UTF-8 JSON.

    The output is indented by four spaces and non-ASCII characters are
    written as-is rather than escaped.
    """
    with open(file_path, "w", encoding="utf-8") as out:
        out.write(json.dumps(rankings, ensure_ascii=False, indent=4))
def load_cookies(driver, path):
    """Add every cookie stored in the pickle at ``path`` to ``driver``.

    Args:
        driver: Object exposing ``add_cookie(cookie)``.
        path: Pickle file containing an iterable of cookie dicts.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point this at cookie files the scraper wrote itself.
    with open(path, "rb") as cookie_file:
        stored_cookies = pickle.load(cookie_file)
    for entry in stored_cookies:
        # Drop 'sameSite' before adding (optional workaround if Chrome complains).
        entry.pop('sameSite', None)
        driver.add_cookie(entry)
def check_sponsored(item):
    """Tell whether a search-result element carries a "Sponsored" label.

    Args:
        item: Element exposing ``find_elements(by, value)``.

    Returns:
        1 if a descendant's stripped, lower-cased text is exactly
        "sponsored"; None if no such label is found; 0 if the lookup itself
        raised. The None/0 distinction is kept because get_amazon_ranks
        tests the result against None.
    """
    try:
        # The XPath only narrows to nodes whose text contains "Sponsored";
        # the exact match is done on the rendered text below.
        sponsored_labels = item.find_elements(By.XPATH, './/*[contains(text(), "Sponsored")]')
        for label in sponsored_labels:
            if label.text.strip().lower() == "sponsored":
                return 1
    except Exception:
        # Exception instead of a bare except, so Ctrl-C / SystemExit still
        # stop the scraper.
        return 0
    return None
def check_consist_utopia(title):
    """Return 1 if the case-sensitive substring "Utopia" occurs in ``title``, else 0."""
    return int("Utopia" in title)
def get_amazon_ranks(url, marketplace, keyword):
    """Scrape one Amazon search-results page and save the organic rankings.

    Args:
        url: Marketplace domain without scheme or "www." (the "url" value
            from marketplaces.json, e.g. "amazon.com").
        marketplace: Marketplace key; must exist in ``cookies_ref``.
        keyword: Search phrase.

    Side effects:
        Drives the module-level ``driver``, creates the marketplace's cookie
        pickle when it does not exist yet, and writes
        "<epoch-ms>-<marketplace>-<keyword>.json" to the working directory.
    """
    from urllib.parse import quote_plus

    print( '[INFO] Getting Amazon Ranks for: ', marketplace, keyword)
    # quote_plus maps spaces to "+" like the previous str.replace did, and also
    # escapes characters such as "&" or "#" that would corrupt the query.
    url = f"https://www.{url}/s?k={quote_plus(keyword)}"
    # The domain has to be loaded once before cookies can be attached to it.
    driver.get(url)
    count = 1
    ranks = []
    cookie_file = cookies_ref[marketplace]['cookies_name']
    print(cookie_file)
    # Load cookies if available
    if os.path.exists(cookie_file):
        load_cookies(driver, cookie_file)
        driver.get(url)
    else:
        print("No cookie file found, visiting fresh")
        driver.get(url)
        sleep(5)  # Give time to solve CAPTCHA manually (if needed)
        save_cookies(driver, cookie_file)
    sleep(3)  # Wait for JS to load
    items = driver.find_elements(By.XPATH, '//div[contains(@class,"s-result-item") and @data-asin]')
    for item in items:
        asin = item.get_attribute("data-asin")
        try:
            sponsored = check_sponsored(item)
            title = item.find_element(By.XPATH, './/h2//span').text
        except Exception:
            # Result tiles without an <h2> title are skipped. Exception (not a
            # bare except) keeps Ctrl-C working during long runs.
            continue
        if title == 'Results':
            continue
        # check_sponsored returns None only when no "Sponsored" label was
        # found; 1 (sponsored) and 0 (lookup failed) are both left out.
        if sponsored is None:
            ranks.append({
                'rank': count,
                'title': title,
                'marketplace': marketplace,
                'keyword': keyword,
                'sponsored': 0,
                'asin': asin,
                'is_utopia': check_consist_utopia(title),
            })
            count += 1
    file_path = f"{int(time.time() * 1000)}-{marketplace}-{keyword}.json"
    save_ranking(ranks, file_path)
# Scrape the 'pillows' keyword on every configured marketplace with one shared
# browser session.
driver = get_driver()
try:
    for marketplace, details in marketplaces.items():
        url = details['url']
        get_amazon_ranks(url, marketplace, 'pillows')
finally:
    # Always quit, even when a marketplace fails, so no headless Chrome
    # process is left running.
    driver.quit()

56
send-data.js Normal file
View File

@ -0,0 +1,56 @@
const axios = require( 'axios' );
const fs = require( 'fs' );
const path = require( 'path' );
/**
 * Posts every .json file found in the keyword_ranking directory to the
 * configured endpoint, oldest first (by file birthtime), and moves each file
 * that was answered with HTTP 200 into the "processed" sub-directory.
 * A failure on one file is logged and does not stop the remaining files.
 */
(async function() {
    /**
     * directory path
     */
    const rootPath = '/mnt/AmazonReports/Amazon/keyword_ranking';
    const processedPath = path.join( rootPath, 'processed' );

    // recursive: creates rootPath and any missing parents as well, and is a
    // no-op when the directories already exist.
    fs.mkdirSync( processedPath, { recursive: true } );

    // FIXME: `config` and `environment` are neither defined nor imported in
    // this file, so the original per-file lookup threw a ReferenceError for
    // every file. Resolve the endpoint once and fail fast with a clear message
    // until the configuration is wired in.
    const endpoint =
        typeof config !== 'undefined' && typeof environment !== 'undefined' && config[environment]
            ? config[environment].cosmos_path_orders_progress
            : undefined;
    if ( ! endpoint ) {
        console.error( 'send-data: config[environment].cosmos_path_orders_progress is not available; nothing was sent.' );
        process.exitCode = 1;
        return;
    }

    /**
     * read all files in directory, send data to cosmos then move to processed
     */
    const jsonFiles = fs.readdirSync( rootPath )
        .filter( ( file ) => path.extname( file ).toLowerCase() === '.json' )
        .map( ( file ) => ( { file, birthtime: fs.statSync( path.join( rootPath, file ) ).birthtime } ) )
        .sort( ( a, b ) => a.birthtime - b.birthtime ) // ASCENDING order
        .map( ( entry ) => entry.file ); // extract filenames

    for ( const file of jsonFiles ) {
        const filePath = path.join( rootPath, file );
        try {
            // read contents of the file
            const rankings = JSON.parse( fs.readFileSync( filePath, 'utf-8' ) );
            const payload = { progressList: rankings };
            console.log( `Processing: ${filePath}` );

            // send post request to cosmos
            const res = await axios.post( endpoint, payload, {
                headers: {
                    'Content-Type': 'application/json'
                }
            } );

            // only files acknowledged with 200 are moved; anything else stays
            // in place and is retried on the next run
            if ( res.status === 200 ) {
                fs.renameSync( filePath, path.join( processedPath, file ) );
            }
        } catch ( e ) {
            console.error( `Failed to process ${filePath}:`, e );
        }
    }
})().catch( ( e ) => {
    // errors outside the per-file try/catch (mkdir, readdir, stat) end up here
    console.error( e );
    process.exitCode = 1;
} );

14
test.py Normal file
View File

@ -0,0 +1,14 @@
import json
import requests
# Ranking dump to inspect (path is relative to the current working directory)
filename = "1754304649015-toys.json"
# Decode the JSON document and echo the resulting Python object to stdout
with open(filename, encoding="utf-8") as f:
    data = json.loads(f.read())
print(data)