temu-labels-crawler/index.js

275 lines
7.1 KiB
JavaScript

const puppeteer = require("puppeteer");
const axios = require("axios");
const luxon = require("luxon");
const { exit } = require("process");
const fs = require("fs");
const path = require("path");
const dotenv = require("dotenv").config({ path: __dirname + "/.env" });
const utils = require("./utils");
(async function () {
/**
 * Load the runtime settings: config.json, the selected environment name,
 * the decrypted Temu credentials and today's date in yyyy-MM-dd form.
 */
const configFileContents = fs.readFileSync(`${__dirname}/config.json`);
const config = JSON.parse(configFileContents);
const environment = process.env.ENVIRONMENT;
const cryptoConfig = utils.getCryptoConfig();
// Decrypts one environment variable using the shared crypto settings.
const decryptEnvValue = (variableName) =>
  utils.decryptString(
    process.env[variableName],
    cryptoConfig.algo,
    cryptoConfig.key,
    cryptoConfig.iv
  );
const email = decryptEnvValue("temu-email");
const password = decryptEnvValue("temu-password");
const currentTimestamp = luxon.DateTime.now().toFormat("yyyy-MM-dd");
/*
 * Restore cookies saved in cookies/cookies.json onto the given page.
 * Does nothing when the cookies file does not exist.
 */
const loadPageCookies = async function (page) {
  const cookiesFile = `${__dirname}/cookies/cookies.json`;
  if (!fs.existsSync(cookiesFile)) {
    return;
  }
  const savedCookies = JSON.parse(fs.readFileSync(cookiesFile));
  await page.setCookie(...savedCookies);
};
// Start the browser with the environment's Chrome profile, open a page,
// restore saved cookies and fix the viewport size.
const chromeProfilePath = path.resolve(
  __dirname,
  config[environment].chrome_profile_path
);
const launchOptions = utils.getBrowserConfig(chromeProfilePath, environment);
const browser = await puppeteer.launch(launchOptions);
const page = await browser.newPage();
await loadPageCookies(page);
const viewport = { width: 1600, height: 900 };
await page.setViewport(viewport);
// Persist the page's cookies to cookies/cookies.json on every "load" event.
const cookiesFileName = `cookies.json`;
page.on("load", async function () {
  // The emitter does not await this handler, so any failure has to be
  // handled here; otherwise it becomes an unhandled promise rejection.
  try {
    const cookies = await page.cookies();
    const cookiesDir = `${__dirname}/cookies`;
    // The directory may not exist yet (e.g. on a first run); writeFileSync
    // does not create missing parent directories.
    fs.mkdirSync(cookiesDir, { recursive: true });
    fs.writeFileSync(
      `${cookiesDir}/${cookiesFileName}`,
      JSON.stringify(cookies, null, 2)
    );
  } catch (ex) {
    console.log(ex);
  }
});
/*
 * Open the configured login page, attempt the login, pause, then open the
 * configured orders page.
 */
// Builds a fresh navigation options object for each page.goto call.
const domReady = () => ({ waitUntil: ["domcontentloaded"] });
const loginPage = config[environment].temuLoginPage;
await page.goto(loginPage, domReady());
await utils.tryTemuLogin(page, email, password, loginPage);
// Fixed 7 second pause before leaving the login page.
await new Promise((done) => {
  setTimeout(done, 7000);
});
const ordersPage = config[environment].temuOrdersPage;
await page.goto(ordersPage, domReady());
// Collected order data; one entry is appended per captured order-list response.
let orders = [];
// Divisor used to derive the page count — presumably the number of orders
// shown per listing page; TODO confirm against the site.
const pagination = 10;
let total_items = 0;
let currentPage = 1;
// Read the total item count from the pagination summary element.
// The wait is best-effort: a timeout is ignored and handled just below.
await page
  .waitForSelector("li.PGT_totalText_123", { timeout: 5000 })
  .catch(() => {});
const liText = await page.evaluate(() => {
  const liElement = document.querySelector("li.PGT_totalText_123");
  return liElement ? liElement.textContent : null;
});
// The count is taken from the second space-separated token of the text.
// When the element is absent (liText is null) fall back to 0 instead of
// throwing a TypeError on `null.split`.
total_items = liText ? Number.parseInt(liText.split(" ")[1], 10) : 0;
console.log(`Total Items count : ${total_items}`);
let total_pages = Math.ceil(total_items / pagination);
console.log(`Total Pages count : ${total_pages}`);
/**
 * Convert an order-list response payload into a map keyed by parent order
 * number.
 *
 * Items sharing the same `parentOrderSn` are merged into one entry. A payload
 * without a `result.pageItems` array yields an empty map, an item without an
 * `orderList` array contributes no lines, and an order without an
 * `extCodeList` array gets an `undefined` sku.
 *
 * @param {object} data - Parsed response JSON; `data.result.pageItems` is read.
 * @returns {Object<string, {items: Array<{sku: *, quantity: *}>}>} Map from
 *   parentOrderSn to its collected `{ sku, quantity }` lines.
 */
const transformData = (data) => {
  const result = {};
  const pageItems = data && data.result ? data.result.pageItems : null;
  // The response may carry a null result or null pageItems.
  if (!Array.isArray(pageItems)) {
    return result;
  }
  for (const item of pageItems) {
    const parentOrderSn = item.parentOrderMap.parentOrderSn;
    if (!result[parentOrderSn]) {
      result[parentOrderSn] = { items: [] };
    }
    const orderList = Array.isArray(item.orderList) ? item.orderList : [];
    for (const order of orderList) {
      // Only the first external code of each order is used as the sku.
      const sku = Array.isArray(order.extCodeList)
        ? order.extCodeList[0]
        : undefined;
      const quantity = order.quantity;
      result[parentOrderSn].items.push({ sku, quantity });
    }
  }
  return result;
};
// Pages that already have the order-list response listener attached.
const orderListenerPages = new WeakSet();
/**
 * Start capturing order-list responses on the given page.
 *
 * Attaches a "response" listener that, for every response whose request URL
 * contains "/recentOrderList", parses the JSON body, transforms it with
 * `transformData` and appends the result to `orders`. Errors raised while
 * handling a response are logged and do not propagate.
 *
 * The listener is attached at most once per page, so calling this function
 * repeatedly for the same page is safe.
 *
 * @param {object} page - Page object exposing `on("response", handler)`.
 * @returns {Promise<void>}
 */
let getPageOrderData = async (page) => {
  // This function is invoked once per crawled listing page with the same
  // `page` object. Without this guard every call would add another listener
  // and each response would be pushed into `orders` once per listener.
  if (orderListenerPages.has(page)) {
    return;
  }
  orderListenerPages.add(page);
  page.on("response", async function (res) {
    try {
      const req = res.request();
      if (req.url().includes("/recentOrderList")) {
        const resJson = await res.json();
        orders.push(transformData(resJson));
      }
    } catch (ex) {
      console.log(ex);
    }
  });
};
// Walk through the order listing pages until pagination ends.
// Resolves after the given number of milliseconds.
const pause = (ms) => new Promise((done) => setTimeout(done, ms));
for (;;) {
  console.log(`Crawling for page ${currentPage}`);
  await utils.tryTemuLogin(page, email, password, loginPage);
  await pause(4000);
  // Re-apply the saved cookies, then hook up order data capture.
  await loadPageCookies(page);
  await getPageOrderData(page);
  currentPage += 1;
  // The "next" control counts as unavailable when it carries the disabled class.
  const nextIsDisabled = await page.evaluate(
    () => document.querySelector("li.PGT_next_123.PGT_disabled_123") !== null
  );
  if (nextIsDisabled) {
    console.log("No next button");
    break;
  }
  if (currentPage > total_pages) {
    console.log("Last Page Reached");
    break;
  }
  // Move to the next listing page by clicking the "next" control, if present.
  await page.evaluate(() => {
    const nextItem = document.querySelector("li.PGT_next_123");
    if (nextItem) {
      nextItem.click();
    }
  });
  await pause(5000);
}
// NOTE(review): the write of the collected `orders` to
// orders/<currentTimestamp>.json is commented out, so this try block currently
// does nothing. syncOrders (defined right after this block) reads that same
// file path — confirm whether persisting is meant to stay disabled.
try {
// fs.writeFileSync(
// `${__dirname}/orders/${currentTimestamp}.json`,
// JSON.stringify(orders, null, 2)
// );
} catch (e) {
console.log(e);
}
/*
 * Visit the order page of every parent order stored in
 * orders/<currentTimestamp>.json and click the element matched by the
 * `buyShippingBtn` selector when it is present (presumably the "buy shipping"
 * button — confirm against the site).
 *
 * Each parent order number is processed once, even if it appears in several
 * entries of the file. Any error (including a missing or invalid orders file)
 * is logged and ends the sync; nothing is thrown to the caller.
 */
const syncOrders = async () => {
  console.log(">--- Syncing Orders ----<");
  try {
    // Orders saved for the current date.
    const savedOrders = JSON.parse(
      fs.readFileSync(__dirname + `/orders/${currentTimestamp}.json`)
    );
    const poSet = new Set();
    for (const order of savedOrders) {
      for (const poKey of Object.keys(order)) {
        // Already handled: no page was visited, so there is nothing to wait for.
        if (poSet.has(poKey)) {
          continue;
        }
        poSet.add(poKey);
        console.log(`Syncing for : ${poKey}`);
        const orderUrl = utils.getTemuOrderPage(poKey);
        await page.goto(orderUrl, {
          waitUntil: ["domcontentloaded"],
        });
        const buyShippingBtn = "span._2DQ2xCuz";
        // Best-effort wait; the element is optional, so a timeout is ignored.
        await page
          .waitForSelector(buyShippingBtn, { timeout: 1000 })
          .catch(() => {});
        const hasBuyShippingBtn = (await page.$(buyShippingBtn)) !== null;
        if (hasBuyShippingBtn) {
          await page.click(buyShippingBtn);
        }
        // Fixed 45 second pause after each visited order page.
        await new Promise((resolve) => setTimeout(resolve, 45 * 1000));
      }
    }
  } catch (ex) {
    console.log(ex);
  }
};
// Run the order sync, then shut everything down. The browser is closed in
// `finally` so its process is not left running if the sync or the page close
// rejects.
try {
  await syncOrders();
  await page.close();
} finally {
  await browser.close();
}
})();