From 90100709badd6dd5a61249c575d52576f2bb8507 Mon Sep 17 00:00:00 2001 From: MillenniumEarl Date: Mon, 2 Nov 2020 00:14:28 +0100 Subject: [PATCH] Addes user data fetching --- app/index.js | 94 +------------------------ app/scripts/constants/css-selector.js | 11 +-- app/scripts/network-helper.js | 4 +- app/scripts/user-scraper.js | 98 ++++++++++++++++++++++++++- test/user-test.js | 5 ++ 5 files changed, 109 insertions(+), 103 deletions(-) diff --git a/app/index.js b/app/index.js index a8d62cf..d1b16d8 100644 --- a/app/index.js +++ b/app/index.js @@ -2,11 +2,10 @@ // Modules from file const shared = require("./scripts/shared.js"); -const f95url = require("./scripts/constants/url.js"); -const f95selector = require("./scripts/constants/css-selector.js"); const networkHelper = require("./scripts/network-helper.js"); const scraper = require("./scripts/scraper.js"); const searcher = require("./scripts/searcher.js"); +const uScraper = require("./scripts/user-scraper.js"); // Classes from file const Credentials = require("./scripts/classes/credentials.js"); @@ -163,95 +162,6 @@ module.exports.getUserData = async function () { return null; } - const threads = await getUserWatchedGameThreads(null); - - const username = await page.evaluate( - /* istanbul ignore next */ (selector) => - document.querySelector(selector).innerText, - f95selector.USERNAME_ELEMENT - ); - - const avatarSrc = await page.evaluate( - /* istanbul ignore next */ (selector) => - document.querySelector(selector).getAttribute("src"), - f95selector.AVATAR_PIC - ); - - const ud = new UserData(); - ud.username = username; - ud.avatarSrc = networkHelper.isStringAValidURL(avatarSrc) ? avatarSrc : null; - ud.watchedThreads = threads; - - return ud; + return await uScraper.getUserData(); }; //#endregion - -//#region Private methods - -//#region User -/** - * @private - * Gets the list of URLs of threads the user follows. - * @param {puppeteer.Browser} browser Browser object used for navigation - * @returns {Promise} URL list - */ -async function getUserWatchedGameThreads() { - const page = null; - await page.goto(f95url.F95_WATCHED_THREADS); // Go to the thread page - - // Explicitly wait for the required items to load - await page.waitForSelector(f95selector.WATCHED_THREAD_FILTER_POPUP_BUTTON); - - // Show the popup - await Promise.all([ - page.click(f95selector.WATCHED_THREAD_FILTER_POPUP_BUTTON), - page.waitForSelector(f95selector.UNREAD_THREAD_CHECKBOX), - page.waitForSelector(f95selector.ONLY_GAMES_THREAD_OPTION), - page.waitForSelector(f95selector.FILTER_THREADS_BUTTON), - ]); - - // Set the filters - await page.evaluate( - /* istanbul ignore next */ (selector) => - document.querySelector(selector).removeAttribute("checked"), - f95selector.UNREAD_THREAD_CHECKBOX - ); // Also read the threads already read - - // Filter the threads - await page.click(f95selector.ONLY_GAMES_THREAD_OPTION); - await page.click(f95selector.FILTER_THREADS_BUTTON); - await page.waitForSelector(f95selector.WATCHED_THREAD_URLS); - - // Get the threads urls - const urls = []; - let nextPageExists = false; - do { - // Get all the URLs - for (const handle of await page.$$(f95selector.WATCHED_THREAD_URLS)) { - const src = await page.evaluate( - /* istanbul ignore next */ (element) => element.href, - handle - ); - // If 'unread' is left, it will redirect to the last unread post - const url = src.replace("/unread", ""); - urls.push(url); - } - - nextPageExists = await page.evaluate( - /* istanbul ignore next */ (selector) => document.querySelector(selector), - f95selector.WATCHED_THREAD_NEXT_PAGE - ); - - // Click to next page - if (nextPageExists) { - await page.click(f95selector.WATCHED_THREAD_NEXT_PAGE); - await page.waitForSelector(f95selector.WATCHED_THREAD_URLS); - } - } while (nextPageExists); - - await page.close(); - return urls; -} -//#endregion User - -//#endregion Private methods diff --git a/app/scripts/constants/css-selector.js b/app/scripts/constants/css-selector.js index 3c624a0..e985b2d 100644 --- a/app/scripts/constants/css-selector.js +++ b/app/scripts/constants/css-selector.js @@ -1,6 +1,7 @@ module.exports = Object.freeze({ BD_ENGINE_ID_SELECTOR: "div[id^=\"btn-prefix_1_\"]>span", BD_STATUS_ID_SELECTOR: "div[id^=\"btn-prefix_4_\"]>span", + GT_IMAGES: "img:not([title])[data-src^=\"https://attachments.f95zone.to\"][data-url=\"\"]", GT_TAGS: "a.tagItem", GT_TITLE: "h1.p-title-value", @@ -16,13 +17,7 @@ module.exports = Object.freeze({ GS_RESULT_BODY: "div.contentRow-main", GS_MEMBERSHIP: "li > a:not(.username)", GET_REQUEST_TOKEN: "input[name=\"_xfToken\"]", - - LOGIN_BUTTON: "button.button--icon--login", + UD_USERNAME_ELEMENT: "a[href=\"/account/\"] > span.p-navgroup-linkText", + UD_AVATAR_PIC: "a[href=\"/account/\"] > span.avatar > img[class^=\"avatar\"]", LOGIN_MESSAGE_ERROR: "div.blockMessage.blockMessage--error.blockMessage--iconic", - PASSWORD_INPUT: "input[name=\"password\"]", - USERNAME_ELEMENT: "a[href=\"/account/\"] > span.p-navgroup-linkText", - USERNAME_INPUT: "input[name=\"login\"]", - AVATAR_INFO: "span.avatar", - AVATAR_PIC: "a[href=\"/account/\"] > span.avatar > img[class^=\"avatar\"]", - FILTER_THREADS_BUTTON: "button[class=\"button--primary button\"]", }); diff --git a/app/scripts/network-helper.js b/app/scripts/network-helper.js index 19b609b..16b1503 100644 --- a/app/scripts/network-helper.js +++ b/app/scripts/network-helper.js @@ -2,7 +2,7 @@ // Public modules from npm const axios = require("axios").default; -const _ = require("lodash"); +const { isString } = require("lodash"); const ky = require("ky-universal").create({ throwHttpErrors: false, }); @@ -166,7 +166,7 @@ module.exports.fetchGETResponse = async function(url) { * @returns {String} */ module.exports.enforceHttpsUrl = function (url) { - return _.isString(url) ? url.replace(/^(https?:)?\/\//, "https://") : null; + return isString(url) ? url.replace(/^(https?:)?\/\//, "https://") : null; }; /** diff --git a/app/scripts/user-scraper.js b/app/scripts/user-scraper.js index 9a390c3..0d577de 100644 --- a/app/scripts/user-scraper.js +++ b/app/scripts/user-scraper.js @@ -1 +1,97 @@ -"use strict"; \ No newline at end of file +"use strict"; + +// Public modules from npm +const cheerio = require("cheerio"); + +// Modules from file +const networkHelper = require("./network-helper.js"); +const f95Selector = require("./constants/css-selector.js"); +const f95url = require("./constants/url.js"); +const UserData = require("./classes/user-data.js"); + +module.exports.getUserData = async function() { + // Fetch data + const data = await fetchUsernameAndAvatar(); + const urls = await fetchWatchedThreadURLs(); + + // Create object + const ud = new UserData(); + ud.username = data.username; + ud.avatarSrc = data.source; + ud.watchedThreads = urls; + + return ud; +}; + +//#region Private methods +async function fetchUsernameAndAvatar() { + // Fetch page + const html = await networkHelper.fetchHTML(f95url.F95_BASE_URL); + + // Load HTML response + const $ = cheerio.load(html); + const body = $("body"); + + // Fetch username + const username = body.find(f95Selector.UD_USERNAME_ELEMENT).first().text().trim(); + + // Fetch user avatar image source + const source = body.find(f95Selector.UD_AVATAR_PIC).first().attr("src"); + + return { + username, + source + }; +} + +async function fetchWatchedThreadURLs() { + // Local variables + let currentURL = f95url.F95_WATCHED_THREADS; + const wathcedThreadURLs = []; + + do { + // Fetch page + const html = await networkHelper.fetchHTML(currentURL); + + // Load HTML response + const $ = cheerio.load(html); + const body = $("body"); + + // Find the URLs + const urls = fetchPageURLs(body); + wathcedThreadURLs.push(...urls); + + // Find the next page (if any) + currentURL = fetchNextPageURL(body); + } + while (currentURL); + + return wathcedThreadURLs; +} + +function fetchPageURLs(body) { + const elements = body.find(f95Selector.WT_URLS); + + return elements.map(function extractURLs(idx, e) { + // Obtain the link (replace "unread" only for the unread threads) + const partialLink = e.attribs.href.replace("unread", ""); + + // Compose and return the URL + return new URL(partialLink, f95url.F95_BASE_URL).toString(); + }).get(); +} + +/** + * + * @param {cheerio.Cheerio} body + */ +function fetchNextPageURL(body) { + const element = body.find(f95Selector.WT_NEXT_PAGE).first(); + + // No element found + if(element.length === 0) return null; + + // Compose and return the URL + return new URL(element.attr("href"), f95url.F95_BASE_URL).toString(); +} +//#endregion Private methods \ No newline at end of file diff --git a/test/user-test.js b/test/user-test.js index 8569a15..5ec4fbc 100644 --- a/test/user-test.js +++ b/test/user-test.js @@ -8,6 +8,7 @@ const searcher = require("../app/scripts/searcher.js"); const scraper = require("../app/scripts/scraper.js"); const Credentials = require("../app/scripts/classes/credentials.js"); const networkHelper = require("../app/scripts/network-helper.js"); +const uScraper = require("../app/scripts/user-scraper.js"); // Configure the .env reader dotenv.config(); @@ -16,6 +17,10 @@ dotenv.config(); auth().then(async function searchGames(result) { if(!result) return; + console.log("Fetching user data..."); + const userdata = await uScraper.getUserData(); + console.log(`${userdata.username} follows ${userdata.watchedThreads.length} threads`); + // Search for Kingdom Of Deception data await search("kingdom of deception");