From 138a112e96dab2727552e9ec7125567b89eff84b Mon Sep 17 00:00:00 2001 From: MillenniumEarl Date: Thu, 25 Feb 2021 19:07:36 +0100 Subject: [PATCH] Adapt to new Result class --- src/scripts/fetch-data/fetch-platform-data.ts | 14 ++-- src/scripts/fetch-data/fetch-thread.ts | 21 +++--- src/scripts/network-helper.ts | 71 +++++++++---------- src/scripts/scrape-data/scrape-thread.ts | 33 ++++----- src/scripts/scrape-data/scrape-user.ts | 42 ++++++----- 5 files changed, 93 insertions(+), 88 deletions(-) diff --git a/src/scripts/fetch-data/fetch-platform-data.ts b/src/scripts/fetch-data/fetch-platform-data.ts index 6daaffa..7616286 100644 --- a/src/scripts/fetch-data/fetch-platform-data.ts +++ b/src/scripts/fetch-data/fetch-platform-data.ts @@ -51,15 +51,17 @@ export default async function fetchPlatformData(): Promise { if (!readCache(shared.cachePath)) { // Load the HTML const html = await fetchHTML(f95url.F95_LATEST_UPDATES); - + // Parse data - const data = parseLatestPlatformHTML(html); + if (html.isSuccess()) { + const data = parseLatestPlatformHTML(html.value); - // Assign data - assignLatestPlatformData(data); + // Assign data + assignLatestPlatformData(data); - // Cache data - saveCache(shared.cachePath); + // Cache data + saveCache(shared.cachePath); + } else throw html.value; } } //#endregion Public methods diff --git a/src/scripts/fetch-data/fetch-thread.ts b/src/scripts/fetch-data/fetch-thread.ts index c203b80..df05861 100644 --- a/src/scripts/fetch-data/fetch-thread.ts +++ b/src/scripts/fetch-data/fetch-thread.ts @@ -42,18 +42,21 @@ async function fetchResultURLs(url: string, limit: number = 30): Promise { - const elementSelector = $(el); - return extractLinkFromResult(elementSelector); - }).get(); + // Here we get all the DIV that are the body of the various query results + const results = $("body").find(f95Selector.GS_RESULT_BODY); - return urls; + // Than we extract the URLs + const urls = results.slice(0, limit).map((idx, el) => { + const elementSelector = $(el); + return extractLinkFromResult(elementSelector); + }).get(); + + return urls; + } else throw html.value; } /** diff --git a/src/scripts/network-helper.ts b/src/scripts/network-helper.ts index 03eba1e..f151219 100644 --- a/src/scripts/network-helper.ts +++ b/src/scripts/network-helper.ts @@ -12,6 +12,8 @@ import { urls as f95url } from "./constants/url.js"; import { selectors as f95selector } from "./constants/css-selector.js"; import LoginResult from "./classes/login-result.js"; import credentials from "./classes/credentials.js"; +import { failure, Result, success } from "./classes/result.js"; +import { GenericAxiosError, InvalidF95Token, UnexpectedResponseContentType } from "./classes/errors.js"; // Global variables const userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) " + @@ -31,26 +33,23 @@ const commonConfig = { /** * Gets the HTML code of a page. */ -export async function fetchHTML(url: string): Promise { - // Local variables - let returnValue = null; - +export async function fetchHTML(url: string): Promise> { // Fetch the response of the platform const response = await fetchGETResponse(url); - // Manage response - /* istambul ignore next */ - if (!response) { - shared.logger.warn(`Unable to fetch HTML for ${url}`); - } - /* istambul ignore next */ - else if (!response.headers["content-type"].includes("text/html")) { - // The response is not a HTML page - shared.logger.warn(`The ${url} returned a ${response.headers["content-type"]} response`); - } + if (response.isSuccess()) { + const isHTML = response.value["content-type"].includes("text/html"); - returnValue = response.data; - return returnValue; + const unexpectedResponseError = new UnexpectedResponseContentType({ + id: 2, + message: `Expected HTML but received ${response.value["content-type"]}`, + error: null + }); + + return isHTML ? + success(response.value.data as string) : + failure(unexpectedResponseError); + } else return failure(response.value as GenericAxiosError); } /** @@ -63,7 +62,7 @@ export async function fetchHTML(url: string): Promise { */ export async function authenticate(credentials: credentials, force: boolean = false): Promise { shared.logger.info(`Authenticating with user ${credentials.username}`); - if (!credentials.token) throw new Error(`Invalid token for auth: ${credentials.token}`); + if (!credentials.token) throw new InvalidF95Token(`Invalid token for auth: ${credentials.token}`); // Secure the URL const secureURL = enforceHttpsUrl(f95url.F95_LOGIN_URL); @@ -104,43 +103,43 @@ export async function authenticate(credentials: credentials, force: boolean = fa /** * Obtain the token used to authenticate the user to the platform. - * @returns {Promise} Token or `null` if an error arise */ -export async function getF95Token(): Promise { +export async function getF95Token() { // Fetch the response of the platform const response = await fetchGETResponse(f95url.F95_LOGIN_URL); - /* istambul ignore next */ - if (!response) { - shared.logger.warn("Unable to get the token for the session"); - return null; - } - // The response is a HTML page, we need to find the with name "_xfToken" - const $ = cheerio.load(response.data as string); - return $("body").find(f95selector.GET_REQUEST_TOKEN).attr("value"); + if (response.isSuccess()) { + // The response is a HTML page, we need to find the with name "_xfToken" + const $ = cheerio.load(response.value.data as string); + return $("body").find(f95selector.GET_REQUEST_TOKEN).attr("value"); + } else throw response.value; } //#region Utility methods /** * Performs a GET request to a specific URL and returns the response. - * If the request generates an error (for example 400) `null` is returned. */ -export async function fetchGETResponse(url: string): Promise> { +export async function fetchGETResponse(url: string): Promise>>{ // Secure the URL const secureURL = enforceHttpsUrl(url); try { // Fetch and return the response - return await axios.get(secureURL, commonConfig); + const response = await axios.get(secureURL, commonConfig); + return success(response); } catch (e) { shared.logger.error(`Error ${e.message} occurred while trying to fetch ${secureURL}`); - return null; + const genericError = new GenericAxiosError({ + id: 1, + message:`Error ${e.message} occurred while trying to fetch ${secureURL}`, + error: e + }); + return failure(genericError); } } /** * Enforces the scheme of the URL is https and returns the new URL. - * @param {String} url * @returns {String} Secure URL or `null` if the argument is not a string */ export function enforceHttpsUrl(url: string): string { @@ -149,12 +148,9 @@ export function enforceHttpsUrl(url: string): string { /** * Check if the url belongs to the domain of the F95 platform. - * @param {String} url URL to check - * @returns {Boolean} true if the url belongs to the domain, false otherwise */ export function isF95URL(url: string): boolean { - if (url.toString().startsWith(f95url.F95_BASE_URL)) return true; - else return false; + return url.toString().startsWith(f95url.F95_BASE_URL); }; /** @@ -167,8 +163,7 @@ export function isStringAValidURL(url: string): boolean { // Many thanks to Daveo at StackOverflow (https://preview.tinyurl.com/y2f2e2pc) const expression = /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/; const regex = new RegExp(expression); - if (url.match(regex)) return true; - else return false; + return url.match(regex).length > 0; }; /** diff --git a/src/scripts/scrape-data/scrape-thread.ts b/src/scripts/scrape-data/scrape-thread.ts index 572ea6c..37bf0b2 100644 --- a/src/scripts/scrape-data/scrape-thread.ts +++ b/src/scripts/scrape-data/scrape-thread.ts @@ -24,25 +24,26 @@ export async function getPostInformation(url: string): Promise // Fetch HTML and prepare Cheerio const html = await fetchHTML(url); - if (!html) return null; - const $ = cheerio.load(html); - const body = $("body"); - const mainPost = $(f95Selector.GS_POSTS).first(); - - // Extract data - const postData = parseCheerioMainPost($, mainPost); - const TJsonLD = getJSONLD(body); + if (html.isSuccess()) { + const $ = cheerio.load(html.value); + const body = $("body"); + const mainPost = $(f95Selector.GS_POSTS).first(); - // Fill in the HandiWork element with the information obtained - const hw: HandiWork = {} as HandiWork; - fillWithJSONLD(hw, TJsonLD); - fillWithPostData(hw, postData); - fillWithPrefixes(hw, body); - hw.tags = extractTags(body); + // Extract data + const postData = parseCheerioMainPost($, mainPost); + const TJsonLD = getJSONLD(body); - shared.logger.info(`Founded data for ${hw.name}`); - return hw; + // Fill in the HandiWork element with the information obtained + const hw: HandiWork = {} as HandiWork; + fillWithJSONLD(hw, TJsonLD); + fillWithPostData(hw, postData); + fillWithPrefixes(hw, body); + hw.tags = extractTags(body); + + shared.logger.info(`Founded data for ${hw.name}`); + return hw; + } else throw html.value; }; //#endregion Public methods diff --git a/src/scripts/scrape-data/scrape-user.ts b/src/scripts/scrape-data/scrape-user.ts index 76530fa..bf9c684 100644 --- a/src/scripts/scrape-data/scrape-user.ts +++ b/src/scripts/scrape-data/scrape-user.ts @@ -36,20 +36,22 @@ async function fetchUsernameAndAvatar(): Promise<{ [s: string]: string; }> { // Fetch page const html = await fetchHTML(f95url.F95_BASE_URL); - // Load HTML response - const $ = cheerio.load(html); - const body = $("body"); + if (html.isSuccess()) { + // Load HTML response + const $ = cheerio.load(html.value); + const body = $("body"); - // Fetch username - const username = body.find(f95Selector.UD_USERNAME_ELEMENT).first().text().trim(); + // Fetch username + const username = body.find(f95Selector.UD_USERNAME_ELEMENT).first().text().trim(); - // Fetch user avatar image source - const source = body.find(f95Selector.UD_AVATAR_PIC).first().attr("src"); + // Fetch user avatar image source + const source = body.find(f95Selector.UD_AVATAR_PIC).first().attr("src"); - return { - username, - source - }; + return { + username, + source + }; + } else throw html.value; } /** @@ -73,16 +75,18 @@ async function fetchWatchedGameThreadURLs(): Promise { // Fetch page const html = await fetchHTML(currentURL); - // Load HTML response - const $ = cheerio.load(html); - const body = $("body"); + if (html.isSuccess()) { + // Load HTML response + const $ = cheerio.load(html.value); + const body = $("body"); - // Find the URLs - const urls = fetchPageURLs(body); - watchedGameThreadURLs.push(...urls); + // Find the URLs + const urls = fetchPageURLs(body); + watchedGameThreadURLs.push(...urls); - // Find the next page (if any) - currentURL = fetchNextPageURL(body); + // Find the next page (if any) + currentURL = fetchNextPageURL(body); + } else throw html.value; } while (currentURL);