From 91f809f249a0b0a1bb64a42e9cb067c242009950 Mon Sep 17 00:00:00 2001 From: MillenniumEarl Date: Sun, 21 Feb 2021 14:40:05 +0100 Subject: [PATCH] Renamed types and fields --- src/scripts/classes/handiwork/animation.ts | 42 +++---- src/scripts/classes/handiwork/asset.ts | 40 +++--- src/scripts/classes/handiwork/comic.ts | 34 ++--- src/scripts/classes/handiwork/game.ts | 48 +++---- src/scripts/classes/handiwork/handiwork.ts | 67 +++++----- src/scripts/interfaces.d.ts | 112 ++++++++--------- src/scripts/platform-data.ts | 36 +++--- src/scripts/post-parser.ts | 134 ++++++++++---------- src/scripts/scraper.ts | 140 +++++++++++---------- 9 files changed, 330 insertions(+), 323 deletions(-) diff --git a/src/scripts/classes/handiwork/animation.ts b/src/scripts/classes/handiwork/animation.ts index 4d6a19b..c13d4a1 100644 --- a/src/scripts/classes/handiwork/animation.ts +++ b/src/scripts/classes/handiwork/animation.ts @@ -1,31 +1,31 @@ "use strict"; // Modules from files -import { AuthorType, IAnimation, RatingType, CategoryType } from "../../interfaces"; +import { TAuthor, IAnimation, TRating, TCategory } from "../../interfaces"; export default class Animation implements IAnimation { //#region Properties - Censored: boolean; - Genre: string[]; - Installation: string; - Language: string[]; - Lenght: string; - Pages: string; - Resolution: string[]; - Authors: AuthorType[]; - Category: CategoryType; - Changelog: string[]; - Cover: string; - ID: number; - LastThreadUpdate: Date; - Name: string; - Overview: string; - Prefixes: string[]; - Rating: RatingType; - Tags: string[]; - ThreadPublishingDate: Date; - Url: string; + censored: boolean; + genre: string[]; + installation: string; + language: string[]; + lenght: string; + pages: string; + resolution: string[]; + authors: TAuthor[]; + category: TCategory; + changelog: string[]; + cover: string; + id: number; + lastThreadUpdate: Date; + name: string; + overview: string; + prefixes: string[]; + rating: TRating; + tags: string[]; + threadPublishingDate: Date; + url: string; //#endregion Properties } \ No newline at end of file diff --git a/src/scripts/classes/handiwork/asset.ts b/src/scripts/classes/handiwork/asset.ts index f9fa969..f5c124d 100644 --- a/src/scripts/classes/handiwork/asset.ts +++ b/src/scripts/classes/handiwork/asset.ts @@ -1,30 +1,30 @@ "use strict"; // Modules from files -import { AuthorType, IAsset, RatingType, CategoryType } from "../../interfaces"; +import { TAuthor, IAsset, TRating, TCategory } from "../../interfaces"; export default class Asset implements IAsset { //#region Properties - AssetLink: string; - AssociatedAssets: string[]; - CompatibleSoftware: string; - IncludedAssets: string[]; - OfficialLinks: string[]; - SKU: string; - Authors: AuthorType[]; - Category: CategoryType; - Changelog: string[]; - Cover: string; - ID: number; - LastThreadUpdate: Date; - Name: string; - Overview: string; - Prefixes: string[]; - Rating: RatingType; - Tags: string[]; - ThreadPublishingDate: Date; - Url: string; + assetLink: string; + associatedAssets: string[]; + compatibleSoftware: string; + includedAssets: string[]; + officialLinks: string[]; + sku: string; + authors: TAuthor[]; + category: TCategory; + changelog: string[]; + cover: string; + id: number; + lastThreadUpdate: Date; + name: string; + overview: string; + prefixes: string[]; + rating: TRating; + tags: string[]; + threadPublishingDate: Date; + url: string; //#endregion Properties } \ No newline at end of file diff --git a/src/scripts/classes/handiwork/comic.ts b/src/scripts/classes/handiwork/comic.ts index f507593..4f266a8 100644 --- a/src/scripts/classes/handiwork/comic.ts +++ b/src/scripts/classes/handiwork/comic.ts @@ -1,26 +1,26 @@ "use strict"; // Modules from files -import { AuthorType, IComic, RatingType, CategoryType } from "../../interfaces"; +import { TAuthor, IComic, TRating, TCategory } from "../../interfaces"; export default class Comic implements IComic { //#region Properties - Genre: string[]; - Pages: string; - Resolution: string[]; - Authors: AuthorType[]; - Category: CategoryType; - Changelog: string[]; - Cover: string; - ID: number; - LastThreadUpdate: Date; - Name: string; - Overview: string; - Prefixes: string[]; - Rating: RatingType; - Tags: string[]; - ThreadPublishingDate: Date; - Url: string; + genre: string[]; + pages: string; + resolution: string[]; + authors: TAuthor[]; + category: TCategory; + changelog: string[]; + cover: string; + id: number; + lastThreadUpdate: Date; + name: string; + overview: string; + prefixes: string[]; + rating: TRating; + tags: string[]; + threadPublishingDate: Date; + url: string; //#endregion Properties } \ No newline at end of file diff --git a/src/scripts/classes/handiwork/game.ts b/src/scripts/classes/handiwork/game.ts index ac837c4..d391953 100644 --- a/src/scripts/classes/handiwork/game.ts +++ b/src/scripts/classes/handiwork/game.ts @@ -1,34 +1,34 @@ "use strict"; // Modules from files -import { AuthorType, EngineType, IGame, RatingType, StatusType, CategoryType } from "../../interfaces"; +import { TAuthor, TEngine, IGame, TRating, TStatus, TCategory } from "../../interfaces"; export default class Game implements IGame { //#region Properties - Censored: boolean; - Genre: string[]; - Installation: string; - Language: string[]; - LastRelease: Date; - OS: string[]; - Version: string; - Authors: AuthorType[]; - Category: CategoryType; - Changelog: string[]; - Cover: string; - ID: number; - LastThreadUpdate: Date; - Name: string; - Overview: string; - Prefixes: string[]; - Rating: RatingType; - Tags: string[]; - ThreadPublishingDate: Date; - Url: string; - Engine: EngineType; - Mod: boolean; - Status: StatusType; + censored: boolean; + engine: TEngine; + genre: string[]; + installation: string; + language: string[]; + lastRelease: Date; + mod: boolean; + os: string[]; + status: TStatus; + version: string; + authors: TAuthor[]; + category: TCategory; + changelog: string[]; + cover: string; + id: number; + lastThreadUpdate: Date; + name: string; + overview: string; + prefixes: string[]; + rating: TRating; + tags: string[]; + threadPublishingDate: Date; + url: string; //#endregion Properties } \ No newline at end of file diff --git a/src/scripts/classes/handiwork/handiwork.ts b/src/scripts/classes/handiwork/handiwork.ts index f683633..e52c794 100644 --- a/src/scripts/classes/handiwork/handiwork.ts +++ b/src/scripts/classes/handiwork/handiwork.ts @@ -1,7 +1,7 @@ "use strict"; // Modules from files -import { AuthorType, RatingType, IHandiwork , EngineType, CategoryType, StatusType} from "../../interfaces"; +import { TAuthor, TRating, IHandiwork, TEngine, TCategory, TStatus } from "../../interfaces"; /** * It represents a generic work, be it a game, a comic, an animation or an asset. @@ -9,39 +9,38 @@ import { AuthorType, RatingType, IHandiwork , EngineType, CategoryType, StatusTy export default class HandiWork implements IHandiwork { //#region Properties - AssetLink: string; - AssociatedAssets: string[]; - Censored: boolean; - Changelog: string[]; - CompatibleSoftware: string; - Genre: string[]; - IncludedAssets: string[]; - Installation: string; - Language: string[]; - LastRelease: Date; - Lenght: string; - OfficialLinks: string[]; - OS: string[]; - Pages: string; - Password: string; - Resolution: string[]; - SKU: string; - Version: string; - Authors: AuthorType[]; - Category: CategoryType; - Cover: string; - ID: number; - LastThreadUpdate: Date; - Name: string; - Overview: string; - Prefixes: string[]; - Rating: RatingType; - Tags: string[]; - ThreadPublishingDate: Date; - Url: string; - Engine: EngineType; - Mod: boolean; - Status: StatusType; + censored: boolean; + engine: TEngine; + genre: string[]; + installation: string; + language: string[]; + lastRelease: Date; + mod: boolean; + os: string[]; + status: TStatus; + version: string; + authors: TAuthor[]; + category: TCategory; + changelog: string[]; + cover: string; + id: number; + lastThreadUpdate: Date; + name: string; + overview: string; + prefixes: string[]; + rating: TRating; + tags: string[]; + threadPublishingDate: Date; + url: string; + pages: string; + resolution: string[]; + lenght: string; + assetLink: string; + associatedAssets: string[]; + compatibleSoftware: string; + includedAssets: string[]; + officialLinks: string[]; + sku: string; //#endregion Properties } \ No newline at end of file diff --git a/src/scripts/interfaces.d.ts b/src/scripts/interfaces.d.ts index 4fc4aff..b99d670 100644 --- a/src/scripts/interfaces.d.ts +++ b/src/scripts/interfaces.d.ts @@ -1,63 +1,63 @@ /** * Data relating to an external platform (i.e. Patreon). */ -export type ExternalPlatformType = { +export type TExternalPlatform = { /** - * Name of the platform. + * name of the platform. */ - Name: string, + name: string, /** - * Link to the platform. + * link to the platform. */ - Link: string + link: string } /** * Information about the author of a work. */ -export type AuthorType = { +export type TAuthor = { /** * Plain name or username of the author. */ - Name: string, + name: string, /** * */ - Platforms: ExternalPlatformType[], + platforms: TExternalPlatform[], } /** * Information on the evaluation of a work. */ -export type RatingType = { +export type TRating = { /** - * Average value of evaluations. + * average value of evaluations. */ - Average: number, + average: number, /** * Best rating received. */ - Best: number, + best: number, /** * Number of ratings made by users. */ - Count: number, + count: number, } /** * List of possible graphics engines used for game development. */ -export type EngineType = "QSP" | "RPGM" | "Unity" | "HTML" | "RAGS" | "Java" | "Ren'Py" | "Flash" | "ADRIFT" | "Others" | "Tads" | "Wolf RPG" | "Unreal Engine" | "WebGL"; +export type TEngine = "QSP" | "RPGM" | "Unity" | "HTML" | "RAGS" | "Java" | "Ren'Py" | "Flash" | "ADRIFT" | "Others" | "Tads" | "Wolf RPG" | "Unreal Engine" | "WebGL"; /** * List of possible progress states associated with a game. */ -export type StatusType = "Completed" | "Ongoing" | "Abandoned" | "Onhold"; +export type TStatus = "Completed" | "Ongoing" | "Abandoned" | "Onhold"; /** * List of possible categories of a particular work. */ -export type CategoryType = "games" | "comics" | "animations" | "assets"; +export type TCategory = "games" | "comics" | "animations" | "assets"; /** * Collection of values defined for each @@ -67,55 +67,55 @@ export interface IBasic { /** * Authors of the work. */ - Authors: AuthorType[], + authors: TAuthor[], /** * Category of the work.. */ - Category: CategoryType, + category: TCategory, /** * List of changes of the work for each version. */ - Changelog: string[], + changelog: string[], /** - * Link to the cover image of the work. + * link to the cover image of the work. */ - Cover: string, + cover: string, /** * Unique ID of the work on the platform. */ - ID: number, + id: number, /** * Last update of the opera thread. */ - LastThreadUpdate: Date, + lastThreadUpdate: Date, /** * Plain name of the work (without tags and/or prefixes) */ - Name: string, + name: string, /** * Work description */ - Overview: string, + overview: string, /** * List of prefixes associated with the work. */ - Prefixes: string[], + prefixes: string[], /** * Evaluation of the work by the users of the platform. */ - Rating: RatingType, + rating: TRating, /** * List of tags associated with the work. */ - Tags: string[], + tags: string[], /** * Date of publication of the thread associated with the work. */ - ThreadPublishingDate: Date, + threadPublishingDate: Date, /** * URL to the work's official conversation on the F95Zone portal. */ - Url: string, + url: string, } /** @@ -126,43 +126,43 @@ export interface IGame extends IBasic { * Specify whether the work has censorship * measures regarding NSFW scenes */ - Censored: boolean, + censored: boolean, /** * Graphics engine used for game development. */ - Engine: EngineType, + engine: TEngine, /** * List of genres associated with the work. */ - Genre: string[], + genre: string[], /** * Author's Guide to Installation. */ - Installation: string, + installation: string, /** * List of available languages. */ - Language: string[], + language: string[], /** * Last time the work underwent updates. */ - LastRelease: Date, + lastRelease: Date, /** * Indicates that this item represents a mod. */ - Mod: boolean, + mod: boolean, /** * List of OS for which the work is compatible. */ - OS: string[], + os: string[], /** * Indicates the progress of a game. */ - Status: StatusType, + status: TStatus, /** * Version of the work. */ - Version: string, + version: string, } /** @@ -172,15 +172,15 @@ export interface IComic extends IBasic { /** * List of genres associated with the work. */ - Genre: string[], + genre: string[], /** * Number of pages or elements that make up the work. */ - Pages: string, + pages: string, /** * List of resolutions available for the work. */ - Resolution: string[], + resolution: string[], } /** @@ -191,31 +191,31 @@ export interface IAnimation extends IBasic { * Specify whether the work has censorship * measures regarding NSFW scenes */ - Censored: boolean, + censored: boolean, /** * List of genres associated with the work. */ - Genre: string[], + genre: string[], /** * Author's Guide to Installation. */ - Installation: string, + installation: string, /** * List of available languages. */ - Language: string[], + language: string[], /** * Length of the animation. */ - Lenght: string, + lenght: string, /** * Number of pages or elements that make up the work. */ - Pages: string, + pages: string, /** * List of resolutions available for the work. */ - Resolution: string[], + resolution: string[], } /** @@ -225,28 +225,28 @@ export interface IAsset extends IBasic { /** * External URL of the asset. */ - AssetLink: string, + assetLink: string, /** * List of URLs of assets associated with the work * (for example same collection). */ - AssociatedAssets: string[], + associatedAssets: string[], /** * Software compatible with the work. */ - CompatibleSoftware: string, + compatibleSoftware: string, /** * List of assets url included in the work or used to develop it. */ - IncludedAssets: string[], + includedAssets: string[], /** * List of official links of the work, external to the platform. */ - OfficialLinks: string[], + officialLinks: string[], /** * Unique SKU value of the work. */ - SKU: string, + sku: string, } /** diff --git a/src/scripts/platform-data.ts b/src/scripts/platform-data.ts index 20835db..b7417c6 100644 --- a/src/scripts/platform-data.ts +++ b/src/scripts/platform-data.ts @@ -16,28 +16,28 @@ import { fetchHTML } from "./network-helper.js"; /** * Represents the single element contained in the data categories. */ -interface SingleOptionObj { - ID: number, - Name: string, - Class: string +interface ISingleOption { + id: number, + name: string, + class: string } /** * Represents the set of values associated with a specific category of data. */ -interface CategoryResObj { - ID: number, - Name: string, - Prefixes: SingleOptionObj[] +interface ICategoryResource { + id: number, + name: string, + prefixes: ISingleOption[] } /** * Represents the set of tags present on the platform- */ -interface LatestResObj { - Prefixes: CategoryResObj[], - Tags: DictType, - Options: string +interface ILatestResource { + prefixes: ICategoryResource[], + tags: DictType, + options: string } //#endregion Interface definitions @@ -107,7 +107,7 @@ function saveCache(path: string): void { * Given the HTML code of the response from the F95Zone, * parse it and return the result. */ -function parseLatestPlatformHTML(html: string): LatestResObj{ +function parseLatestPlatformHTML(html: string): ILatestResource{ const $ = cheerio.load(html); // Clean the JSON string @@ -122,24 +122,24 @@ function parseLatestPlatformHTML(html: string): LatestResObj{ * @private * Assign to the local variables the values from the F95Zone. */ -function assignLatestPlatformData(data: LatestResObj): void { +function assignLatestPlatformData(data: ILatestResource): void { // Local variables const scrapedData = {}; // Parse and assign the values that are NOT tags - for (const p of data.Prefixes) { + for (const p of data.prefixes) { // Prepare the dict const dict: DictType = {}; - for (const e of p.Prefixes) dict[e.ID] = e.Name.replace("'", "'"); + for (const e of p.prefixes) dict[e.id] = e.name.replace("'", "'"); // Save the property - scrapedData[p.Name] = dict; + scrapedData[p.name] = dict; } // Save the values shared.setPrefixPair("engines", Object.assign({}, scrapedData["Engine"])); shared.setPrefixPair("statuses", Object.assign({}, scrapedData["Status"])); shared.setPrefixPair("others", Object.assign({}, scrapedData["Other"])); - shared.setPrefixPair("tags", data.Tags); + shared.setPrefixPair("tags", data.tags); } //#endregion \ No newline at end of file diff --git a/src/scripts/post-parser.ts b/src/scripts/post-parser.ts index 52fbfea..285ab64 100644 --- a/src/scripts/post-parser.ts +++ b/src/scripts/post-parser.ts @@ -2,15 +2,15 @@ //#region Interfaces export interface IPostElement { - Type: "Empty" | "Text" | "Link" | "Image" | "Spoiler", - Name: string, - Text: string, - Content: IPostElement[] + type: "Empty" | "Text" | "Link" | "Image" | "Spoiler", + name: string, + text: string, + content: IPostElement[] } export interface ILink extends IPostElement { - Type: "Image" | "Link", - Href: string, + type: "Image" | "Link", + href: string, } //#endregion Interfaces @@ -30,7 +30,7 @@ export function parseCheerioMainPost($: cheerio.Root, post: cheerio.Cheerio): IP // First fetch all the elements in the post const elements = post.contents().toArray().map(el => { const node = parseCheerioNode($, el); - if (node.Name || node.Text || node.Content.length != 0) { + if (node.name || node.text || node.content.length != 0) { return node; } }).filter(el => el); @@ -56,15 +56,15 @@ function parseCheerioSpoilerNode($: cheerio.Root, spoiler: cheerio.Cheerio): IPo const BUTTON_CLASS = "button.bbCodeSpoiler-button"; const SPOILER_CONTENT_CLASS = "div.bbCodeSpoiler-content > div.bbCodeBlock--spoiler > div.bbCodeBlock-content"; const content: IPostElement = { - Type: "Spoiler", - Name: "", - Text: "", - Content: [] + type: "Spoiler", + name: "", + text: "", + content: [] }; // Find the title of the spoiler (contained in the button) const button = spoiler.find(BUTTON_CLASS).toArray().shift(); - content.Name = $(button).text().trim(); + content.name = $(button).text().trim(); // Parse the content of the spoiler spoiler.find(SPOILER_CONTENT_CLASS).contents().map((idx, el) => { @@ -74,21 +74,21 @@ function parseCheerioSpoilerNode($: cheerio.Root, spoiler: cheerio.Cheerio): IPo // Parse nested spoiler if (element.attr("class") === "bbCodeSpoiler") { const spoiler = parseCheerioSpoilerNode($, element); - content.Content.push(spoiler); + content.content.push(spoiler); } //@ts-ignore // else if (el.name === "br") { // // Add new line - // content.Text += "\n"; + // content.text += "\n"; // } else if (el.type === "text") { // Append text - content.Text += element.text(); + content.text += element.text(); } }); // Clean text - content.Text = content.Text.replace(/\s\s+/g, ' ').trim(); + content.text = content.text.replace(/\s\s+/g, ' ').trim(); return content; } @@ -125,28 +125,26 @@ function getCheerioNonChildrenText(node: cheerio.Cheerio): string { function parseCheerioLinkNode(element: cheerio.Cheerio): ILink | null { //@ts-ignore const name = element[0]?.name; - let returnValue: ILink = null; + const link: ILink = { + name: "", + type: "Link", + text: "", + href: "", + content: [] + }; if (name === "img") { - returnValue = { - Name: "", - Type: "Image", - Text: element.attr("alt"), - Href: element.attr("data-src"), - Content: [] - } + link.type = "Image"; + link.text = element.attr("alt"); + link.href = element.attr("data-src"); } else if (name === "a") { - returnValue = { - Name: "", - Type: "Link", - Text: element.text().replace(/\s\s+/g, ' ').trim(), - Href: element.attr("href"), - Content: [] - } + link.type = "Link"; + link.text = element.text().replace(/\s\s+/g, ' ').trim(); + link.href = element.attr("href"); } - return returnValue; + return link.href ? link : null; } /** @@ -154,21 +152,21 @@ function parseCheerioLinkNode(element: cheerio.Cheerio): ILink | null { * in the `Content` field in case it has no information. */ function reducePostElement(element: IPostElement): IPostElement { - if (element.Content.length === 1) { - const content = element.Content[0] as IPostElement; - const nullValues = (!element.Name || !content.Name) && (!element.Text || !content.Text); - const sameValues = (element.Name === content.Name) || (element.Text === content.Text) + if (element.content.length === 1) { + const content = element.content[0] as IPostElement; + const nullValues = (!element.name || !content.name) && (!element.text || !content.text); + const sameValues = (element.name === content.name) || (element.text === content.text) if (nullValues || sameValues) { - element.Name = element.Name || content.Name; - element.Text = element.Text || content.Text; - element.Content = content.Content; - element.Type = content.Type; + element.name = element.name || content.name; + element.text = element.text || content.text; + element.content = content.content; + element.type = content.type; // If the content is a link, add the HREF to the element const contentILink = content as ILink; const elementILink = element as ILink; - if (contentILink.Href) elementILink.Href = contentILink.Href; + if (contentILink.href) elementILink.href = contentILink.href; } } @@ -182,22 +180,22 @@ function reducePostElement(element: IPostElement): IPostElement { function parseCheerioNode($: cheerio.Root, node: cheerio.Element, reduce = true): IPostElement { // Local variables let content: IPostElement = { - Type: "Empty", - Name: "", - Text: "", - Content: [] + type: "Empty", + name: "", + text: "", + content: [] }; const cheerioNode = $(node); if (isTextNode(node)) { - content.Text = cheerioNode.text().replace(/\s\s+/g, ' ').trim(); - content.Type = "Text"; + content.text = cheerioNode.text().replace(/\s\s+/g, ' ').trim(); + content.type = "Text"; } else { // Get the number of children that the element own const nChildren = cheerioNode.children().length; // Get the text of the element without childrens - content.Text = getCheerioNonChildrenText(cheerioNode); + content.text = getCheerioNonChildrenText(cheerioNode); // Parse spoilers if (cheerioNode.attr("class") === "bbCodeSpoiler") { @@ -205,8 +203,8 @@ function parseCheerioNode($: cheerio.Root, node: cheerio.Element, reduce = true) // Add element if not null if (spoiler) { - content.Content.push(spoiler); - content.Type = "Spoiler"; + content.content.push(spoiler); + content.type = "Spoiler"; } } // Parse links @@ -215,8 +213,8 @@ function parseCheerioNode($: cheerio.Root, node: cheerio.Element, reduce = true) // Add element if not null if (link) { - content.Content.push(link); - content.Type = "Link"; + content.content.push(link); + content.type = "Link"; } } else { cheerioNode.children().map((idx, el) => { @@ -224,8 +222,8 @@ function parseCheerioNode($: cheerio.Root, node: cheerio.Element, reduce = true) const childElement = parseCheerioNode($, el); // If the children is valid (not empty) push it - if ((childElement.Text || childElement.Content.length !== 0) && !isTextNode(el)) { - content.Content.push(childElement); + if ((childElement.text || childElement.content.length !== 0) && !isTextNode(el)) { + content.content.push(childElement); } }); } @@ -246,41 +244,41 @@ function parsePostElements(elements: IPostElement[]): IPostElement[] { for (let i = 0; i < elements.length; i++) { // If the text starts with a special char, clean it - const startWithSpecial = specialRegex.test(elements[i].Text); + const startWithSpecial = specialRegex.test(elements[i].text); // /^[-!$%^&*()_+|~=`{}\[\]:";'<>?,.\/]/ // Get the uppercase text - const upperText = elements[i].Text.toUpperCase(); + const upperText = elements[i].text.toUpperCase(); // Get the latest IPostElement in "pairs" const lastIndex = pairs.length - 1; const lastPair = pairs[lastIndex]; // If this statement is valid, we have a "data" - if (elements[i].Type === "Text" && startWithSpecial && pairs.length > 0) { + if (elements[i].type === "Text" && startWithSpecial && pairs.length > 0) { // We merge this element with the last element appended to 'pairs' - const cleanText = elements[i].Text.replace(specialCharsRegex, "").trim(); - lastPair.Text = lastPair.Text || cleanText; - lastPair.Content.push(...elements[i].Content); + const cleanText = elements[i].text.replace(specialCharsRegex, "").trim(); + lastPair.text = lastPair.text || cleanText; + lastPair.content.push(...elements[i].content); } // This is a special case - else if (elements[i].Text.startsWith("Overview:\n")) { + else if (elements[i].text.startsWith("Overview:\n")) { // We add the overview to the pairs as a text element - elements[i].Type = "Text"; - elements[i].Name = "Overview"; - elements[i].Text = elements[i].Text.replace("Overview:\n", ""); + elements[i].type = "Text"; + elements[i].name = "Overview"; + elements[i].text = elements[i].text.replace("Overview:\n", ""); pairs.push(elements[i]); } // We have an element referred to the previous "title" - else if (elements[i].Type != "Text" && pairs.length > 0) { + else if (elements[i].type != "Text" && pairs.length > 0) { // We append this element to the content of the last title - lastPair.Content.push(elements[i]); + lastPair.content.push(elements[i]); } // ... else we have a "title" (we need to swap the text to the name because it is a title) else { const swap: IPostElement = Object.assign({}, elements[i]); - swap.Name = elements[i].Text; - swap.Text = ""; + swap.name = elements[i].text; + swap.text = ""; pairs.push(swap); } } diff --git a/src/scripts/scraper.ts b/src/scripts/scraper.ts index e4048ec..313fe24 100644 --- a/src/scripts/scraper.ts +++ b/src/scripts/scraper.ts @@ -7,13 +7,11 @@ import luxon from "luxon"; // Modules from file import shared from "./shared.js"; import { fetchHTML } from "./network-helper.js"; -import { getJSONLD, JSONLD } from "./json-ld.js"; +import { getJSONLD, TJsonLD } from "./json-ld.js"; import { selectors as f95Selector } from "./constants/css-selector.js"; import HandiWork from "./classes/handiwork/handiwork.js"; -import { RatingType, IBasic, AuthorType, ExternalPlatformType, EngineType, StatusType, CategoryType } from "./interfaces.js"; -import { login } from "../index.js"; +import { TRating, IBasic, TAuthor, TExternalPlatform, TEngine, TStatus, TCategory } from "./interfaces.js"; import { ILink, IPostElement, parseCheerioMainPost } from "./post-parser.js"; -import Game from "./classes/handiwork/game.js"; //#region Public methods /** @@ -34,16 +32,16 @@ export async function getPostInformation(url: string): Promise // Extract data const postData = parseCheerioMainPost($, mainPost); - const JSONLD = getJSONLD($, body); + const TJsonLD = getJSONLD(body); // Fill in the HandiWork element with the information obtained const hw: HandiWork = {} as HandiWork; - fillWithJSONLD(hw, JSONLD); + fillWithJSONLD(hw, TJsonLD); fillWithPostData(hw, postData); fillWithPrefixes(hw, body); - hw.Tags = extractTags(body); + hw.tags = extractTags(body); - shared.logger.info(`Founded data for ${hw.Name}`); + shared.logger.info(`Founded data for ${hw.name}`); return hw; }; //#endregion Public methods @@ -52,6 +50,10 @@ export async function getPostInformation(url: string): Promise //#region Generic Utility +/** + * Convert a string to a boolean. + * Check also for `yes`/`no` and `1`/`0`. + */ function stringToBoolean(s: string): boolean { // Local variables const positiveTerms = ["true", "yes", "1"]; @@ -67,15 +69,15 @@ function stringToBoolean(s: string): boolean { /** * It processes the evaluations of a particular work starting from the data contained in the JSON+LD tag. */ -function parseRating(data: JSONLD): RatingType { +function parseRating(data: TJsonLD): TRating { shared.logger.trace("Parsing rating..."); // Local variables - const ratingTree = data["aggregateRating"] as JSONLD; - const rating: RatingType = { - Average: parseFloat(ratingTree["ratingValue"] as string), - Best: parseInt(ratingTree["bestRating"] as string), - Count: parseInt(ratingTree["ratingCount"] as string), + const ratingTree = data["aggregateRating"] as TJsonLD; + const rating: TRating = { + average: parseFloat(ratingTree["ratingValue"] as string), + best: parseInt(ratingTree["bestRating"] as string), + count: parseInt(ratingTree["ratingCount"] as string), }; return rating; @@ -96,6 +98,11 @@ function extractIDFromURL(url: string): number { return parseInt(match[0], 10); } +/** + * Clean the title of a HandiWork, removing prefixes + * and generic elements between square brackets, and + * returns the clean title of the work. + */ function cleanHeadline(headline: string): string { shared.logger.trace("Cleaning headline..."); @@ -105,9 +112,7 @@ function cleanHeadline(headline: string): string { // Get the title name let name = headline; - matches.forEach(function replaceElementsInTitle(e) { - name = name.replace(e, ""); - }); + matches.forEach(e => name = name.replace(e, "")); return name.trim(); } @@ -117,7 +122,7 @@ function cleanHeadline(headline: string): string { */ function getPostElementByName(elements: IPostElement[], name: string): IPostElement | undefined { return elements.find(el => { - return el.Name.toUpperCase() === name.toUpperCase(); + return el.name.toUpperCase() === name.toUpperCase(); }); } @@ -164,6 +169,7 @@ function isMod(prefix: string): boolean { } //#endregion Prefix Utility + /** * Compiles a HandiWork object with the data extracted * from the JSON+LD tags related to the object itself. @@ -171,25 +177,25 @@ function isMod(prefix: string): boolean { * `URL`, `ID`, `Category`, `Rating`, * `Name`, `ThreadPublishingDate`, `LastThreadUpdate`. */ -function fillWithJSONLD(hw: HandiWork, data: JSONLD) { +function fillWithJSONLD(hw: HandiWork, data: TJsonLD) { shared.logger.trace("Extracting data from JSON+LD..."); // Set the basic values - hw.Url = data["@id"] as string; - hw.ID = extractIDFromURL(hw.Url); - hw.Category = data["articleSection"] as CategoryType; - hw.Rating = parseRating(data); - hw.Name = cleanHeadline(data["headline"] as string); + hw.url = data["@id"] as string; + hw.id = extractIDFromURL(hw.url); + hw.category = data["articleSection"] as TCategory; + hw.rating = parseRating(data); + hw.name = cleanHeadline(data["headline"] as string); // Check and set the dates const published = data["datePublished"] as string; if (luxon.DateTime.fromISO(published).isValid) { - hw.ThreadPublishingDate = new Date(published); + hw.threadPublishingDate = new Date(published); } const modified = data["dateModified"] as string; if (luxon.DateTime.fromISO(modified).isValid) { - hw.LastThreadUpdate = new Date(modified); + hw.lastThreadUpdate = new Date(modified); } } @@ -199,66 +205,70 @@ function fillWithJSONLD(hw: HandiWork, data: JSONLD) { * The values that will be added are: * `Overview`, `OS`, `Language`, `Version`, `Installation`, * `Pages`, `Resolution`, `Lenght`, `Genre`, `Censored`, - * `LastRelease`, `Authors`, `Changelog`. + * `LastRelease`, `Authors`, `Changelog`, `Cover`. */ function fillWithPostData(hw: HandiWork, elements: IPostElement[]) { // First fill the "simple" elements - hw.Overview = getPostElementByName(elements, "overview")?.Text; - hw.OS = getPostElementByName(elements, "os")?.Text?.split(",").map(s => s.trim()); - hw.Language = getPostElementByName(elements, "language")?.Text?.split(",").map(s => s.trim()); - hw.Version = getPostElementByName(elements, "version")?.Text; - hw.Installation = getPostElementByName(elements, "installation")?.Content.shift()?.Text; - hw.Pages = getPostElementByName(elements, "pages")?.Text; - hw.Resolution = getPostElementByName(elements, "resolution")?.Text?.split(",").map(s => s.trim()); - hw.Lenght = getPostElementByName(elements, "lenght")?.Text; + hw.overview = getPostElementByName(elements, "overview")?.text; + hw.os = getPostElementByName(elements, "os")?.text?.split(",").map(s => s.trim()); + hw.language = getPostElementByName(elements, "language")?.text?.split(",").map(s => s.trim()); + hw.version = getPostElementByName(elements, "version")?.text; + hw.installation = getPostElementByName(elements, "installation")?.content.shift()?.text; + hw.pages = getPostElementByName(elements, "pages")?.text; + hw.resolution = getPostElementByName(elements, "resolution")?.text?.split(",").map(s => s.trim()); + hw.lenght = getPostElementByName(elements, "lenght")?.text; // Parse the censorship const censored = getPostElementByName(elements, "censored") || getPostElementByName(elements, "censorship"); - if (censored) hw.Censored = stringToBoolean(censored.Text); + if (censored) hw.censored = stringToBoolean(censored.text); // Get the genres - const genre = getPostElementByName(elements, "genre")?.Content.shift()?.Text; - hw.Genre = genre?.split(",").map(s => s.trim()); + const genre = getPostElementByName(elements, "genre")?.content.shift()?.text; + hw.genre = genre?.split(",").map(s => s.trim()); + + // Get the cover + const cover = getPostElementByName(elements, "overview")?.content.find(el => el.type === "Image") as ILink; + hw.cover = cover?.href; // Fill the dates - const releaseDate = getPostElementByName(elements, "release date")?.Text; - if (luxon.DateTime.fromISO(releaseDate).isValid) hw.LastRelease = new Date(releaseDate); + const releaseDate = getPostElementByName(elements, "release date")?.text; + if (luxon.DateTime.fromISO(releaseDate).isValid) hw.lastRelease = new Date(releaseDate); //#region Convert the author const authorElement = getPostElementByName(elements, "developer") || getPostElementByName(elements, "developer/publisher") || getPostElementByName(elements, "artist"); - const author: AuthorType = { - Name: authorElement.Text, - Platforms: [] + const author: TAuthor = { + name: authorElement.text, + platforms: [] }; // Add the found platforms - authorElement?.Content.forEach((el: ILink, idx) => { - const platform: ExternalPlatformType = { - Name: el.Text, - Link: el.Href, + authorElement?.content.forEach((el: ILink, idx) => { + const platform: TExternalPlatform = { + name: el.text, + link: el.href, }; - author.Platforms.push(platform); + author.platforms.push(platform); }); - hw.Authors = [author]; + hw.authors = [author]; //#endregion Convert the author //#region Get the changelog - hw.Changelog = []; + hw.changelog = []; const changelogElement = getPostElementByName(elements, "changelog") || getPostElementByName(elements, "change-log"); - const changelogSpoiler = changelogElement?.Content.find(el => { - return el.Type === "Spoiler" && el.Content.length > 0; + const changelogSpoiler = changelogElement?.content.find(el => { + return el.type === "Spoiler" && el.content.length > 0; }); // Add to the changelog the single spoilers - changelogSpoiler.Content.forEach(el => { - if (el.Text.trim()) hw.Changelog.push(el.Text); + changelogSpoiler.content.forEach(el => { + if (el.text.trim()) hw.changelog.push(el.text); }); // Add at the ened also the text of the "changelog" element - hw.Changelog.push(changelogSpoiler.Text); + hw.changelog.push(changelogSpoiler.text); //#endregion Get the changelog } @@ -288,11 +298,11 @@ function fillWithPrefixes(hw: HandiWork, body: cheerio.Cheerio) { // Local variables let mod = false; - let engine: EngineType = null; - let status: StatusType = null; + let engine: TEngine = null; + let status: TStatus = null; // Initialize the array - hw.Prefixes = []; + hw.prefixes = []; // Obtain the title prefixes const prefixeElements = body.find(f95Selector.GT_TITLE_PREFIXES); @@ -305,20 +315,20 @@ function fillWithPrefixes(hw: HandiWork, body: cheerio.Cheerio) { prefix = prefix.replace("[", "").replace("]", ""); // Check what the prefix indicates - if (isEngine(prefix)) engine = prefix as EngineType; - else if (isStatus(prefix)) status = prefix as StatusType; + if (isEngine(prefix)) engine = prefix as TEngine; + else if (isStatus(prefix)) status = prefix as TStatus; else if (isMod(prefix)) mod = true; // Anyway add the prefix to list - hw.Prefixes.push(prefix); + hw.prefixes.push(prefix); }); // If the status is not set, then the game is in development (Ongoing) - status = (!status && hw.Category === "games") ? status : "Ongoing"; + status = (!status && hw.category === "games") ? status : "Ongoing"; - hw.Engine = engine; - hw.Status = status; - hw.Mod = mod; + hw.engine = engine; + hw.status = status; + hw.mod = mod; } //#endregion