diff --git a/src/scripts/classes/handiwork/animation.ts b/src/scripts/classes/handiwork/animation.ts index d097cc8..65c8447 100644 --- a/src/scripts/classes/handiwork/animation.ts +++ b/src/scripts/classes/handiwork/animation.ts @@ -6,7 +6,7 @@ "use strict"; // Modules from files -import { TAuthor, IAnimation, TRating, TCategory } from "../../interfaces"; +import { TAuthor, IAnimation, TRating, TCategory, TChangelog } from "../../interfaces"; export default class Animation implements IAnimation { //#region Properties @@ -19,7 +19,7 @@ export default class Animation implements IAnimation { resolution: string[]; authors: TAuthor[]; category: TCategory; - changelog: string[]; + changelog: TChangelog[]; cover: string; id: number; lastThreadUpdate: Date; diff --git a/src/scripts/classes/handiwork/asset.ts b/src/scripts/classes/handiwork/asset.ts index b18a9e5..5a434be 100644 --- a/src/scripts/classes/handiwork/asset.ts +++ b/src/scripts/classes/handiwork/asset.ts @@ -6,7 +6,7 @@ "use strict"; // Modules from files -import { TAuthor, IAsset, TRating, TCategory } from "../../interfaces"; +import { TAuthor, IAsset, TRating, TCategory, TChangelog } from "../../interfaces"; export default class Asset implements IAsset { //#region Properties @@ -18,7 +18,7 @@ export default class Asset implements IAsset { sku: string; authors: TAuthor[]; category: TCategory; - changelog: string[]; + changelog: TChangelog[]; cover: string; id: number; lastThreadUpdate: Date; diff --git a/src/scripts/classes/handiwork/comic.ts b/src/scripts/classes/handiwork/comic.ts index 6bf713a..ff5b795 100644 --- a/src/scripts/classes/handiwork/comic.ts +++ b/src/scripts/classes/handiwork/comic.ts @@ -6,7 +6,7 @@ "use strict"; // Modules from files -import { TAuthor, IComic, TRating, TCategory } from "../../interfaces"; +import { TAuthor, IComic, TRating, TCategory, TChangelog } from "../../interfaces"; export default class Comic implements IComic { //#region Properties @@ -15,7 +15,7 @@ export default 
class Comic implements IComic { resolution: string[]; authors: TAuthor[]; category: TCategory; - changelog: string[]; + changelog: TChangelog[]; cover: string; id: number; lastThreadUpdate: Date; diff --git a/src/scripts/classes/handiwork/game.ts b/src/scripts/classes/handiwork/game.ts index d422668..80a71d5 100644 --- a/src/scripts/classes/handiwork/game.ts +++ b/src/scripts/classes/handiwork/game.ts @@ -6,7 +6,7 @@ "use strict"; // Modules from files -import { TAuthor, TEngine, IGame, TRating, TStatus, TCategory } from "../../interfaces"; +import { TAuthor, TEngine, IGame, TRating, TStatus, TCategory, TChangelog } from "../../interfaces"; export default class Game implements IGame { //#region Properties @@ -22,7 +22,7 @@ export default class Game implements IGame { version: string; authors: TAuthor[]; category: TCategory; - changelog: string[]; + changelog: TChangelog[]; cover: string; id: number; lastThreadUpdate: Date; diff --git a/src/scripts/classes/handiwork/handiwork.ts b/src/scripts/classes/handiwork/handiwork.ts index cb68d27..c558c1a 100644 --- a/src/scripts/classes/handiwork/handiwork.ts +++ b/src/scripts/classes/handiwork/handiwork.ts @@ -6,7 +6,15 @@ "use strict"; // Modules from files -import { TAuthor, TRating, IHandiwork, TEngine, TCategory, TStatus } from "../../interfaces"; +import { + TAuthor, + TRating, + IHandiwork, + TEngine, + TCategory, + TStatus, + TChangelog +} from "../../interfaces"; /** * It represents a generic work, be it a game, a comic, an animation or an asset. 
@@ -25,7 +33,7 @@ export default class HandiWork implements IHandiwork { version: string; authors: TAuthor[]; category: TCategory; - changelog: string[]; + changelog: TChangelog[]; cover: string; id: number; lastThreadUpdate: Date; diff --git a/src/scripts/interfaces.ts b/src/scripts/interfaces.ts index 33916e6..64b1bae 100644 --- a/src/scripts/interfaces.ts +++ b/src/scripts/interfaces.ts @@ -51,6 +51,20 @@ export type TRating = { count: number; }; +/** + * Information about a single version of the product. + */ +export type TChangelog = { + /** + * Product version. + */ + version: string; + /** + * Version information. + */ + information: string[]; +}; + /** * List of possible graphics engines used for game development. */ @@ -101,7 +115,7 @@ export interface IBasic { /** * List of changes of the work for each version. */ - changelog: string[]; + changelog: TChangelog[]; /** * link to the cover image of the work. */ diff --git a/src/scripts/scrape-data/handiwork-parse.ts b/src/scripts/scrape-data/handiwork-parse.ts index 764a904..1699a19 100644 --- a/src/scripts/scrape-data/handiwork-parse.ts +++ b/src/scripts/scrape-data/handiwork-parse.ts @@ -11,7 +11,7 @@ import { DateTime } from "luxon"; // Modules from files import HandiWork from "../classes/handiwork/handiwork"; import Thread from "../classes/mapping/thread"; -import { IBasic, TAuthor, TEngine, TExternalPlatform, TStatus } from "../interfaces"; +import { IBasic, TAuthor, TChangelog, TEngine, TExternalPlatform, TStatus } from "../interfaces"; import shared, { TPrefixDict } from "../shared"; import { ILink, IPostElement } from "./post-parse"; @@ -193,7 +193,7 @@ function fillWithPostData(hw: HandiWork, elements: IPostElement[]) { ?.text?.split(",") .map((s) => s.trim()); hw.version = getPostElementByName(elements, "version")?.text; - hw.installation = getPostElementByName(elements, "installation")?.content.shift()?.text; + hw.installation = getPostElementByName(elements, "installation")?.text; hw.pages = 
getPostElementByName(elements, "pages")?.text; hw.resolution = getPostElementByName(elements, "resolution") ?.text?.split(",") @@ -206,64 +206,112 @@ function fillWithPostData(hw: HandiWork, elements: IPostElement[]) { if (censored) hw.censored = stringToBoolean(censored.text); // Get the genres - const genre = getPostElementByName(elements, "genre")?.content.shift()?.text; + const genre = getPostElementByName(elements, "genre")?.text; hw.genre = genre ?.split(",") .map((s) => s.trim()) .filter((s) => s !== ""); // Get the cover - const cover = getPostElementByName(elements, "overview")?.content.find( - (el) => el.type === "Image" - ) as ILink; + const cover = elements.find((e) => e.type === "Image") as ILink; hw.cover = cover?.href; // Fill the dates const releaseDate = getPostElementByName(elements, "release date")?.text; if (DateTime.fromISO(releaseDate).isValid) hw.lastRelease = new Date(releaseDate); - //#region Convert the author + // Get the author + hw.authors = parseAuthor(elements); + + // Get the changelog + hw.changelog = parseChangelog(elements); +} + +/** + * Parse the author from the post's data. 
+ */ +function parseAuthor(elements: IPostElement[]): TAuthor[] { + // Local variables + const author: TAuthor = { + name: "", + platforms: [] + }; + + // Fetch the authors from the post data const authorElement = getPostElementByName(elements, "developer") || getPostElementByName(elements, "developer/publisher") || getPostElementByName(elements, "artist"); - const author: TAuthor = { - name: authorElement?.text, - platforms: [] - }; - // Add the found platforms - authorElement?.content.forEach((el: ILink, idx) => { - const platform: TExternalPlatform = { - name: el.text, - link: el.href - }; + if (authorElement) { + // Set the author name + author.name = authorElement.text; - author.platforms.push(platform); - }); - hw.authors = [author]; - //#endregion Convert the author + // Add the found platforms + authorElement.content.forEach((e: ILink) => { + // Ignore invalid links + if (e.href) { + // Create and push the new platform + const platform: TExternalPlatform = { + name: e.text, + link: e.href + }; - //#region Get the changelog - hw.changelog = []; + author.platforms.push(platform); + } + }); + } + + return [author]; +} + +/** + * Parse the changelog from the post's data. 
+ */ +function parseChangelog(elements: IPostElement[]): TChangelog[] { + // Local variables + const changelog = []; const changelogElement = getPostElementByName(elements, "changelog") || getPostElementByName(elements, "change-log"); - if (changelogElement?.content) { - const changelogSpoiler = changelogElement.content.find( - (el) => el.type === "Spoiler" && el.content.length > 0 - ); + if (changelogElement) { + // regex used to match version tags + const versionRegex = /^v[0-9]+\.[0-9]+.*/; - // Add to the changelog the single spoilers - const spoilers = changelogSpoiler.content - .filter((e) => e.text.trim() !== "") - .map((e) => e.text); - hw.changelog.push(...spoilers); + // Get the indexes of the version tags + const indexesVersion = changelogElement.content + .filter((e) => e.type === "Text" && versionRegex.test(e.text)) + .map((e) => changelogElement.content.indexOf(e)); - // Add at the end also the text of the "changelog" element - hw.changelog.push(changelogSpoiler.text); + const results = indexesVersion.map((i, j) => { + // In-loop variable + const versionChangelog: TChangelog = { + version: "", + information: [] + }; + + // Get the index of the next version tag, or the end of the content for the last one + const diff = indexesVersion[j + 1] ?? 
changelogElement.content.length; + + // fetch the group of data of this version tag + const group = changelogElement.content.slice(i, diff); + versionChangelog.version = group.shift().text.replace("v", "").trim(); + + // parse the data + group.forEach((e) => { + if (e.type === "Generic" || e.type === "Spoiler") { + const textes = e.content.map((c) => c.text); + versionChangelog.information.push(...textes); + } else versionChangelog.information.push(e.text); + }); + + return versionChangelog; + }); + + changelog.push(...results); } - //#endregion Get the changelog + + return changelog; } //#endregion Private methods diff --git a/src/scripts/scrape-data/post-parse.ts b/src/scripts/scrape-data/post-parse.ts index c7218e9..3837005 100644 --- a/src/scripts/scrape-data/post-parse.ts +++ b/src/scripts/scrape-data/post-parse.ts @@ -8,17 +8,41 @@ // Import from files import { POST } from "../constants/css-selector"; +// Types +type TNodeType = "Text" | "Formatted" | "Spoiler" | "Link" | "List" | "Noscript" | "Unknown"; + //#region Interfaces +/** + * Represents an element contained in the post. + */ export interface IPostElement { + /** + * Type of element. + */ type: "Generic" | "Text" | "Link" | "Image" | "Spoiler"; + /** + * Name associated with the element. + */ name: string; + /** + * Text of the content of the element excluding any children. + */ text: string; + /** + * Children elements contained in this element. + */ content: IPostElement[]; } +/** + * Represents a link or image element in the post. + */ export interface ILink extends IPostElement { type: "Image" | "Link"; + /** + * Link to the resource. 
+ */ href: string; } @@ -54,7 +78,7 @@ export function parseF95ThreadPost($: cheerio.Root, post: cheerio.Cheerio): IPos supernode = removeEmptyContentFromElement(supernode); // Finally parse the elements to create the pairs of title/data - return associateNameToElements(supernode.content); + return pairUpElements(supernode.content); } //#endregion Public methods @@ -94,8 +118,8 @@ function isLinkNode(node: cheerio.Element): boolean { // The node is a valid DOM element if (node.type === "tag") { - const el = node as cheerio.TagElement; - valid = el.name === "a" || el.name === "img"; + const e = node as cheerio.TagElement; + valid = e.name === "a" || e.name === "img"; } return valid; @@ -108,6 +132,32 @@ function isNoScriptNode(node: cheerio.Element): boolean { return node.type === "tag" && node.name === "noscript"; } +/** + * Check if the node is a list element, i.e. `
  • ` or `