From 99a1a3cbe4558f60c91f7dc46a7dbfe86db742cb Mon Sep 17 00:00:00 2001
From: MillenniumEarl
Date: Tue, 2 Mar 2021 12:08:20 +0100
Subject: [PATCH] Implement information from JSONLD

---
Review notes (this section is ignored by `git am`):
 - Line breaks were reconstructed from a whitespace-collapsed copy; blank
   context lines were inferred from the hunk line counts. Leading
   indentation is ASSUMED to be 4 spaces (TODO confirm); apply with
   `git apply --ignore-whitespace` if the target file is indented differently.
 - cleanHeadline(): String.prototype.match() with a /g regex returns null
   when nothing matches, so a headline without "[...]" crashed on
   `matches.forEach`. The result now falls back to an empty array.
 - luxon is imported through its documented named export (`DateTime`)
   instead of a default import.
 - The commit id above and the post-image blob id in the `index` line
   predate these review changes.

 src/scripts/classes/thread.ts | 48 +++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/src/scripts/classes/thread.ts b/src/scripts/classes/thread.ts
index 1122b21..9b827f4 100644
--- a/src/scripts/classes/thread.ts
+++ b/src/scripts/classes/thread.ts
@@ -2,11 +2,12 @@
 
 // Public modules from npm
 import cheerio from "cheerio";
+import { DateTime } from "luxon";
 
 // Modules from files
 import Post from "./post";
 import PlatformUser from "./platform-user";
-import { TRating } from "../interfaces";
+import { TCategory, TRating } from "../interfaces";
 import { urls } from "../constants/url";
 import { THREAD } from "../constants/css-selector";
 import { fetchHTML, fetchPOSTResponse } from "../network-helper";
@@ -30,7 +31,9 @@ export default class Thread {
     private _posts: Post[];
     private _rating: TRating;
     private _owner: PlatformUser;
-    private _creation: Date;
+    private _publication: Date;
+    private _modified: Date;
+    private _category: TCategory;
 
     //#endregion Fields
 
@@ -49,7 +52,7 @@ export default class Thread {
     /**
      * Thread title.
      */
-    public get title() { return this._title; };
+    public get title() { return this._title; }
     /**
      * Tags associated with the thread.
      */
@@ -71,9 +74,17 @@ export default class Thread {
      */
     public get owner() { return this._owner; }
     /**
-     * Creation date of the thread.
+     * Date the thread was first published.
      */
-    public get creation() { return this._creation; }
+    public get publication() { return this._publication; }
+    /**
+     * Date the thread was last modified.
+     */
+    public get modified() { return this._modified; }
+    /**
+     * Category to which the content of the thread belongs.
+     */
+    public get category() { return this._category; }
 
     //#endregion Getters
 
@@ -182,6 +193,22 @@ export default class Thread {
         return rating;
     }
 
+    /**
+     * Clean the headline of a thread by removing every element enclosed
+     * in square brackets (prefixes, version, author) and return the trimmed
+     * title of the work. A headline without brackets is only trimmed.
+     */
+    private cleanHeadline(headline: string): string {
+        // From the title we can extract: Name, author and version
+        // [PREFIXES] TITLE [VERSION] [AUTHOR]
+        const matches = headline.match(/\[(.*?)\]/g) || [];
+
+        // Get the title name
+        let name = headline;
+        matches.forEach(e => name = name.replace(e, ""));
+        return name.trim();
+    }
+
     //#endregion Private methods
 
     //#region Public methods
@@ -201,19 +228,24 @@ export default class Thread {
             const $ = cheerio.load(htmlResponse.value);
 
             // Fetch data from selectors
-            const creationDatetime = $(THREAD.CREATION).attr("datetime");
             const ownerID = $(THREAD.OWNER_ID).attr("data-user-id");
             const tagArray = $(THREAD.TAGS).toArray();
             const prefixArray = $(THREAD.PREFIXES).toArray();
             const JSONLD = getJSONLD($("body"));
+            const published = JSONLD["datePublished"] as string;
+            const modified = JSONLD["dateModified"] as string;
 
             // Parse the thread's data
-            this._title = $(THREAD.TITLE).text();
-            this._creation = new Date(creationDatetime);
+            this._title = this.cleanHeadline(JSONLD["headline"] as string);
             this._tags = tagArray.map(el => $(el).text().trim());
             this._prefixes = prefixArray.map(el => $(el).text().trim());
             this._owner = new PlatformUser(parseInt(ownerID));
             this._rating = this.parseRating(JSONLD);
+            this._category = JSONLD["articleSection"] as TCategory;
+
+            // Validate the dates
+            if (DateTime.fromISO(modified).isValid) this._modified = new Date(modified);
+            if (DateTime.fromISO(published).isValid) this._publication = new Date(published);
 
             // Parse all the posts
             const pages = parseInt($(THREAD.LAST_PAGE).first().text());