// File: F95API/src/scripts/classes/mapping/thread.ts
// Listing metadata: 314 lines, 8.3 KiB, TypeScript

// Copyright (c) 2021 MillenniumEarl
//
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT
"use strict";
// Public modules from npm
import cheerio from "cheerio";
import luxon from "luxon";
// Modules from files
import Post from "./post.js";
import PlatformUser from "./platform-user.js";
import { TCategory, TRating } from "../../interfaces.js";
import { urls } from "../../constants/url.js";
import { POST, THREAD } from "../../constants/css-selector.js";
import { fetchHTML, fetchPOSTResponse } from "../../network-helper.js";
import Shared from "../../shared.js";
import {
GenericAxiosError,
ParameterError,
UnexpectedResponseContentType
} from "../errors.js";
import { Result } from "../result.js";
import { getJSONLD, TJsonLD } from "../../scrape-data/json-ld.js";
/**
 * Represents a generic F95Zone platform thread.
 *
 * A new instance only knows its ID: every other property is populated
 * by `fetch()` and is `undefined` until that call completes.
 */
export default class Thread {
  //#region Fields

  /**
   * Number of posts assumed to be on each page when converting
   * a post index into a page number (see `getPost`).
   *
   * NOTE(review): `setMaximumPostsForPage` can ask the platform for a
   * different page size but this value is not updated by it — confirm
   * the two stay in sync before using that method.
   */
  private readonly POST_FOR_PAGE = 20;
  private _id: number;
  private _url: string;
  private _title: string;
  private _tags: string[];
  private _prefixes: string[];
  private _rating: TRating;
  private _owner: PlatformUser;
  private _publication: Date;
  private _modified: Date;
  private _category: TCategory;

  //#endregion Fields

  //#region Getters

  /**
   * Unique ID of the thread on the platform.
   */
  public get id(): number {
    return this._id;
  }

  /**
   * URL of the thread.
   *
   * It may vary depending on any versions of the contained product.
   */
  public get url(): string {
    return this._url;
  }

  /**
   * Thread title.
   */
  public get title(): string {
    return this._title;
  }

  /**
   * Tags associated with the thread.
   */
  public get tags(): string[] {
    return this._tags;
  }

  /**
   * Prefixes associated with the thread
   */
  public get prefixes(): string[] {
    return this._prefixes;
  }

  /**
   * Rating assigned to the thread.
   */
  public get rating(): TRating {
    return this._rating;
  }

  /**
   * Owner of the thread.
   */
  public get owner(): PlatformUser {
    return this._owner;
  }

  /**
   * Date the thread was first published.
   */
  public get publication(): Date {
    return this._publication;
  }

  /**
   * Date the thread was last modified.
   */
  public get modified(): Date {
    return this._modified;
  }

  /**
   * Category to which the content of the thread belongs.
   */
  public get category(): TCategory {
    return this._category;
  }

  //#endregion Getters

  /**
   * Initializes an object for mapping a thread.
   *
   * The unique ID of the thread must be specified.
   *
   * @param id Unique ID of the thread on the platform.
   */
  constructor(id: number) {
    this._id = id;
  }

  //#region Private methods

  /**
   * Builds the URL of the `page`-th page of this thread.
   *
   * When `fetch()` has not been called yet the thread URL is derived
   * from the ID (exactly as `fetch()` does), so that page URLs never
   * end up being resolved against an `undefined` base.
   *
   * @param page 1-based page number.
   */
  private pageURL(page: number): string {
    const base = this._url ?? new URL(this.id.toString(), urls.THREADS).toString();
    // The trailing slash makes `page-N` a child segment of the thread URL
    return new URL(`page-${page}`, `${base}/`).toString();
  }

  /**
   * Set the number of posts to display for the current thread.
   *
   * Sends a POST request to `urls.POSTS_NUMBER` using the token
   * of the current session.
   *
   * @param n Number of posts per page to request.
   * @throws The failure value of the response if the request fails.
   */
  private async setMaximumPostsForPage(n: 20 | 40 | 60 | 100): Promise<void> {
    // Prepare the parameters to send via POST request
    const params = {
      _xfResponseType: "json",
      _xfRequestUri: `/account/dpp-update?content_type=thread&content_id=${this.id}`,
      _xfToken: Shared.session.token,
      _xfWithData: "1",
      content_id: this.id.toString(),
      content_type: "thread",
      "dpp_custom_config[posts]": n.toString()
    };

    // Send POST request
    const response = await fetchPOSTResponse(urls.POSTS_NUMBER, params);
    if (response.isFailure()) throw response.value;
  }

  /**
   * Gets all posts on a page.
   *
   * The returned posts only carry their ID: no data is fetched here.
   * Elements without a parsable `post-<number>` ID are skipped.
   *
   * @param html HTML source of a page of the thread.
   */
  private parsePostsInPage(html: string): Post[] {
    // Load the HTML
    const $ = cheerio.load(html);

    const posts: Post[] = [];
    for (const el of $(THREAD.POSTS_IN_PAGE).toArray()) {
      // `attr` returns `undefined` when the element or the attribute is missing
      const rawID = $(el).find(POST.ID).attr("id");
      const id = parseInt((rawID ?? "").replace("post-", ""), 10);
      if (!Number.isNaN(id)) posts.push(new Post(id));
    }
    return posts;
  }

  /**
   * Gets all posts in the thread.
   *
   * All the pages are requested concurrently, then scraped in page order.
   *
   * @param pages Number of pages of the thread to fetch (starting from page 1).
   * @returns The posts of the requested pages sorted by ascending ID.
   * @throws The failure value of the first page (in page order) that could not be fetched.
   */
  private async fetchPosts(pages: number): Promise<Post[]> {
    // Local variables
    type TFetchResult = Promise<
      Result<GenericAxiosError | UnexpectedResponseContentType, string>
    >;
    const htmlPromiseList: TFetchResult[] = [];
    const fetchedPosts: Post[] = [];

    // Start fetching every page in the thread
    for (let i = 1; i <= pages; i++) {
      htmlPromiseList.push(fetchHTML(this.pageURL(i)));
    }

    // Wait for all the pages to load
    const responses = await Promise.all(htmlPromiseList);

    // Scrape the pages
    for (const response of responses) {
      if (response.isFailure()) throw response.value;
      fetchedPosts.push(...this.parsePostsInPage(response.value));
    }

    // Sorts the list of posts
    return fetchedPosts.sort((a, b) => a.id - b.id);
  }

  /**
   * It processes the rating of the thread
   * starting from the data contained in the JSON+LD tag.
   *
   * Every value defaults to 0 when the `aggregateRating` node,
   * or the specific field inside it, is missing or not numeric.
   *
   * @param data JSON+LD data of the thread.
   */
  private parseRating(data: TJsonLD): TRating {
    const ratingTree = data["aggregateRating"] as TJsonLD;
    if (!ratingTree) return { average: 0, best: 0, count: 0 };

    // A missing field is parsed as NaN: use the same default as a missing rating
    const orZero = (value: number): number => (Number.isNaN(value) ? 0 : value);

    const rating: TRating = {
      average: orZero(parseFloat(ratingTree["ratingValue"] as string)),
      best: orZero(parseInt(ratingTree["bestRating"] as string, 10)),
      count: orZero(parseInt(ratingTree["ratingCount"] as string, 10))
    };
    return rating;
  }

  /**
   * Clean the title of a thread, removing prefixes
   * and generic elements between square brackets, and
   * returns the clean title of the work.
   *
   * @param headline Raw title in the form `[PREFIXES] TITLE [VERSION] [AUTHOR]`.
   */
  private cleanHeadline(headline: string): string {
    // Drop every `[...]` group, then the whitespace left at the edges
    return headline.replace(/\[.*?\]/g, "").trim();
  }

  //#endregion Private methods

  //#region Public methods

  /**
   * Gets information about this thread.
   *
   * Downloads the page of the thread and fills URL, title, tags, prefixes,
   * owner (which is fetched in turn), rating, category and dates.
   * A date is assigned only when the scraped value is a valid ISO string.
   *
   * @throws The failure value of the response if the page cannot be fetched.
   */
  public async fetch(): Promise<void> {
    // Prepare the url
    this._url = new URL(this.id.toString(), urls.THREADS).toString();

    // Fetch the HTML source
    const htmlResponse = await fetchHTML(this.url);
    if (htmlResponse.isFailure()) throw htmlResponse.value;

    // Load the HTML
    const $ = cheerio.load(htmlResponse.value);

    // Fetch data from selectors
    const ownerID = $(THREAD.OWNER_ID).attr("data-user-id");
    const tagArray = $(THREAD.TAGS).toArray();
    const prefixArray = $(THREAD.PREFIXES).toArray();
    const JSONLD = getJSONLD($("body"));
    const published = JSONLD["datePublished"] as string;
    const modified = JSONLD["dateModified"] as string;

    // Parse the thread's data
    this._title = this.cleanHeadline(JSONLD["headline"] as string);
    this._tags = tagArray.map((el) => $(el).text().trim());
    this._prefixes = prefixArray.map((el) => $(el).text().trim());
    this._owner = new PlatformUser(parseInt(ownerID, 10));
    await this._owner.fetch();
    this._rating = this.parseRating(JSONLD);
    this._category = JSONLD["articleSection"] as TCategory;

    // Validate the dates
    if (luxon.DateTime.fromISO(modified).isValid) this._modified = new Date(modified);
    if (luxon.DateTime.fromISO(published).isValid)
      this._publication = new Date(published);
  }

  /**
   * Gets the post in the `index` position with respect to the posts in the thread.
   *
   * `index` must be an integer greater or equal to 1.
   * If the post is not found, `null` is returned.
   *
   * The posts of the page expected to contain `index` are fetched one
   * at a time until the one with the requested number is found.
   *
   * @param index 1-based position of the post in the thread.
   * @throws {ParameterError} If `index` is not an integer or is lower than 1.
   * @throws The failure value of the response if the page cannot be fetched.
   */
  public async getPost(index: number): Promise<Post | null> {
    // Validate parameters (NaN and fractional indexes can never match a post)
    if (!Number.isInteger(index) || index < 1)
      throw new ParameterError("Index must be greater or equal than 1");

    // Get the page number of the post
    const page = Math.ceil(index / this.POST_FOR_PAGE);

    // Fetch the page
    const htmlResponse = await fetchHTML(this.pageURL(page));
    if (htmlResponse.isFailure()) throw htmlResponse.value;

    // Find the searched post
    for (const post of this.parsePostsInPage(htmlResponse.value)) {
      await post.fetch();
      if (post.number === index) return post;
    }
    return null;
  }

  //#endregion Public methods
}