Merge pull request #81 from MillenniumEarl/simplify-post-parse

Simplify post parse
2.0.0-ts
Millennium Earl 2021-03-15 21:37:13 +01:00 committed by GitHub
commit 94d6f3667b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 275 additions and 236 deletions

View File

@ -2,5 +2,5 @@
"semi": true, "semi": true,
"trailingComma": "none", "trailingComma": "none",
"singleQuote": false, "singleQuote": false,
"printWidth": 90 "printWidth": 100
} }

13
.vscode/launch.json vendored
View File

@ -5,7 +5,18 @@
"name": "Test", "name": "Test",
"request": "launch", "request": "launch",
"command": "npm run test", "command": "npm run test",
"cwd": "${workspaceFolder}" "cwd": "${workspaceFolder}",
},
{
"type": "node-terminal",
"name": "Example",
"request": "launch",
"command": "npm run run-example",
"cwd": "${workspaceFolder}",
"skipFiles": [
"${workspaceFolder}/node_modules/**/*",
"<node_internals>/**/*"
]
}, },
] ]
} }

13
package-lock.json generated
View File

@ -22,7 +22,6 @@
"@types/chai": "^4.2.15", "@types/chai": "^4.2.15",
"@types/chai-as-promised": "^7.1.3", "@types/chai-as-promised": "^7.1.3",
"@types/inquirer": "^7.3.1", "@types/inquirer": "^7.3.1",
"@types/lodash": "^4.14.168",
"@types/luxon": "^1.25.2", "@types/luxon": "^1.25.2",
"@types/mocha": "^8.2.1", "@types/mocha": "^8.2.1",
"@types/node": "^14.14.27", "@types/node": "^14.14.27",
@ -519,12 +518,6 @@
"integrity": "sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==", "integrity": "sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==",
"dev": true "dev": true
}, },
"node_modules/@types/lodash": {
"version": "4.14.168",
"resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.168.tgz",
"integrity": "sha512-oVfRvqHV/V6D1yifJbVRU3TMp8OT6o6BG+U9MkwuJ3U8/CsDHvalRpsxBqivn71ztOFZBTfJMvETbqHiaNSj7Q==",
"dev": true
},
"node_modules/@types/luxon": { "node_modules/@types/luxon": {
"version": "1.25.2", "version": "1.25.2",
"resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-1.25.2.tgz", "resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-1.25.2.tgz",
@ -5482,12 +5475,6 @@
"integrity": "sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==", "integrity": "sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==",
"dev": true "dev": true
}, },
"@types/lodash": {
"version": "4.14.168",
"resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.168.tgz",
"integrity": "sha512-oVfRvqHV/V6D1yifJbVRU3TMp8OT6o6BG+U9MkwuJ3U8/CsDHvalRpsxBqivn71ztOFZBTfJMvETbqHiaNSj7Q==",
"dev": true
},
"@types/luxon": { "@types/luxon": {
"version": "1.25.2", "version": "1.25.2",
"resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-1.25.2.tgz", "resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-1.25.2.tgz",

View File

@ -48,7 +48,6 @@
"@types/chai": "^4.2.15", "@types/chai": "^4.2.15",
"@types/chai-as-promised": "^7.1.3", "@types/chai-as-promised": "^7.1.3",
"@types/inquirer": "^7.3.1", "@types/inquirer": "^7.3.1",
"@types/lodash": "^4.14.168",
"@types/luxon": "^1.25.2", "@types/luxon": "^1.25.2",
"@types/mocha": "^8.2.1", "@types/mocha": "^8.2.1",
"@types/node": "^14.14.27", "@types/node": "^14.14.27",

View File

@ -60,11 +60,7 @@ async function main() {
// Log in the platform // Log in the platform
console.log("Authenticating..."); console.log("Authenticating...");
const result = await login( const result = await login(process.env.F95_USERNAME, process.env.F95_PASSWORD, insert2faCode);
process.env.F95_USERNAME,
process.env.F95_PASSWORD,
insert2faCode
);
console.log(`Authentication result: ${result.message}\n`); console.log(`Authentication result: ${result.message}\n`);
// Manage failed login // Manage failed login
@ -87,9 +83,7 @@ async function main() {
latestQuery.includedTags = ["3d game"]; latestQuery.includedTags = ["3d game"];
const latestUpdates = await getLatestUpdates<Game>(latestQuery, 1); const latestUpdates = await getLatestUpdates<Game>(latestQuery, 1);
console.log( console.log(`"${latestUpdates.shift().name}" was the last "3d game" tagged game to be updated\n`);
`"${latestUpdates.shift().name}" was the last "3d game" tagged game to be updated\n`
);
// Get game data // Get game data
for (const gamename of gameList) { for (const gamename of gameList) {

View File

@ -6,14 +6,7 @@
"use strict"; "use strict";
// Modules from files // Modules from files
import { import { TAuthor, TRating, IHandiwork, TEngine, TCategory, TStatus } from "../../interfaces";
TAuthor,
TRating,
IHandiwork,
TEngine,
TCategory,
TStatus
} from "../../interfaces";
/** /**
* It represents a generic work, be it a game, a comic, an animation or an asset. * It represents a generic work, be it a game, a comic, an animation or an asset.

View File

@ -96,8 +96,7 @@ export default class HandiworkSearchQuery implements IQuery {
// If the keywords are set or the number // If the keywords are set or the number
// of included tags is greather than 5, // of included tags is greather than 5,
// we must perform a thread search // we must perform a thread search
if (this.keywords || this.includedTags.length > MAX_TAGS_LATEST_SEARCH) if (this.keywords || this.includedTags.length > MAX_TAGS_LATEST_SEARCH) return "thread";
return "thread";
return DEFAULT_SEARCH_TYPE; return DEFAULT_SEARCH_TYPE;
} }

View File

@ -130,8 +130,7 @@ export default class ThreadSearchQuery implements IQuery {
if (this.excludedTags) params["c[excludeTags]"] = this.excludedTags.join(","); if (this.excludedTags) params["c[excludeTags]"] = this.excludedTags.join(",");
// Set minimum reply number // Set minimum reply number
if (this.minimumReplies > 0) if (this.minimumReplies > 0) params["c[min_reply_count]"] = this.minimumReplies.toString();
params["c[min_reply_count]"] = this.minimumReplies.toString();
// Add prefixes // Add prefixes
const parser = new PrefixParser(); const parser = new PrefixParser();

View File

@ -210,8 +210,8 @@ export default class Session {
// Search for expired cookies // Search for expired cookies
const jarValid = const jarValid =
this._cookieJar.getCookiesSync("https://f95zone.to").filter((el) => el.TTL() === 0) this._cookieJar.getCookiesSync("https://f95zone.to").filter((el) => el.TTL() === 0).length ===
.length === 0; 0;
return dateValid && hashValid && jarValid; return dateValid && hashValid && jarValid;
} }

View File

@ -154,7 +154,15 @@ export const POST = {
* *
* For use within a `THREAD.POSTS_IN_PAGE` selector. * For use within a `THREAD.POSTS_IN_PAGE` selector.
*/ */
BOOKMARKED: '* ul.message-attribution-opposite >li > a[title="Bookmark"].is-bookmarked' BOOKMARKED: '* ul.message-attribution-opposite >li > a[title="Bookmark"].is-bookmarked',
/**
* Button used to hide/show a spoiler element of a post.
*/
SPOILER_BUTTON: "button.bbCodeSpoiler-button",
/**
* Contents of a spoiler element in a post.
*/
SPOILER_CONTENT: "div.bbCodeSpoiler-content > div.bbCodeBlock--spoiler > div.bbCodeBlock-content"
}; };
export const MEMBER = { export const MEMBER = {
@ -205,8 +213,7 @@ export const MEMBER = {
* If the text is `Unfollow` then the user is followed. * If the text is `Unfollow` then the user is followed.
* If the text is `Follow` then the user is not followed. * If the text is `Follow` then the user is not followed.
*/ */
FOLLOWED: FOLLOWED: "div.memberHeader-buttons > div.buttonGroup:first-child > a[data-sk-follow] > span",
"div.memberHeader-buttons > div.buttonGroup:first-child > a[data-sk-follow] > span",
/** /**
* Button used to ignore/unignore the user. * Button used to ignore/unignore the user.
* *

View File

@ -83,10 +83,7 @@ export type TCategory = "games" | "mods" | "comics" | "animations" | "assets";
/** /**
* Valid names of classes that implement the IQuery interface. * Valid names of classes that implement the IQuery interface.
*/ */
export type TQueryInterface = export type TQueryInterface = "LatestSearchQuery" | "ThreadSearchQuery" | "HandiworkSearchQuery";
| "LatestSearchQuery"
| "ThreadSearchQuery"
| "HandiworkSearchQuery";
/** /**
* Collection of values defined for each * Collection of values defined for each

View File

@ -86,9 +86,7 @@ export async function fetchHTML(
error: null error: null
}); });
return isHTML return isHTML ? success(response.value.data as string) : failure(unexpectedResponseError);
? success(response.value.data as string)
: failure(unexpectedResponseError);
} else return failure(response.value as GenericAxiosError); } else return failure(response.value as GenericAxiosError);
} }
@ -105,8 +103,7 @@ export async function authenticate(
force: boolean = false force: boolean = false
): Promise<LoginResult> { ): Promise<LoginResult> {
shared.logger.info(`Authenticating with user ${credentials.username}`); shared.logger.info(`Authenticating with user ${credentials.username}`);
if (!credentials.token) if (!credentials.token) throw new InvalidF95Token(`Invalid token for auth: ${credentials.token}`);
throw new InvalidF95Token(`Invalid token for auth: ${credentials.token}`);
// Secure the URL // Secure the URL
const secureURL = enforceHttpsUrl(urls.LOGIN); const secureURL = enforceHttpsUrl(urls.LOGIN);
@ -216,9 +213,7 @@ export async function fetchGETResponse(
const response = await axios.get(secureURL, commonConfig); const response = await axios.get(secureURL, commonConfig);
return success(response); return success(response);
} catch (e) { } catch (e) {
shared.logger.error( shared.logger.error(`(GET) Error ${e.message} occurred while trying to fetch ${secureURL}`);
`(GET) Error ${e.message} occurred while trying to fetch ${secureURL}`
);
const genericError = new GenericAxiosError({ const genericError = new GenericAxiosError({
id: 1, id: 1,
message: `(GET) Error ${e.message} occurred while trying to fetch ${secureURL}`, message: `(GET) Error ${e.message} occurred while trying to fetch ${secureURL}`,
@ -305,10 +300,7 @@ export function isStringAValidURL(url: string): boolean {
* If `true`, the function will consider redirects a violation and return `false`. * If `true`, the function will consider redirects a violation and return `false`.
* Default: `false` * Default: `false`
*/ */
export async function urlExists( export async function urlExists(url: string, checkRedirect: boolean = false): Promise<boolean> {
url: string,
checkRedirect: boolean = false
): Promise<boolean> {
// Local variables // Local variables
let valid = false; let valid = false;
@ -376,10 +368,7 @@ function manageLoginPOSTResponse(response: AxiosResponse<any>) {
} }
// Get the error message (if any) and remove the new line chars // Get the error message (if any) and remove the new line chars
const errorMessage = $("body") const errorMessage = $("body").find(GENERIC.LOGIN_MESSAGE_ERROR).text().replace(/\n/g, "");
.find(GENERIC.LOGIN_MESSAGE_ERROR)
.text()
.replace(/\n/g, "");
// Return the result of the authentication // Return the result of the authentication
const result = errorMessage.trim() === ""; const result = errorMessage.trim() === "";

View File

@ -122,10 +122,7 @@ function stringToBoolean(s: string): boolean {
* *
* Case-insensitive. * Case-insensitive.
*/ */
function getPostElementByName( function getPostElementByName(elements: IPostElement[], name: string): IPostElement | undefined {
elements: IPostElement[],
name: string
): IPostElement | undefined {
return elements.find((el) => el.name.toUpperCase() === name.toUpperCase()); return elements.find((el) => el.name.toUpperCase() === name.toUpperCase());
} }
@ -162,8 +159,7 @@ function fillWithPrefixes(hw: HandiWork, prefixes: string[]) {
// Check what the prefix indicates // Check what the prefix indicates
if (stringInDict(prefix, shared.prefixes["engines"])) engine = prefix as TEngine; if (stringInDict(prefix, shared.prefixes["engines"])) engine = prefix as TEngine;
else if (stringInDict(prefix, shared.prefixes["statuses"])) else if (stringInDict(prefix, shared.prefixes["statuses"])) status = prefix as TStatus;
status = prefix as TStatus;
else if (stringInDict(prefix, fakeModDict)) mod = true; else if (stringInDict(prefix, fakeModDict)) mod = true;
// Anyway add the prefix to list // Anyway add the prefix to list
@ -206,8 +202,7 @@ function fillWithPostData(hw: HandiWork, elements: IPostElement[]) {
// Parse the censorship // Parse the censorship
const censored = const censored =
getPostElementByName(elements, "censored") || getPostElementByName(elements, "censored") || getPostElementByName(elements, "censorship");
getPostElementByName(elements, "censorship");
if (censored) hw.censored = stringToBoolean(censored.text); if (censored) hw.censored = stringToBoolean(censored.text);
// Get the genres // Get the genres
@ -249,8 +244,7 @@ function fillWithPostData(hw: HandiWork, elements: IPostElement[]) {
//#region Get the changelog //#region Get the changelog
hw.changelog = []; hw.changelog = [];
const changelogElement = const changelogElement =
getPostElementByName(elements, "changelog") || getPostElementByName(elements, "changelog") || getPostElementByName(elements, "change-log");
getPostElementByName(elements, "change-log");
if (changelogElement) { if (changelogElement) {
const changelogSpoiler = changelogElement?.content.find((el) => { const changelogSpoiler = changelogElement?.content.find((el) => {
return el.type === "Spoiler" && el.content.length > 0; return el.type === "Spoiler" && el.content.length > 0;

View File

@ -59,9 +59,7 @@ function parseJSONLD(element: cheerio.Element): TJsonLD {
const html = cheerio(element).html().trim(); const html = cheerio(element).html().trim();
// Obtain the JSON-LD // Obtain the JSON-LD
const data = html const data = html.replace('<script type="application/ld+json">', "").replace("</script>", "");
.replace('<script type="application/ld+json">', "")
.replace("</script>", "");
// Convert the string to an object // Convert the string to an object
return JSON.parse(data); return JSON.parse(data);

View File

@ -5,6 +5,9 @@
"use strict"; "use strict";
// Import from files
import { POST } from "../constants/css-selector";
//#region Interfaces //#region Interfaces
export interface IPostElement { export interface IPostElement {
@ -22,13 +25,11 @@ export interface ILink extends IPostElement {
//#endregion Interfaces //#endregion Interfaces
//#region Public methods //#region Public methods
/** /**
* Given a post of a thread page it extracts the information contained in the body. * Given a post of a thread page it extracts the information contained in the body.
*/ */
export function parseF95ThreadPost( export function parseF95ThreadPost($: cheerio.Root, post: cheerio.Cheerio): IPostElement[] {
$: cheerio.Root,
post: cheerio.Cheerio
): IPostElement[] {
// The data is divided between "tag" and "text" elements. // The data is divided between "tag" and "text" elements.
// Simple data is composed of a "tag" element followed // Simple data is composed of a "tag" element followed
// by a "text" element, while more complex data (contained // by a "text" element, while more complex data (contained
@ -40,34 +41,81 @@ export function parseF95ThreadPost(
const elements = post const elements = post
.contents() .contents()
.toArray() .toArray()
.map((el) => parseCheerioNode($, el)) .map((el) => parseCheerioNode($, el)) // Parse the nodes
.filter((node) => node.name || node.text || node.content.length != 0); .filter((el) => !isPostElementEmpty(el)) // Ignore the empty nodes
.map((el) => reducePostElement(el)); // Compress the nodes
// ... then parse the elements to create the pairs of title/data // ... then parse the elements to create the pairs of title/data
return parsePostElements(elements); return associateElementsWithName(elements);
} }
//#endregion Public methods //#endregion Public methods
//#region Private methods //#region Private methods
//#region Node type
/**
 * Tell whether a node is an inline formatting tag.
 *
 * Only `<b>` and `<i>` elements are treated as formatting;
 * any non-tag node (e.g. a text node) is never a formatting node.
 */
function isFormattingNode(node: cheerio.Element): boolean {
  // Anything that is not a tag cannot be a formatting element
  if (node.type !== "tag") return false;

  return node.name === "b" || node.name === "i";
}
/**
 * Tell whether a node is a plain text node,
 * i.e. its `type` is exactly `"text"`.
 */
function isTextNode(node: cheerio.Element): boolean {
  const { type } = node;
  return type === "text";
}
/**
 * Check if the node is a spoiler, i.e. an element
 * carrying the `bbCodeSpoiler` CSS class.
 *
 * The class is matched as a token (via `hasClass`) rather than by comparing
 * the whole `class` attribute, so a spoiler that also carries additional
 * classes is still recognised. Related classes such as
 * `bbCodeSpoiler-content` do not match because they are different tokens.
 *
 * @param node Cheerio-wrapped node to inspect.
 * @returns `true` if the node has the `bbCodeSpoiler` class.
 */
function isSpoilerNode(node: cheerio.Cheerio): boolean {
  return node.hasClass("bbCodeSpoiler");
}
/**
 * Tell whether a node is a link (`<a>`) or an image (`<img>`).
 *
 * Nodes that are not DOM tags (e.g. text nodes) are never links.
 */
function isLinkNode(node: cheerio.Element): boolean {
  return node.type === "tag" && (node.name === "a" || node.name === "img");
}
/**
 * Tell whether a node is a `<noscript>` element.
 */
function isNoScriptNode(node: cheerio.Element): boolean {
  // Only tags have a name worth comparing
  if (node.type !== "tag") return false;

  return node.name === "noscript";
}
//#endregion Node Type
//#region Parse Cheerio node
/** /**
* Process a spoiler element by getting its text broken * Process a spoiler element by getting its text broken
* down by any other spoiler elements present. * down by any other spoiler elements present.
*/ */
function parseCheerioSpoilerNode( function parseCheerioSpoilerNode($: cheerio.Root, node: cheerio.Cheerio): IPostElement {
$: cheerio.Root,
spoiler: cheerio.Cheerio
): IPostElement {
// A spoiler block is composed of a div with class "bbCodeSpoiler", // A spoiler block is composed of a div with class "bbCodeSpoiler",
// containing a div "bbCodeSpoiler-content" containing, in cascade, // containing a div "bbCodeSpoiler-content" containing, in cascade,
// a div with class "bbCodeBlock--spoiler" and a div with class "bbCodeBlock-content". // a div with class "bbCodeBlock--spoiler" and a div with class "bbCodeBlock-content".
// This last tag contains the required data. // This last tag contains the required data.
// Local variables // Local variables
const BUTTON_CLASS = "button.bbCodeSpoiler-button"; const spoiler: IPostElement = {
const SPOILER_CONTENT_CLASS =
"div.bbCodeSpoiler-content > div.bbCodeBlock--spoiler > div.bbCodeBlock-content";
const content: IPostElement = {
type: "Spoiler", type: "Spoiler",
name: "", name: "",
text: "", text: "",
@ -75,185 +123,219 @@ function parseCheerioSpoilerNode(
}; };
// Find the title of the spoiler (contained in the button) // Find the title of the spoiler (contained in the button)
const button = spoiler.find(BUTTON_CLASS).toArray().shift(); spoiler.name = node.find(POST.SPOILER_BUTTON).first().text().trim();
content.name = $(button).text().trim();
// Parse the content of the spoiler // Parse the content of the spoiler
spoiler spoiler.content = node
.find(SPOILER_CONTENT_CLASS) .find(POST.SPOILER_CONTENT)
.contents() .contents()
.map((idx, el) => { .toArray()
// Convert the element .map((el) => parseCheerioNode($, el));
const element = $(el);
// Parse nested spoiler
if (element.attr("class") === "bbCodeSpoiler") {
const spoiler = parseCheerioSpoilerNode($, element);
content.content.push(spoiler);
} else if (el.type === "text") {
// Append text
content.text += element.text();
}
});
// Clean text // Clean text
content.text = content.text.replace(/\s\s+/g, " ").trim(); spoiler.text = spoiler.text.replace(/\s\s+/g, " ").trim();
return content; return spoiler;
} }
/** /**
* Check if the node passed as a parameter is of text type. * Process a node that contains a link or image.
* This also includes formatted nodes (i.e. `<b>`).
*/ */
function isTextNode(node: cheerio.Element): boolean { function parseCheerioLinkNode(element: cheerio.Cheerio): ILink {
const formattedTags = ["b", "i"]; // Local variable
const isText = node.type === "text"; const link: ILink = {
const isFormatted = node.type === "tag" && formattedTags.includes(node.name); type: "Link",
name: "",
text: "",
href: "",
content: []
};
return isText || isFormatted; if (element.is("img")) {
link.type = "Image";
link.text = element.attr("alt");
link.href = element.attr("data-src");
} else if (element.is("a")) {
link.type = "Link";
link.text = element.text().replace(/\s\s+/g, " ").trim();
link.href = element.attr("href");
}
return link;
} }
/**
 * Build a `Text` post element from a text-only node.
 *
 * The element has no name and no children; its text is
 * whatever `getCheerioNonChildrenText` extracts from the node.
 */
function parseCheerioTextNode(node: cheerio.Cheerio): IPostElement {
  const ownText = getCheerioNonChildrenText(node);

  return { type: "Text", name: "", text: ownText, content: [] };
}
//#endregion Parse Cheerio node
//#region IPostElement utility
/**
 * Tell whether an element carries no information of its own:
 * both its `name` and its `text` are empty once trimmed.
 *
 * The `content` of the element is not inspected.
 */
function isPostElementUnknown(node: IPostElement): boolean {
  const hasName = node.name.trim() !== "";
  const hasText = node.text.trim() !== "";

  return !hasName && !hasText;
}
/**
 * Tell whether an element is completely empty: it has no
 * children in `content` and both `name` and `text` are blank.
 */
function isPostElementEmpty(node: IPostElement): boolean {
  // An element with children is never considered empty
  if (node.content.length !== 0) return false;

  return isPostElementUnknown(node);
}
/**
 * Build a fresh `IPostElement` of type `Empty`,
 * with blank `name` and `text` and no children.
 *
 * A new object (and a new `content` array) is returned on every call.
 */
function createEmptyElement(): IPostElement {
  const blank: IPostElement = { type: "Empty", name: "", text: "", content: [] };
  return blank;
}
/**
 * Tell whether an element holds the overview of a thread (post #1).
 *
 * This is the case when at least one direct child of type `Text`
 * has a text starting with "overview" (case-insensitive).
 */
function elementIsOverview(element: IPostElement): boolean {
  return element.content.some(
    (child) => child.type === "Text" && child.text.toUpperCase().startsWith("OVERVIEW")
  );
}
/**
 * Extract the overview text from an element that contains it.
 *
 * Only direct children of type `Text` are considered. A child is kept when,
 * after removing the first "OVERVIEW" occurrence (case-insensitive) and
 * trimming, something alphanumeric is left; this discards the bare
 * "Overview:" label and separator-only fragments.
 * The original texts of the kept children are joined with a single space.
 */
function getOverviewFromElement(element: IPostElement): string {
  // Local variables
  const hasAlphanumeric = /[a-zA-Z0-9]+/;
  const pieces: string[] = [];

  for (const child of element.content) {
    if (child.type !== "Text") continue;

    // Drop the "overview" label before checking if anything useful remains
    const stripped = child.text.toUpperCase().replace("OVERVIEW", "").trim();
    if (stripped === "" || !hasAlphanumeric.test(stripped)) continue;

    pieces.push(child.text);
  }

  return pieces.join(" ");
}
//#endregion IPostElement utility
/** /**
* Gets the text of the node only, excluding child nodes. * Gets the text of the node only, excluding child nodes.
* Also includes formatted text elements (i.e. `<b>`). * Also includes formatted text elements (i.e. `<b>`).
*/ */
function getCheerioNonChildrenText(node: cheerio.Cheerio): string { function getCheerioNonChildrenText(node: cheerio.Cheerio): string {
// Local variable
let text = "";
// If the node has no children, return the node's text
if (node.contents().length === 1) {
// @todo Remove IF after cheerio RC6
text = node.text();
} else {
// Find all the text nodes in the node // Find all the text nodes in the node
const text = node text = node
.first() .first()
.contents() .contents() // @todo Change to children() after cheerio RC6
.filter((idx, el) => { .filter((idx, el) => isTextNode(el))
return isTextNode(el);
})
.text(); .text();
}
// Clean and return the text // Clean and return the text
return text.replace(/\s\s+/g, " ").trim(); return text.replace(/\s\s+/g, " ").trim();
} }
/**
* Process a node and see if it contains a
* link or image. If not, it returns `null`.
*/
function parseCheerioLinkNode(element: cheerio.Cheerio): ILink | null {
//@ts-ignore
const name = element[0]?.name;
const link: ILink = {
name: "",
type: "Link",
text: "",
href: "",
content: []
};
if (name === "img") {
link.type = "Image";
link.text = element.attr("alt");
link.href = element.attr("data-src");
} else if (name === "a") {
link.type = "Link";
link.text = element.text().replace(/\s\s+/g, " ").trim();
link.href = element.attr("href");
}
return link.href ? link : null;
}
/** /**
* Collapse an `IPostElement` element with a single subnode * Collapse an `IPostElement` element with a single subnode
* in the `Content` field in case it has no information. * in the `Content` field in case it has no information.
*/ */
function reducePostElement(element: IPostElement): IPostElement { function reducePostElement(element: IPostElement, recursive = true): IPostElement {
if (element.content.length === 1) { // Local variables
const content = element.content[0] as IPostElement; const shallowCopy = Object.assign({}, element);
const nullValues =
(!element.name || !content.name) && (!element.text || !content.text);
const sameValues = element.name === content.name || element.text === content.text;
if (nullValues || sameValues) { // Find the posts without name and text
element.name = element.name || content.name; const unknownChildrens = shallowCopy.content.filter((e) => isPostElementUnknown(e));
element.text = element.text || content.text; if (recursive) {
element.content.push(...content.content); const recursiveUnknownChildrens = unknownChildrens.map((e) => reducePostElement(e));
element.type = content.type; unknownChildrens.push(...recursiveUnknownChildrens);
// If the content is a link, add the HREF to the element
const contentILink = content as ILink;
const elementILink = element as ILink;
if (contentILink.href) elementILink.href = contentILink.href;
}
} }
return element; // Eliminate non-useful child nodes
if (isPostElementUnknown(shallowCopy) && unknownChildrens.length > 0) {
// Find the valid elements to add to the node
const childContents = unknownChildrens
.filter((e) => !shallowCopy.content.includes(e))
.map((e) => (e.content.length > 0 ? e.content : e));
// Remove the empty elements
shallowCopy.content = shallowCopy.content.filter((e) => !unknownChildrens.includes(e));
// Merge the non-empty children of this node with
// the content of the empty children of this node
const newContent = [].concat(...childContents);
shallowCopy.content.push(...newContent);
}
// If the node has only one child, return it
else if (isPostElementUnknown(shallowCopy) && shallowCopy.content.length === 1) {
return shallowCopy.content[0];
}
return shallowCopy;
} }
/** /**
* Transform a `cheerio.Cheerio` node into an `IPostElement` element with its subnodes. * Transform a `cheerio.Cheerio` node into an `IPostElement` element with its subnodes.
* @param reduce Compress subsequent subnodes if they contain no information. Default: `true`.
*/ */
function parseCheerioNode( function parseCheerioNode($: cheerio.Root, node: cheerio.Element): IPostElement {
$: cheerio.Root,
node: cheerio.Element,
reduce = true
): IPostElement {
// Local variables // Local variables
const content: IPostElement = { let post: IPostElement = createEmptyElement();
type: "Empty",
name: "",
text: "",
content: []
};
const cheerioNode = $(node); const cheerioNode = $(node);
if (isTextNode(node)) { // Parse the node
content.text = cheerioNode.text().replace(/\s\s+/g, " ").trim(); if (!isNoScriptNode(node)) {
content.type = "Text"; if (isTextNode(node) && !isFormattingNode(node)) post = parseCheerioTextNode(cheerioNode);
} else { else if (isSpoilerNode(cheerioNode)) post = parseCheerioSpoilerNode($, cheerioNode);
// Get the number of children that the element own else if (isLinkNode(node)) post = parseCheerioLinkNode(cheerioNode);
const nChildren = cheerioNode.children().length;
// Get the text of the element without childrens // Parse the node's childrens
content.text = getCheerioNonChildrenText(cheerioNode); const childPosts = cheerioNode
.contents() // @todo Change to children() after cheerio RC6
// Parse spoilers .toArray()
if (cheerioNode.attr("class") === "bbCodeSpoiler") { .filter((el) => el) // Ignore undefined elements
const spoiler = parseCheerioSpoilerNode($, cheerioNode); .map((el) => parseCheerioNode($, el))
.filter((el) => !isPostElementEmpty(el));
// Add element if not null post.content.push(...childPosts);
if (spoiler) {
content.content.push(spoiler);
content.type = "Spoiler";
}
}
// Parse links
else if (nChildren === 0 && cheerioNode.length != 0) {
const link = parseCheerioLinkNode(cheerioNode);
// Add element if not null
if (link) {
content.content.push(link);
content.type = "Link";
}
} else {
cheerioNode.children().map((idx, el) => {
// Parse the children of the element passed as parameter
const childElement = parseCheerioNode($, el);
// If the children is valid (not empty) push it
if ((childElement.text || childElement.content.length !== 0) && !isTextNode(el)) {
content.content.push(childElement);
}
});
}
} }
return reduce ? reducePostElement(content) : content; return post;
} }
/** /**
* It simplifies the `IPostElement` elements by associating * It simplifies the `IPostElement` elements by associating
* the corresponding value to each characterizing element (i.e. author). * the corresponding value to each characterizing element (i.e. author).
*/ */
function parsePostElements(elements: IPostElement[]): IPostElement[] { function associateElementsWithName(elements: IPostElement[]): IPostElement[] {
// Local variables // Local variables
const pairs: IPostElement[] = []; const pairs: IPostElement[] = [];
const specialCharsRegex = /^[-!$%^&*()_+|~=`{}[\]:";'<>?,./]/; const specialCharsRegex = /^[-!$%^&*()_+|~=`{}[\]:";'<>?,./]/;
@ -275,11 +357,11 @@ function parsePostElements(elements: IPostElement[]): IPostElement[] {
lastPair.content.push(...elements[i].content); lastPair.content.push(...elements[i].content);
} }
// This is a special case // This is a special case
else if (elements[i].text.startsWith("Overview:\n")) { else if (elementIsOverview(elements[i])) {
// We add the overview to the pairs as a text element // We add the overview to the pairs as a text element
elements[i].type = "Text"; elements[i].type = "Text";
elements[i].name = "Overview"; elements[i].name = "Overview";
elements[i].text = elements[i].text.replace("Overview:\n", ""); elements[i].text = getOverviewFromElement(elements[i]);
pairs.push(elements[i]); pairs.push(elements[i]);
} }
// We have an element referred to the previous "title" // We have an element referred to the previous "title"

View File

@ -34,8 +34,6 @@ export function suite(): void {
it("Fetch post with invalid ID", async function fetchWithInvalidID() { it("Fetch post with invalid ID", async function fetchWithInvalidID() {
Shared.setIsLogged(true); Shared.setIsLogged(true);
const thread = new Thread(-1); const thread = new Thread(-1);
await expect(thread.getPost(0)).to.be.rejectedWith( await expect(thread.getPost(0)).to.be.rejectedWith("Index must be greater or equal than 1");
"Index must be greater or equal than 1"
);
}); });
} }

View File

@ -29,15 +29,7 @@ export function suite(): void {
// Test values // Test values
const testIDs = [103, 225, 44, 13, 2, 7, 22]; const testIDs = [103, 225, 44, 13, 2, 7, 22];
const testPrefixes = [ const testPrefixes = ["corruption", "pregnancy", "slave", "VN", "RPGM", "Ren'Py", "Abandoned"];
"corruption",
"pregnancy",
"slave",
"VN",
"RPGM",
"Ren'Py",
"Abandoned"
];
// Parse values // Parse values
const ids = parser.prefixesToIDs(testPrefixes); const ids = parser.prefixesToIDs(testPrefixes);