Refactoring code

pull/83/head
MillenniumEarl 2021-03-22 11:00:45 +01:00
parent b74a212c80
commit b7d27483cc
1 changed files with 34 additions and 66 deletions

View File

@ -235,6 +235,31 @@ function parseCheerioTextNode(node: cheerio.Cheerio): IPostElement {
return content;
}
/**
 * Extracts the text that belongs directly to `node`, then normalizes it.
 *
 * - When `node.contents()` holds exactly one entry, the full `node.text()`
 *   is used, so a lone formatting child (i.e. `<b>`) still contributes.
 * - Otherwise only the direct contents of the first matched element that
 *   `isTextNode` accepts are concatenated.
 *
 * @param node Cheerio selection to read the text from
 * @returns The text with every run of two or more whitespace characters
 * replaced by a single space, and with leading/trailing whitespace removed
 */
function getCheerioNonChildrenText(node: cheerio.Cheerio): string {
  // @todo Remove this single-content shortcut after cheerio RC6
  const hasSingleContent = node.contents().length === 1;

  const rawText = hasSingleContent
    ? node.text()
    : node
        .first()
        .contents() // @todo Change to children() after cheerio RC6
        .filter((_position, child) => isTextNode(child))
        .text();

  // Only runs of 2+ whitespace characters are collapsed; a lone
  // whitespace character inside the text is kept unchanged
  return rawText.replace(/\s\s+/g, " ").trim();
}
//#endregion Parse Cheerio node
//#region IPostElement utility
@ -268,66 +293,8 @@ function createGenericElement(): IPostElement {
};
}
/**
 * Tells whether the element holds the overview of a thread (post #1).
 *
 * The check is positive when at least one direct entry of
 * `element.content` is of type "Text" and its text starts with
 * "overview" (compared case-insensitively, without trimming).
 *
 * @param element Element whose `content` entries are inspected
 * @returns `true` if such a text entry exists, `false` otherwise
 */
function elementIsOverview(element: IPostElement): boolean {
  return element.content.some(
    // The type is tested first, so the text of non-"Text" entries is never read
    (child) =>
      child.type === "Text" && child.text.toUpperCase().startsWith("OVERVIEW")
  );
}
/**
 * Builds the overview text out of the "Text" entries of an element.
 *
 * An entry is kept only if, after upper-casing its text, removing the
 * first "OVERVIEW" occurrence and trimming, something alphanumeric is
 * left. This discards bare labels such as "Overview:" and separators
 * made only of symbols. Kept entries contribute their ORIGINAL text.
 *
 * @param element Element whose `content` entries are scanned
 * @returns The kept texts joined by a single space ("" if none is kept)
 */
function getOverviewFromElement(element: IPostElement): string {
  const hasAlphanumeric = /[a-zA-Z0-9]+/;
  const pieces: string[] = [];

  for (const child of element.content) {
    // Only plain text entries can be part of the overview
    if (child.type !== "Text") continue;

    // What remains of the text once the "overview" label is removed
    const remainder = child.text.toUpperCase().replace("OVERVIEW", "").trim();
    if (remainder !== "" && hasAlphanumeric.test(remainder)) {
      pieces.push(child.text);
    }
  }

  return pieces.join(" ");
}
//#endregion IPostElement utility
/**
 * Extracts the text that belongs directly to `node`, then normalizes it.
 *
 * - When `node.contents()` holds exactly one entry, the full `node.text()`
 *   is used, so a lone formatting child (i.e. `<b>`) still contributes.
 * - Otherwise only the direct contents of the first matched element that
 *   `isTextNode` accepts are concatenated.
 *
 * @param node Cheerio selection to read the text from
 * @returns The text with every run of two or more whitespace characters
 * replaced by a single space, and with leading/trailing whitespace removed
 */
function getCheerioNonChildrenText(node: cheerio.Cheerio): string {
  // @todo Remove this single-content shortcut after cheerio RC6
  const hasSingleContent = node.contents().length === 1;

  const rawText = hasSingleContent
    ? node.text()
    : node
        .first()
        .contents() // @todo Change to children() after cheerio RC6
        .filter((_position, child) => isTextNode(child))
        .text();

  // Only runs of 2+ whitespace characters are collapsed; a lone
  // whitespace character inside the text is kept unchanged
  return rawText.replace(/\s\s+/g, " ").trim();
}
/**
* Collapse an `IPostElement` element with a single subnode
* in the `Content` field in case it has no information.
@ -434,14 +401,9 @@ function pairUpElements(elements: IPostElement[]): IPostElement[] {
e.pairs.forEach((e, i) => shallow.splice(index + i, 0, e));
});
// Ignore the "Generic" elements that we have already parsed
//const validElements = shallow.filter((e) => e.type !== "Generic");
// Then we find all the IDs of "Text" elements where the
// text doesn't start with a colon. This means
// that we find all the IDs of "title" elements.
// Then we find all the IDs of the elements that are "titles".
const indexes = shallow
.filter((e, i) => filterValidElements(e, i, shallow))
.filter((e, i) => isValidTitleElement(e, i, shallow))
.map((e) => shallow.indexOf(e));
// Now we find all the elements between indexes and
@ -449,7 +411,13 @@ function pairUpElements(elements: IPostElement[]): IPostElement[] {
return indexes.map((i, j) => parseGroupData(i, j, indexes, shallow));
}
function filterValidElements(element: IPostElement, index: number, array: IPostElement[]): boolean {
/**
* Verify if the `element` is a valid title.
* @param element Element to check
* @param index Index of the element in `array`
* @param array Array of elements to check
*/
function isValidTitleElement(element: IPostElement, index: number, array: IPostElement[]): boolean {
// Check if this element is a "title" checking also the next element
const isPostfixDoublePoints = element.text.endsWith(":") && element.text !== ":";
const nextElementIsValue = array[index + 1]?.text.startsWith(":");