From 4a73375cfb49325b30efa647ed41e5e3e147f380 Mon Sep 17 00:00:00 2001 From: MillenniumEarl Date: Mon, 15 Feb 2021 21:28:37 +0100 Subject: [PATCH] Add json-ld.ts --- src/scripts/json-ld.ts | 55 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 src/scripts/json-ld.ts diff --git a/src/scripts/json-ld.ts b/src/scripts/json-ld.ts new file mode 100644 index 0000000..04699c8 --- /dev/null +++ b/src/scripts/json-ld.ts @@ -0,0 +1,55 @@ +/** + * Represents information contained in a JSON+LD tag. + */ +export type JSONLD = { [s: string]: string | JSONLD } + +/** + * Extracts and processes the JSON-LD values of the page. + * @param {cheerio.Cheerio} body Page `body` selector + * @returns {JSONLD[]} List of data obtained from the page + */ +export function getJSONLD(body: cheerio.Cheerio): JSONLD { + shared.logger.trace("Extracting JSON-LD data..."); + + // Fetch the JSON-LD data + const structuredDataElements = body.find(f95Selector.GT_JSONLD); + + // Parse the data + const values = structuredDataElements.map((idx, el) => parseJSONLD(el)).get(); + + // Merge the data and return a single value + return mergeJSONLD(values); +} + +//#region Private methods +/** + * Merges multiple JSON+LD tags into one object. + * @param data List of JSON+LD tags + */ +function mergeJSONLD(data: JSONLD[]): JSONLD { + // Local variables + let merged: JSONLD = {}; + + for (const value of data) { + merged = Object.assign(merged, value); + } + + return merged; +} + +/** + * Parse a JSON-LD element source code. + */ +function parseJSONLD(element: cheerio.Element): JSONLD { + // Get the element HTML + const html = cheerio.load(element).html().trim(); + + // Obtain the JSON-LD + const data = html + .replace("", ""); + + // Convert the string to an object + return JSON.parse(data); +} +//#endregion Private methods \ No newline at end of file