X-Git-Url: https://git.squeep.com/?a=blobdiff_plain;f=src%2Flink-helper.js;h=f35aca9419ab082a7d7f8370ebe506e6fa82278d;hb=b806715f9288323cce7b0ab437ee78b01d26c548;hp=1f3cd0992810f482717f0448781968afb0298723;hpb=28de4364128a4b03918a8cbe868009b5d427220a;p=websub-hub diff --git a/src/link-helper.js b/src/link-helper.js index 1f3cd09..f35aca9 100644 --- a/src/link-helper.js +++ b/src/link-helper.js @@ -11,9 +11,16 @@ const Enum = require('./enum'); const FeedParser = require('feedparser'); const { Readable } = require('stream'); const htmlparser2 = require('htmlparser2'); +const { Iconv } = require('iconv'); const _fileScope = common.fileScope(__filename); +/** + * Helper for accessing headers + * @param {object} headers header object + * @param {string} header header name + * @returns {string} header content + */ function getHeader(headers, header) { return headers[header.toLowerCase()]; } @@ -29,10 +36,10 @@ class LinkHelper { /** * Determine if this hub is listed in response data from url. - * @param {String} url - * @param {Object} headers - * @param {String|Buffer} body - * @returns {Boolean} + * @param {string} url content url + * @param {object} headers headers from accessing url + * @param {string | Buffer} body body from accessing url + * @returns {Promise} url lists this hub */ async validHub(url, headers, body) { const _scope = _fileScope('validHub'); @@ -41,19 +48,34 @@ class LinkHelper { // Add Link headers first, as they take priority over link elements in body. const linkHeader = getHeader(headers, Enum.Header.Link); const links = []; - try { - links.push(...parseLinkHeader(linkHeader)); - } catch (e) { - if (e instanceof ParseSyntaxError) { - this.logger.debug(_scope, 'failed to parse link header, bad syntax', { error: e, linkHeader }); - } else { - this.logger.error(_scope, 'failed to parse link header', { error: e, linkHeader }); + if (linkHeader) { + try { + links.push(...parseLinkHeader(linkHeader)); + } catch (e) { + /* istanbul ignore else */ + if (e instanceof ParseSyntaxError) { + this.logger.debug(_scope, 'failed to parse link header, bad syntax', { error: e, linkHeader }); + } else { + this.logger.error(_scope, 'failed to parse link header', { error: e, linkHeader }); + } + } + } + + const contentType = LinkHelper.parseContentType(getHeader(headers, Enum.Header.ContentType)); + const nonUTF8Charset = !/utf-*8/i.test(contentType.params.charset) && contentType.params.charset; + if (nonUTF8Charset) { + const iconv = new Iconv(nonUTF8Charset, 'utf-8//translit//ignore'); + try { + body = iconv.convert(body).toString('utf8'); + } catch (e) { + /* istanbul ignore next */ + this.logger.error(_scope, 'iconv conversion error', { error: e, contentType, url }); + // But try to carry on, anyhow. } } - const contentType = getHeader(headers, Enum.Header.ContentType); let bodyLinks = []; - switch (contentType) { + switch (contentType.mediaType) { case Enum.ContentType.ApplicationAtom: case Enum.ContentType.ApplicationRDF: case Enum.ContentType.ApplicationRSS: @@ -75,15 +97,44 @@ class LinkHelper { // Fetch all hub relation targets from headers, resolving relative URIs. const hubs = LinkHelper.locateHubTargets(links).map((link) => this.absoluteURI(link, url)); + this.logger.debug(_scope, 'valid hubs for url', { url, hubs }); + return hubs.includes(this.selfUrl); } + /** + * @typedef {object} ContentType + * @property {string} mediaType media type + * @property {object} params map of parameters + */ + /** + * Convert a Content-Type string to normalized components. + * RFC7231 §3.1.1 + * N.B. this non-parser implementation will not work if a parameter + * value for some reason includes a ; or = within a quoted-string. + * @param {string} contentTypeHeader content type header + * @returns {ContentType} contentType + */ + static parseContentType(contentTypeHeader) { + const [ mediaType, ...params ] = (contentTypeHeader || '').split(/ *; */); + return { + mediaType: mediaType.toLowerCase() || Enum.ContentType.ApplicationOctetStream, + params: params.reduce((obj, param) => { + const [field, value] = param.split('='); + const isQuoted = value.startsWith('"') && value.endsWith('"'); + obj[field.toLowerCase()] = isQuoted ? value.slice(1, value.length - 1) : value; + return obj; + }, {}), + }; + } + + /** * Parse XML-ish feed content, extracting link elements into our own format. - * @param {String} feedurl - * @param {String} body - * @returns {Object[]} + * @param {string} feedurl feed url + * @param {string} body feed body + * @returns {Promise} array of link elements */ async linksFromFeedBody(feedurl, body) { const _scope = _fileScope('linksFromFeedBody'); @@ -106,7 +157,11 @@ class LinkHelper { }); feedParser.on('meta', (meta) => { this.logger.debug(_scope, 'FeedParser meta', { meta }); - const feedLinks = meta['atom:link'] || []; + let feedLinks = meta['atom:link'] || []; + if (!Array.isArray(feedLinks)) { + // Parsing RSS seems to return a single entry for this rather than a list. + feedLinks = [feedLinks]; + } feedLinks .map((l) => l['@']) .forEach((l) => { @@ -133,7 +188,8 @@ class LinkHelper { /** * Parse HTML-ish content, extracting link elements into our own format. - * @param {String} body + * @param {string} body html body + * @returns {object[]} array of link elements */ linksFromHTMLBody(body) { const _scope = _fileScope('linksFromHTMLBody'); @@ -161,18 +217,18 @@ class LinkHelper { /** * Attempt to resolve a relative target URI - * @param {String} uri - * @param {String} context - * @returns {String} + * @param {string} uri target + * @param {string} context base + * @returns {string} uri */ absoluteURI(uri, context) { const _scope = _fileScope('absoluteURI'); try { new URL(uri); - } catch (e) { + } catch (e) { // eslint-disable-line no-unused-vars try { uri = new URL(uri, context).href; - } catch (e) { + } catch (e) { // eslint-disable-line no-unused-vars this.logger.debug(_scope, 'could not resolve link URI', { uri, context }); } } @@ -182,8 +238,8 @@ class LinkHelper { /** * Return all link targets with a hub relation. - * @param {Object[]} links - * @returns {String[]} + * @param {object[]} links array of link objects + * @returns {string[]} array of hub targets */ static locateHubTargets(links) { return links