/**
 * A utility class for checking link values in a topic's data and metadata.
 * Used to determine whether we are a valid hub for a topic.
 */
const { parse: parseLinkHeader, SyntaxError: ParseSyntaxError } = require('@squeep/web-linking');
const common = require('./common');
const Enum = require('./enum');
const FeedParser = require('feedparser');
const { Readable } = require('stream');
const htmlparser2 = require('htmlparser2');
const { Iconv } = require('iconv');

// Called with a function name to produce the scope value handed to the
// logger as the first argument of every log call in this file.
const _fileScope = common.fileScope(__filename);
/**
 * Look up a header value by name.
 * The requested name is lower-cased before the lookup, so the keys of
 * `headers` are expected to be lower-case already.
 * @param {Object} headers
 * @param {String} header
 * @returns {*} the value stored under the lower-cased name, or undefined
 */
function getHeader(headers, header) {
  return headers[header.toLowerCase()];
}
24 constructor(logger
, options
) {
26 this.options
= options
;
27 this.selfUrl
= options
.dingus
.selfBaseUrl
;
32 * Determine if this hub is listed in response data from url.
34 * @param {Object} headers
35 * @param {String|Buffer} body
38 async
validHub(url
, headers
, body
) {
39 const _scope
= _fileScope('validHub');
40 this.logger
.debug(_scope
, 'called', { headers
, body: common
.logTruncate(body
, 100) });
42 // Add Link headers first, as they take priority over link elements in body.
43 const linkHeader
= getHeader(headers
, Enum
.Header
.Link
);
47 links
.push(...parseLinkHeader(linkHeader
));
49 /* istanbul ignore else */
50 if (e
instanceof ParseSyntaxError
) {
51 this.logger
.debug(_scope
, 'failed to parse link header, bad syntax', { error: e
, linkHeader
});
53 this.logger
.error(_scope
, 'failed to parse link header', { error: e
, linkHeader
});
58 const contentType
= LinkHelper
.parseContentType(getHeader(headers
, Enum
.Header
.ContentType
));
59 const nonUTF8Charset
= !/utf-*8/i.test(contentType
.params
.charset
) && contentType
.params
.charset
;
61 const iconv
= new Iconv(nonUTF8Charset
, 'utf-8//translit//ignore');
63 body
= iconv
.convert(body
).toString('utf8');
65 /* istanbul ignore next */
66 this.logger
.error(_scope
, 'iconv conversion error', { error: e
, contentType
, url
});
67 // But try to carry on, anyhow.
72 switch (contentType
.mediaType
) {
73 case Enum
.ContentType
.ApplicationAtom:
74 case Enum
.ContentType
.ApplicationRDF:
75 case Enum
.ContentType
.ApplicationRSS:
76 case Enum
.ContentType
.ApplicationXML:
77 case Enum
.ContentType
.TextXML: {
78 bodyLinks
= await
this.linksFromFeedBody(url
, body
);
82 case Enum
.ContentType
.TextHTML:
83 bodyLinks
= this.linksFromHTMLBody(body
);
87 this.logger
.debug(_scope
, 'no parser for content type', { contentType
});
89 links
.push(...bodyLinks
);
91 // Fetch all hub relation targets from headers, resolving relative URIs.
92 const hubs
= LinkHelper
.locateHubTargets(links
).map((link
) => this.absoluteURI(link
, url
));
94 this.logger
.debug(_scope
, 'valid hubs for url', { url
, hubs
});
96 return hubs
.includes(this.selfUrl
);
101 * Convert a Content-Type string to normalized components.
103 * N.B. this non-parser implementation will not work if a parameter
104 * value for some reason includes a ; or = within a quoted-string.
105 * @param {String} contentTypeHeader
106 * @returns {Object} contentType
107 * @returns {String} contentType.mediaType
108 * @returns {Object} contentType.params
110 static parseContentType(contentTypeHeader
) {
111 const [ mediaType
, ...params
] = (contentTypeHeader
|| '').split(/ *; */
);
113 mediaType: mediaType
.toLowerCase() || Enum
.ContentType
.ApplicationOctetStream
,
114 params: params
.reduce((obj
, param
) => {
115 const [field
, value
] = param
.split('=');
116 const isQuoted
= value
.charAt(0) === '"' && value
.charAt(value
.length
- 1) === '"';
117 obj
[field
.toLowerCase()] = isQuoted
? value
.slice(1, value
.length
- 1) : value
;
125 * Parse XML-ish feed content, extracting link elements into our own format.
126 * @param {String} feedurl
127 * @param {String} body
128 * @returns {Object[]}
130 async
linksFromFeedBody(feedurl
, body
) {
131 const _scope
= _fileScope('linksFromFeedBody');
132 this.logger
.debug(_scope
, 'called', { feedurl
, body: common
.logTruncate(body
, 100) });
134 const feedParser
= new FeedParser({
138 const bodyStream
= Readable
.from(body
);
141 return new Promise((resolve
) => {
142 feedParser
.on('error', (err
) => {
143 this.logger
.debug(_scope
, 'FeedParser error', { err
, feedurl
, body
});
145 feedParser
.on('end', () => {
146 this.logger
.debug(_scope
, 'FeedParser finished', { links
});
149 feedParser
.on('meta', (meta
) => {
150 this.logger
.debug(_scope
, 'FeedParser meta', { meta
});
151 let feedLinks
= meta
['atom:link'] || [];
152 if (!Array
.isArray(feedLinks
)) {
153 // Parsing RSS seems to return a single entry for this rather than a list.
154 feedLinks
= [feedLinks
];
161 attributes: Object
.entries(l
)
162 .filter(([name
]) => name
!== 'href')
163 .map(([name
, value
]) => ({ name
, value
})),
168 feedParser
.on('readable', () => {
170 while ((_item
= feedParser
.read())) {
171 // Quietly consume remaining stream content
175 bodyStream
.pipe(feedParser
);
181 * Parse HTML-ish content, extracting link elements into our own format.
182 * @param {String} body
184 linksFromHTMLBody(body
) {
185 const _scope
= _fileScope('linksFromHTMLBody');
186 this.logger
.debug(_scope
, 'called', { body: common
.logTruncate(body
, 100) });
189 const parser
= new htmlparser2
.Parser({
190 onopentag(tagName
, attributes
) {
191 if (tagName
.toLowerCase() === 'link') {
193 target: attributes
.href
,
194 attributes: Object
.entries(attributes
)
195 .filter(([name
]) => name
!== 'href')
196 .map(([name
, value
]) => ({ name
, value
})),
209 * Attempt to resolve a relative target URI
210 * @param {String} uri
211 * @param {String} context
214 absoluteURI(uri
, context
) {
215 const _scope
= _fileScope('absoluteURI');
220 uri
= new URL(uri
, context
).href
;
222 this.logger
.debug(_scope
, 'could not resolve link URI', { uri
, context
});
230 * Return all link targets with a hub relation.
231 * @param {Object[]} links
232 * @returns {String[]}
234 static locateHubTargets(links
) {
236 .filter((link
) => link
.attributes
.some((attr
) => attr
.name
=== 'rel' && ` ${attr.value} `.includes(' hub ')))
237 .map((link
) => link
.target
);
module.exports = LinkHelper;