5c6b83939f579b18491fc935c87fddd65a93131b
4 * A utility class for checking link values in a topic's data and metadata.
5 * Used to determine whether we are a valid hub for a topic.
8 const { parse: parseLinkHeader
, SyntaxError: ParseSyntaxError
} = require('@squeep/web-linking');
9 const common
= require('./common');
10 const Enum
= require('./enum');
11 const FeedParser
= require('feedparser');
12 const { Readable
} = require('stream');
13 const htmlparser2
= require('htmlparser2');
// Module-level helper obtained from common.fileScope for this file. Each method
// calls it with its own name and hands the result to the logger as the first
// argument of every log call.
15 const _fileScope
= common
.fileScope(__filename
);
/**
 * Look up a header value by name, ignoring the case of the name.
 *
 * The lowercased name is tried first, which matches header objects whose keys
 * are already lowercased. If that finds nothing, the object's own enumerable
 * keys are compared case-insensitively, so objects that preserve the original
 * casing (e.g. `Content-Type`) are handled as well.
 * @param {Object} headers header name to value mapping; may be null or undefined
 * @param {String} header header name to look up, in any casing
 * @returns {*} the stored header value, or undefined when absent
 */
function getHeader(headers, header) {
  // A response without any headers simply has no value for any header name.
  if (headers == null) {
    return undefined;
  }

  const wanted = header.toLowerCase();

  // Fast path: key already lowercased.
  const direct = headers[wanted];
  if (direct !== undefined) {
    return direct;
  }

  // Slow path: HTTP field names are case-insensitive, so accept any casing.
  const matchingKey = Object.keys(headers).find((key) => key.toLowerCase() === wanted);
  return matchingKey === undefined ? undefined : headers[matchingKey];
}
// Keep the supplied options and cache options.dingus.selfBaseUrl as selfUrl,
// the value validHub looks for among the hub targets it discovers.
23 constructor(logger
, options
) {
25 this.options
= options
;
26 this.selfUrl
= options
.dingus
.selfBaseUrl
;
31 * Determine if this hub is listed in response data from url.
33 * @param {Object} headers
34 * @param {String|Buffer} body
// Report whether this.selfUrl is among the hub link targets advertised for
// `url`. Links are gathered from the Link response header and, depending on
// the Content-Type response header, from the body. Resolves to a boolean.
37 async
validHub(url
, headers
, body
) {
38 const _scope
= _fileScope('validHub');
// The body is passed through common.logTruncate (limit argument 100) before logging.
39 this.logger
.debug(_scope
, 'called', { headers
, body: common
.logTruncate(body
, 100) });
41 // Add Link headers first, as they take priority over link elements in body.
42 const linkHeader
= getHeader(headers
, Enum
.Header
.Link
);
46 links
.push(...parseLinkHeader(linkHeader
));
// A ParseSyntaxError is reported at debug level with a "bad syntax" message;
// the other logging call below reports a parse failure at error level.
48 if (e
instanceof ParseSyntaxError
) {
49 this.logger
.debug(_scope
, 'failed to parse link header, bad syntax', { error: e
, linkHeader
});
51 this.logger
.error(_scope
, 'failed to parse link header', { error: e
, linkHeader
});
55 const contentType
= getHeader(headers
, Enum
.Header
.ContentType
);
// Only the part before the first "; " selects a body parser; the remainder is unused.
// NOTE(review): the separator pattern requires at least one space after ';',
// so a value like 'text/html;charset=utf-8' is not split — presumably it then
// matches none of the cases below; confirm whether that is intended.
57 const [contentTypeBase
, _contentTypeEncoding
] = contentType
.split(/; +/);
59 switch (contentTypeBase
) {
// Atom, RDF, RSS and XML content types all go through linksFromFeedBody.
60 case Enum
.ContentType
.ApplicationAtom:
61 case Enum
.ContentType
.ApplicationRDF:
62 case Enum
.ContentType
.ApplicationRSS:
63 case Enum
.ContentType
.ApplicationXML:
64 case Enum
.ContentType
.TextXML: {
65 bodyLinks
= await
this.linksFromFeedBody(url
, body
);
// HTML content goes through linksFromHTMLBody, which is synchronous.
69 case Enum
.ContentType
.TextHTML:
70 bodyLinks
= this.linksFromHTMLBody(body
);
// Content types without a parser are only logged here.
74 this.logger
.debug(_scope
, 'no parser for content type', { contentType
});
// Body links are appended after any links already taken from the Link header.
76 links
.push(...bodyLinks
);
79 // Fetch all hub relation targets from the collected links, resolving relative URIs.
80 const hubs
= LinkHelper
.locateHubTargets(links
).map((link
) => this.absoluteURI(link
, url
));
82 this.logger
.debug(_scope
, 'valid hubs for url', { url
, hubs
});
// Array#includes compares by value, so a hub target must equal selfUrl exactly.
84 return hubs
.includes(this.selfUrl
);
89 * Parse XML-ish feed content, extracting link elements into our own format.
90 * @param {String} feedurl
91 * @param {String} body
// Feed `body` through a FeedParser instance and return a promise. Link data is
// read from the 'atom:link' entry of the parser's 'meta' event.
94 async
linksFromFeedBody(feedurl
, body
) {
95 const _scope
= _fileScope('linksFromFeedBody');
96 this.logger
.debug(_scope
, 'called', { feedurl
, body: common
.logTruncate(body
, 100) });
98 const feedParser
= new FeedParser({
// Wrap the body in a Readable so it can be piped into the parser.
102 const bodyStream
= Readable
.from(body
);
// The executor only accepts `resolve`, so this promise is never rejected from here.
105 return new Promise((resolve
) => {
// Parser errors are logged at debug level.
// NOTE(review): unlike the 'called' entry above, this logs the body untruncated.
106 feedParser
.on('error', (err
) => {
107 this.logger
.debug(_scope
, 'FeedParser error', { err
, feedurl
, body
});
109 feedParser
.on('end', () => {
110 this.logger
.debug(_scope
, 'FeedParser finished', { links
});
113 feedParser
.on('meta', (meta
) => {
114 this.logger
.debug(_scope
, 'FeedParser meta', { meta
});
// Falls back to an empty list when the metadata has no 'atom:link' entry.
115 let feedLinks
= meta
['atom:link'] || [];
116 if (!Array
.isArray(feedLinks
)) {
117 // Parsing RSS seems to return a single entry for this rather than a list.
118 feedLinks
= [feedLinks
];
// Every entry of a link other than 'href' is kept as a { name, value } pair.
125 attributes: Object
.entries(l
)
126 .filter(([name
]) => name
!== 'href')
127 .map(([name
, value
]) => ({ name
, value
})),
// Items are read until read() returns a falsy value.
132 feedParser
.on('readable', () => {
134 while ((_item
= feedParser
.read())) {
135 // Quietly consume remaining stream content
// Piping the body stream into the parser starts the parse.
139 bodyStream
.pipe(feedParser
);
145 * Parse HTML-ish content, extracting link elements into our own format.
146 * @param {String} body
// Scan HTML content with an htmlparser2 Parser and describe each <link> element
// as a target (its href) plus its remaining attributes.
148 linksFromHTMLBody(body
) {
149 const _scope
= _fileScope('linksFromHTMLBody');
150 this.logger
.debug(_scope
, 'called', { body: common
.logTruncate(body
, 100) });
153 const parser
= new htmlparser2
.Parser({
154 onopentag(tagName
, attributes
) {
// Only link elements are considered; the tag name is compared case-insensitively.
155 if (tagName
.toLowerCase() === 'link') {
// NOTE(review): target is undefined when the element carries no href attribute.
157 target: attributes
.href
,
// Every attribute other than href is kept as a { name, value } pair.
158 attributes: Object
.entries(attributes
)
159 .filter(([name
]) => name
!== 'href')
160 .map(([name
, value
]) => ({ name
, value
})),
173 * Attempt to resolve a relative target URI
174 * @param {String} uri
175 * @param {String} context
// Resolve `uri` against `context` with the URL constructor, keeping the
// resulting href string.
178 absoluteURI(uri
, context
) {
179 const _scope
= _fileScope('absoluteURI');
// `uri` is overwritten with the serialized absolute form.
184 uri
= new URL(uri
, context
).href
;
// Debug-level report, per its message, for a URI that could not be resolved.
186 this.logger
.debug(_scope
, 'could not resolve link URI', { uri
, context
});
194 * Return all link targets with a hub relation.
195 * @param {Object[]} links
196 * @returns {String[]}
// Select the links that have a rel attribute naming the hub relation, then
// reduce each selected link to its target.
198 static locateHubTargets(links
) {
// Padding the value with spaces lets ' hub ' match 'hub' as one whole
// space-separated token, including at the start or end of the value.
// NOTE(review): the match is case-sensitive for both the attribute name and
// the relation, and only single spaces count as separators — confirm that
// inputs such as rel="HUB" or tab-separated values need not match.
200 .filter((link
) => link
.attributes
.some((attr
) => attr
.name
=== 'rel' && ` ${attr.value} `.includes(' hub ')))
201 .map((link
) => link
.target
);
// The LinkHelper class is this module's sole export.
206 module
.exports
= LinkHelper
;