Merge branch 'v1.1-dev' as v1.1.1
[websub-hub] / src / link-helper.js
1 'use strict';
2
3 /**
4 * A utility class for checking link values in a topic's data and metadata.
5 * Used to determine whether we are a valid hub for a topic.
6 */
7
8 const { parse: parseLinkHeader, SyntaxError: ParseSyntaxError } = require('@squeep/web-linking');
9 const common = require('./common');
10 const Enum = require('./enum');
11 const FeedParser = require('feedparser');
12 const { Readable } = require('stream');
13 const htmlparser2 = require('htmlparser2');
14
15 const _fileScope = common.fileScope(__filename);
16
/**
 * Fetch a header value from a header map, using the lower-cased
 * form of the requested header name as the lookup key.
 * @param {Object} headers
 * @param {String} header
 * @returns {*} whatever is stored under the lower-cased name, or undefined
 */
function getHeader(headers, header) {
  const key = header.toLowerCase();
  return headers[key];
}
20
21
/**
 * Collects link relations from a topic's response headers and body, and
 * decides whether this hub is advertised among the topic's hub targets.
 */
class LinkHelper {
  /**
   * @param {Object} logger object providing debug() and error() methods
   * @param {Object} options
   * @param {Object} options.dingus
   * @param {String} options.dingus.selfBaseUrl URL that located hub targets are compared against
   */
  constructor(logger, options) {
    this.logger = logger;
    this.options = options;
    this.selfUrl = options.dingus.selfBaseUrl;
  }


  /**
   * Determine if this hub is listed in response data from url.
   * Links are gathered from the Link header first, then from the body
   * when a parser exists for its content type.
   * @param {String} url topic url, also used as base for relative link targets
   * @param {Object} headers response headers, keyed by lower-cased name
   * @param {String|Buffer} body
   * @returns {Promise<Boolean>} true when a hub link target equals our own url
   */
  async validHub(url, headers, body) {
    const _scope = _fileScope('validHub');
    this.logger.debug(_scope, 'called', { headers, body: common.logTruncate(body, 100) });

    // Add Link headers first, as they take priority over link elements in body.
    const linkHeader = getHeader(headers, Enum.Header.Link);
    const links = [];
    if (linkHeader) {
      try {
        links.push(...parseLinkHeader(linkHeader));
      } catch (e) {
        // A malformed header is the remote's problem; anything else is ours.
        if (e instanceof ParseSyntaxError) {
          this.logger.debug(_scope, 'failed to parse link header, bad syntax', { error: e, linkHeader });
        } else {
          this.logger.error(_scope, 'failed to parse link header', { error: e, linkHeader });
        }
      }
    }

    const contentType = getHeader(headers, Enum.Header.ContentType);
    if (contentType) {
      // Only the media type selects a parser. Parameters follow a semicolon,
      // which may or may not be surrounded by whitespace.
      const contentTypeBase = contentType.split(';')[0].trim();
      let bodyLinks = [];
      switch (contentTypeBase) {
        case Enum.ContentType.ApplicationAtom:
        case Enum.ContentType.ApplicationRDF:
        case Enum.ContentType.ApplicationRSS:
        case Enum.ContentType.ApplicationXML:
        case Enum.ContentType.TextXML: {
          bodyLinks = await this.linksFromFeedBody(url, body);
          break;
        }

        case Enum.ContentType.TextHTML:
          bodyLinks = this.linksFromHTMLBody(body);
          break;

        default:
          this.logger.debug(_scope, 'no parser for content type', { contentType });
      }
      links.push(...bodyLinks);
    }

    // Fetch all hub relation targets, resolving relative URIs against the topic url.
    const hubs = LinkHelper.locateHubTargets(links).map((link) => this.absoluteURI(link, url));

    this.logger.debug(_scope, 'valid hubs for url', { url, hubs });

    return hubs.includes(this.selfUrl);
  }


  /**
   * Parse XML-ish feed content, extracting link elements into our own format.
   * Parser errors are logged but do not reject; the promise resolves once
   * the parser emits 'end'.
   * @param {String} feedurl
   * @param {String} body
   * @returns {Promise<Object[]>} list of { target, attributes } links
   */
  async linksFromFeedBody(feedurl, body) {
    const _scope = _fileScope('linksFromFeedBody');
    this.logger.debug(_scope, 'called', { feedurl, body: common.logTruncate(body, 100) });

    const feedParser = new FeedParser({
      feedurl,
      addmeta: false,
    });
    const bodyStream = Readable.from(body);
    const links = [];

    return new Promise((resolve) => {
      feedParser.on('error', (err) => {
        this.logger.debug(_scope, 'FeedParser error', { err, feedurl, body });
      });
      feedParser.on('end', () => {
        this.logger.debug(_scope, 'FeedParser finished', { links });
        resolve(links);
      });
      feedParser.on('meta', (meta) => {
        this.logger.debug(_scope, 'FeedParser meta', { meta });
        // A lone atom:link may be reported as a single object rather than a
        // list; normalize so both shapes are handled.
        const atomLinks = meta['atom:link'] || [];
        const feedLinks = Array.isArray(atomLinks) ? atomLinks : [atomLinks];
        feedLinks
          .map((l) => l && l['@'])
          .filter(Boolean) // skip entries without an attribute bag
          .forEach((attributes) => {
            links.push(LinkHelper._linkFromAttributes(attributes));
          });
      });
      feedParser.on('readable', () => {
        let _item;
        while ((_item = feedParser.read())) {
          // Quietly consume remaining stream content
        }
      });

      bodyStream.pipe(feedParser);
    });
  }


  /**
   * Parse HTML-ish content, extracting link elements into our own format.
   * @param {String} body
   * @returns {Object[]} list of { target, attributes } links
   */
  linksFromHTMLBody(body) {
    const _scope = _fileScope('linksFromHTMLBody');
    this.logger.debug(_scope, 'called', { body: common.logTruncate(body, 100) });

    const links = [];
    const parser = new htmlparser2.Parser({
      onopentag(tagName, attributes) {
        if (tagName.toLowerCase() === 'link') {
          links.push(LinkHelper._linkFromAttributes(attributes));
        }
      },
    });
    parser.write(body);
    parser.end();
    return links;
  }


  /**
   * Convert a flat attribute map into our link format: href becomes the
   * target, every other entry becomes a { name, value } attribute.
   * @param {Object} attributes
   * @returns {Object} { target, attributes }
   */
  static _linkFromAttributes(attributes) {
    return {
      target: attributes.href,
      attributes: Object.entries(attributes)
        .filter(([name]) => name !== 'href')
        .map(([name, value]) => ({ name, value })),
    };
  }


  /**
   * Attempt to resolve a relative target URI
   * @param {String} uri
   * @param {String} context base url to resolve against
   * @returns {String} uri unchanged if already absolute or unresolvable, else the resolved href
   */
  absoluteURI(uri, context) {
    const _scope = _fileScope('absoluteURI');
    try {
      // Constructed only as a probe: it throws when uri is not absolute.
      new URL(uri);
    } catch (notAbsolute) {
      try {
        uri = new URL(uri, context).href;
      } catch (unresolvable) {
        this.logger.debug(_scope, 'could not resolve link URI', { uri, context });
      }
    }
    return uri;
  }


  /**
   * Return all link targets with a hub relation.
   * A rel value is treated as a whitespace-separated list of relation
   * types, compared without regard to case.
   * @param {Object[]} links
   * @returns {String[]}
   */
  static locateHubTargets(links) {
    const isHubRelation = ({ name, value }) => name === 'rel'
      && String(value).toLowerCase().split(/\s+/).includes('hub');
    return links
      .filter((link) => link.attributes.some(isHubRelation))
      .map((link) => link.target);
  }

}
201
202 module.exports = LinkHelper;