Initial release
[websub-hub] / src / link-helper.js
1 'use strict';
2
3 /**
4 * A utility class for checking link values in a topic's data and metadata.
5 * Used to determine whether we are a valid hub for a topic.
6 */
7
8 const { parse: parseLinkHeader, SyntaxError: ParseSyntaxError } = require('@squeep/web-linking');
9 const common = require('./common');
10 const Enum = require('./enum');
11 const FeedParser = require('feedparser');
12 const { Readable } = require('stream');
13 const htmlparser2 = require('htmlparser2');
14
15 const _fileScope = common.fileScope(__filename);
16
/**
 * Fetch a header value using the lower-cased form of the requested name.
 * Only keys stored in lower case on `headers` can be matched.
 * @param {Object} headers
 * @param {String} header
 * @returns {*} value stored under the lower-cased name, undefined when absent
 */
function getHeader(headers, header) {
  const lowerCasedName = header.toLowerCase();
  const { [lowerCasedName]: value } = headers;
  return value;
}
20
21
/**
 * Inspects the Link header and body link elements of a topic's response
 * to decide whether this hub is advertised as a hub for that topic.
 */
class LinkHelper {
  /**
   * @param {Object} logger object providing debug() and error() methods
   * @param {Object} options
   * @param {Object} options.dingus
   * @param {String} options.dingus.selfBaseUrl URL compared against discovered hub targets
   */
  constructor(logger, options) {
    this.logger = logger;
    this.options = options;
    this.selfUrl = options.dingus.selfBaseUrl;
  }


  /**
   * Determine if this hub is listed in response data from url.
   * Links are gathered from the Link header and then from the body, using
   * a parser chosen by the Content-Type header; any hub relation target
   * equal to our own URL (after resolving relative targets) is a match.
   * @param {String} url
   * @param {Object} headers
   * @param {String|Buffer} body
   * @returns {Promise<Boolean>}
   */
  async validHub(url, headers, body) {
    const _scope = _fileScope('validHub');
    this.logger.debug(_scope, 'called', { headers, body: common.logTruncate(body, 100) });

    // Add Link headers first, as they take priority over link elements in body.
    const links = [];
    const linkHeader = getHeader(headers, Enum.Header.Link);
    // An absent header is a normal case, not a parse failure; skip it quietly.
    if (linkHeader) {
      try {
        links.push(...parseLinkHeader(linkHeader));
      } catch (e) {
        if (e instanceof ParseSyntaxError) {
          this.logger.debug(_scope, 'failed to parse link header, bad syntax', { error: e, linkHeader });
        } else {
          this.logger.error(_scope, 'failed to parse link header', { error: e, linkHeader });
        }
      }
    }

    // Compare only the media type: the header may carry parameters such as
    // a charset, and media types are matched without regard to case.
    const contentType = getHeader(headers, Enum.Header.ContentType);
    const mediaType = LinkHelper._mediaType(contentType);
    const isMediaType = (candidate) => typeof candidate === 'string' && candidate.toLowerCase() === mediaType;
    const feedTypes = [
      Enum.ContentType.ApplicationAtom,
      Enum.ContentType.ApplicationRDF,
      Enum.ContentType.ApplicationRSS,
      Enum.ContentType.ApplicationXML,
      Enum.ContentType.TextXML,
    ];

    let bodyLinks = [];
    if (feedTypes.some(isMediaType)) {
      bodyLinks = await this.linksFromFeedBody(url, body);
    } else if (isMediaType(Enum.ContentType.TextHTML)) {
      bodyLinks = this.linksFromHTMLBody(body);
    } else {
      this.logger.debug(_scope, 'no parser for content type', { contentType });
    }
    links.push(...bodyLinks);

    // Fetch all hub relation targets, resolving relative URIs.
    // Links lacking a target (e.g. a link element without href) cannot name a hub.
    const hubs = LinkHelper.locateHubTargets(links)
      .filter((target) => typeof target === 'string')
      .map((target) => this.absoluteURI(target, url));

    return hubs.includes(this.selfUrl);
  }


  /**
   * Parse XML-ish feed content, extracting link elements into our own format.
   * The returned promise resolves when the parser emits 'end'.
   * NOTE(review): parser errors are only logged; confirm FeedParser always
   * emits 'end' after an error, otherwise this promise would never settle.
   * @param {String} feedurl
   * @param {String} body
   * @returns {Promise<Object[]>}
   */
  async linksFromFeedBody(feedurl, body) {
    const _scope = _fileScope('linksFromFeedBody');
    this.logger.debug(_scope, 'called', { feedurl, body: common.logTruncate(body, 100) });

    const feedParser = new FeedParser({
      feedurl,
      addmeta: false,
    });
    const bodyStream = Readable.from(body);
    const links = [];

    return new Promise((resolve) => {
      feedParser.on('error', (err) => {
        // Truncate the body here as in the other log entries.
        this.logger.debug(_scope, 'FeedParser error', { err, feedurl, body: common.logTruncate(body, 100) });
      });
      feedParser.on('end', () => {
        this.logger.debug(_scope, 'FeedParser finished', { links });
        resolve(links);
      });
      feedParser.on('meta', (meta) => {
        this.logger.debug(_scope, 'FeedParser meta', { meta });
        links.push(...LinkHelper._feedLinksFromMeta(meta));
      });
      feedParser.on('readable', () => {
        while (feedParser.read()) {
          // Quietly consume remaining stream content
        }
      });

      bodyStream.pipe(feedParser);
    });
  }


  /**
   * Parse HTML-ish content, extracting link elements into our own format.
   * @param {String|Buffer} body
   * @returns {Object[]}
   */
  linksFromHTMLBody(body) {
    const _scope = _fileScope('linksFromHTMLBody');
    this.logger.debug(_scope, 'called', { body: common.logTruncate(body, 100) });

    const links = [];
    const parser = new htmlparser2.Parser({
      onopentag(tagName, attributes) {
        if (tagName.toLowerCase() === 'link') {
          links.push(LinkHelper._linkFromAttributes(attributes));
        }
      },
    });
    // validHub may hand us a Buffer; presumably the parser wants text, so
    // decode it (as UTF-8) before writing.
    parser.write(Buffer.isBuffer(body) ? body.toString('utf8') : body);
    parser.end();
    return links;
  }


  /**
   * Attempt to resolve a relative target URI.
   * An already-absolute uri is returned unchanged; a uri which cannot be
   * resolved against context is also returned unchanged.
   * @param {String} uri
   * @param {String} context
   * @returns {String}
   */
  absoluteURI(uri, context) {
    const _scope = _fileScope('absoluteURI');
    try {
      new URL(uri); // eslint-disable-line no-new
      return uri;
    } catch (e) {
      // Not parseable on its own, so treat it as relative to context below.
    }
    try {
      return new URL(uri, context).href;
    } catch (e) {
      this.logger.debug(_scope, 'could not resolve link URI', { uri, context });
      return uri;
    }
  }


  /**
   * Return all link targets with a hub relation.
   * The rel attribute is treated as a whitespace-separated list of
   * relation types, compared case-insensitively.
   * @param {Object[]} links
   * @returns {String[]}
   */
  static locateHubTargets(links) {
    const isHubRelation = ({ name, value }) => String(name).toLowerCase() === 'rel'
      && String(value).toLowerCase().split(/\s+/).includes('hub');
    return links
      .filter((link) => (link.attributes || []).some(isHubRelation))
      .map((link) => link.target);
  }


  /**
   * Helper: reduce a Content-Type header value to its bare, lower-cased
   * media type, dropping any parameters after the first ';'.
   * @param {String=} contentTypeHeader
   * @returns {String} empty string when the header is absent or not a string
   */
  static _mediaType(contentTypeHeader) {
    if (typeof contentTypeHeader !== 'string') {
      return '';
    }
    return contentTypeHeader.split(';')[0].trim().toLowerCase();
  }


  /**
   * Helper: convert an element's attribute map into our link format, with
   * href as the target and every other attribute as a name/value pair.
   * @param {Object} attributes
   * @returns {Object} link with target and attributes properties
   */
  static _linkFromAttributes(attributes) {
    return {
      target: attributes.href,
      attributes: Object.entries(attributes)
        .filter(([name]) => name !== 'href')
        .map(([name, value]) => ({ name, value })),
    };
  }


  /**
   * Helper: extract links from the 'atom:link' entry of feed metadata.
   * The entry is accepted either as a list or as a single object
   * (presumably what the parser supplies when only one element exists),
   * and entries without an '@' attribute map are skipped.
   * @param {Object} meta
   * @returns {Object[]}
   */
  static _feedLinksFromMeta(meta) {
    const entries = (meta && meta['atom:link']) || [];
    const entryList = Array.isArray(entries) ? entries : [entries];
    return entryList
      .map((entry) => entry && entry['@'])
      .filter(Boolean)
      .map((attributes) => LinkHelper._linkFromAttributes(attributes));
  }

}
195
196 module.exports = LinkHelper;