coabot/src/url_metadata.js

52 lines
1.9 KiB
JavaScript
Raw Normal View History

2019-09-14 02:03:02 +00:00
const bot = require('bbot');
const request = require('request');
// Base options for the outgoing HTTP requests made by this script. The
// User-Agent names the bot and points at its wiki page.
const request_options = {
  headers: {
    'User-Agent': 'crabot retrieving url metadata (https://wiki.crapaud-fou.org/crabot)',
  },
};
2019-09-14 02:03:02 +00:00
2019-09-21 04:54:43 +00:00
/**
 * Remove the `fbclid` tracking parameter from a URL while keeping every
 * other query parameter.
 *
 * @param {string} link - URL to clean.
 * @returns {string} The URL without any `fbclid` parameter.
 */
function stripFbclid(link) {
  return link
    // `fbclid` followed by more parameters: keep the leading separator so
    // the next parameter stays attached to the query string.
    .replace(/([?&])(?:fbclid=[^&#]*&)+/gi, '$1')
    // `fbclid` as the last parameter: drop it together with its separator.
    .replace(/[?&]fbclid=[^&#]*/gi, '');
}

/**
 * For links containing "youtube.com" with a `v` parameter, build the
 * canonical watch URL plus the extra markdown links appended to the reply.
 *
 * @param {string} link - URL the request ended up on.
 * @returns {{link: string, extra: string}} The (possibly rewritten) link and
 *   the text to append after the title; `extra` is empty for other links.
 */
function youtubeAlternatives(link) {
  if (!/youtube\.com/.test(link)) {
    return { link, extra: '' };
  }
  // Stop at `#` as well as `&` so a fragment never leaks into the video id.
  const video = /[?&]v=([^&#]+)/.exec(link);
  if (!video) {
    return { link, extra: '' };
  }
  const id = video[1];
  return {
    link: `https://www.youtube.com/watch?v=${id}`,
    extra:
      ` - [no-cookie](https://www.youtube-nocookie.com/embed/${id})` +
      ` - [invidious](https://invidio.us/watch?v=${id})`,
  };
}

/**
 * Listener 'get-url-metadata': for every http(s) URL found in a message,
 * fetch the page and reply with a markdown link followed by the page title.
 *
 * URLs on coa.crapaud-fou.org / pad.crapaud-fou.org are skipped. The handler
 * pauses one second after each URL before moving on to the next; it does not
 * wait for the request itself to finish.
 *
 * Assumes `b.match` is the array of URLs matched by the global pattern and
 * that `b.respond` posts a reply -- confirm against the bbot API.
 */
bot.global.text(/(https?:\/\/[^ ,\)"\n]*)/ig, async (b) => {
  // `const` gives each iteration (and its request callback) its own binding,
  // so the error log below always reports the URL that actually failed.
  for (const url of b.match) {
    if (!/(coa|pad)\.crapaud-fou\.org/.test(url)) {
      // Per-request copy: never mutate the shared base options.
      const options = Object.assign({}, request_options, { url });
      request(options, (err, res, body) => {
        if (err || res.statusCode !== 200) {
          console.log('-----------------------------------');
          console.log('URL: ' + url);
          console.log('ERR - - - - - - - - - - - - - - - -');
          console.log(JSON.stringify(err, null, 2));
          console.log('RES - - - - - - - - - - - - - - - -');
          console.log(JSON.stringify(res, null, 2));
          console.log('-----------------------------------');
          return;
        }

        const match = /<title[^>]*>([^<]*)<\/title>/i.exec(body);
        if (!match) {
          return;
        }
        const title = match[1].replace(/\n/g, " ").trim();
        if (!title) {
          // A blank title would render as an empty "__" in the reply.
          return;
        }

        // Use the final URL of the request rather than the one typed in chat.
        const cleaned = stripFbclid(res.request.uri.href);
        const { link, extra } = youtubeAlternatives(cleaned);
        b.respond(`[>>>](${link}) _${title}_${extra}`);
      });
    }
    // Throttle: leave one second between consecutive URLs of a message.
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }
}, {
  id: 'get-url-metadata'
});