add a user-agent to the link checker to avoid getting blocked by some sites

mose 2019-09-16 08:30:13 +08:00
parent 5c6d2aa253
commit f7d026fe55


@@ -1,15 +1,21 @@
 const bot = require('bbot');
 const request = require('request');
+var request_options = {
+  headers: {
+    'User-Agent': 'crabot retrieving url metadata (https://wiki.crapaud-fou.org/crabot)'
+  }
+};
 bot.global.text(/(https?:\/\/[^ ,\)"]*)/ig, (b) => {
   // console.log(JSON.stringify(b.envelope.payload, null, 2));
   for (url of b.match) {
     // console.log(JSON.stringify(url, null, 2));
     if (!/(coa|pad)\.crapaud-fou\.org/.test(url)) {
-      request(url, (err, res, body) => {
-        if (!err) {
-          // console.log(JSON.stringify(res.request.uri.href, null, 2));
-          // console.log(JSON.stringify(res.statusCode, null, 2));
+      request_options.url = url;
+      request(request_options, (err, res, body) => {
+        if (!err && res.statusCode === 200) {
           var re = /<title[^>]*>([^<]*)<\/title>/gi;
           var match = re.exec(body);
           if (match && match[1]) {
@@ -24,6 +30,14 @@ bot.global.text(/(https?:\/\/[^ ,\)"]*)/ig, (b) => {
           }
           b.respond("[:link:](" + link + ") _" + match[1] + "_" + extra);
         }
+      } else {
+        console.log('-----------------------------------');
+        console.log('URL: ' + url);
+        console.log('ERR - - - - - - - - - - - - - - - -');
+        console.log(JSON.stringify(err, null, 2));
+        console.log('RES - - - - - - - - - - - - - - - -');
+        console.log(JSON.stringify(res, null, 2));
+        console.log('-----------------------------------');
       }
     });
   }
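For reference, the new header can be exercised outside the bot with a minimal standalone sketch. The target URL below is a placeholder, not from the patch; the header, status check, and title regex mirror the patched code:

// Standalone sketch: fetch a page with crabot's identifying User-Agent
// and extract the <title>, only on a 200 response.
const request = require('request');

const options = {
  url: 'https://example.com/', // placeholder URL, for illustration only
  headers: {
    'User-Agent': 'crabot retrieving url metadata (https://wiki.crapaud-fou.org/crabot)'
  }
};

request(options, (err, res, body) => {
  if (!err && res.statusCode === 200) {
    const match = /<title[^>]*>([^<]*)<\/title>/i.exec(body);
    if (match && match[1]) {
      console.log('title:', match[1].trim());
    }
  } else {
    // Mirrors the patch's verbose failure logging, condensed
    console.log('failed:', err || res.statusCode);
  }
});

One design note on the patch itself: it mutates a single shared request_options object inside the loop (request_options.url = url). That works with request() because each call reads the options synchronously when it starts, before the next loop iteration reassigns the url; building a fresh options object per URL would make that independence explicit.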