add a user-agent to the link checker to avoid getting blocked by some sites
This commit is contained in:
parent
5c6d2aa253
commit
f7d026fe55
1 changed files with 17 additions and 3 deletions
|
@ -1,15 +1,21 @@
|
|||
|
||||
const bot = require('bbot');
|
||||
const request = require('request');
|
||||
// Options shared by every metadata request: identify the bot via a custom
// User-Agent so sites that block anonymous/default agents still respond.
// NOTE(review): `const` instead of `var` — the binding is never reassigned;
// callers still mutate properties (e.g. `request_options.url = ...`), which
// `const` permits. Be aware that sharing one mutable options object across
// concurrent requests is racy — confirm before relying on `.url` after assignment.
const request_options = {
  headers: {
    'User-Agent': 'crabot retrieving url metadata (https://wiki.crapaud-fou.org/crabot)'
  }
};
|
||||
|
||||
bot.global.text(/(https?:\/\/[^ ,\)"]*)/ig, (b) => {
|
||||
// console.log(JSON.stringify(b.envelope.payload, null, 2));
|
||||
for (url of b.match) {
|
||||
// console.log(JSON.stringify(url, null, 2));
|
||||
if (!/(coa|pad)\.crapaud-fou\.org/.test(url)) {
|
||||
request(url, (err, res, body) => {
|
||||
if (!err) {
|
||||
// console.log(JSON.stringify(res.request.uri.href, null, 2));
|
||||
request_options.url = url;
|
||||
request(request_options, (err, res, body) => {
|
||||
if (!err && res.statusCode === 200) {
|
||||
// console.log(JSON.stringify(res.statusCode, null, 2));
|
||||
var re = /<title[^>]*>([^<]*)<\/title>/gi;
|
||||
var match = re.exec(body);
|
||||
if (match && match[1]) {
|
||||
|
@ -24,6 +30,14 @@ bot.global.text(/(https?:\/\/[^ ,\)"]*)/ig, (b) => {
|
|||
}
|
||||
b.respond("[:link:](" + link + ") _" + match[1] + "_" + extra);
|
||||
}
|
||||
} else {
|
||||
console.log('-----------------------------------');
|
||||
console.log('URL: ' + url);
|
||||
console.log('ERR - - - - - - - - - - - - - - - -');
|
||||
console.log(JSON.stringify(err, null, 2));
|
||||
console.log('RES - - - - - - - - - - - - - - - -');
|
||||
console.log(JSON.stringify(res, null, 2));
|
||||
console.log('-----------------------------------');
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue