add a user-agent to the link checker to avoid getting blocked by some sites

2019-09-16 08:30:13 +08:00 · 2019-09-16 08:30:13 +08:00 · f7d026fe55
commit f7d026fe55
parent 5c6d2aa253
1 changed files with 17 additions and 3 deletions
--- a/src/url_metadata.js
+++ b/src/url_metadata.js
@ -1,15 +1,21 @@
 const bot = require('bbot');
 const request = require('request');
 var request_options = {
  headers: {
    'User-Agent': 'crabot retrieving url metadata (https://wiki.crapaud-fou.org/crabot)'
  }
 };
 bot.global.text(/(https?:\/\/[^ ,\)"]*)/ig, (b) => {
  // console.log(JSON.stringify(b.envelope.payload, null, 2));
  for (url of b.match) {
    // console.log(JSON.stringify(url, null, 2));
    if (!/(coa|pad)\.crapaud-fou\.org/.test(url)) {
-      request(url, (err, res, body) => {
+      request_options.url = url;
-        if (!err) {
+      request(request_options, (err, res, body) => {
-          // console.log(JSON.stringify(res.request.uri.href, null, 2));
+        if (!err && res.statusCode === 200) {
          // console.log(JSON.stringify(res.statusCode, null, 2));
          var re = /<title[^>]*>([^<]*)<\/title>/gi;
          var match = re.exec(body);
          if (match && match[1]) {
@ -24,6 +30,14 @@ bot.global.text(/(https?:\/\/[^ ,\)"]*)/ig, (b) => {
            }
            b.respond("[:link:](" + link + ") _" + match[1] + "_" + extra);
          }
        } else {
          console.log('-----------------------------------');
          console.log('URL: ' + url);
          console.log('ERR - - - - - - - - - - - - - - - -');
          console.log(JSON.stringify(err, null, 2));
          console.log('RES - - - - - - - - - - - - - - - -');
          console.log(JSON.stringify(res, null, 2));
          console.log('-----------------------------------');
        }
      });
    }