🏡 index : ~doyle/dave.git

author Jordan Doyle <jordan@doyle.wf> 2017-07-31 6:54:47.0 +00:00:00
committer Jordan Doyle <jordan@doyle.wf> 2017-07-31 6:54:47.0 +00:00:00
commit
319214879b85788bb74e653bdc0bb4dd94ca959a [patch]
tree
86b00f45d38e43e705eb97e3e9849cf98a893235
parent
cb1a9c8ede110276b9604974aca064e624745e64
download
319214879b85788bb74e653bdc0bb4dd94ca959a.tar.gz

Catch exceptions thrown by the title module



Diff

 dave/modules/title.py |  9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/dave/modules/title.py b/dave/modules/title.py
index 450f5e9..928f285 100644
--- a/dave/modules/title.py
+++ b/dave/modules/title.py
@@ -6,6 +6,7 @@ from bs4 import BeautifulSoup
 from requests import get
 from twisted.words.protocols.irc import assembleFormattedText, attributes as A
 import dave.config
+from twisted.python import log

 parse = re.compile(r"(?:(?:https?):\/\/)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?", re.IGNORECASE)

@@ -18,8 +19,12 @@ def link_parse(bot, args, sender, source):

     for match in matches:
         if not dave.config.redis.exists("site:{}".format(match)):
-            res = get(match, timeout=3,
-                      headers={'user-agent': 'irc bot (https://github.com/w4)'})
+            try:
+                res = get(match, timeout=3,
+                          headers={'user-agent': 'irc bot (https://github.com/w4)'})
+            except BaseException as e:
+                log.msg("Couldn't connect to host.", e)
+                return

             # sometimes requests guesses the charset wrong
             if res.encoding == 'ISO-8859-1' and not 'ISO-8859-1' in \