From 95b217b221b230165c8445816e4555f8f4aabc21 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sat, 22 Jul 2017 11:10:04 +0100 Subject: [PATCH] switch to requests in the title module and limit line length to 140 --- dave/modules/title.py | 22 +++++++++++----------- requirements.txt | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/dave/modules/title.py b/dave/modules/title.py index 6e54c4c..a6875f3 100644 --- a/dave/modules/title.py +++ b/dave/modules/title.py @@ -3,7 +3,7 @@ import re import dave.module from bs4 import BeautifulSoup -from mechanize import Browser +from requests import get from twisted.words.protocols.irc import assembleFormattedText, attributes as A import dave.config import socket @@ -16,23 +16,20 @@ parse = re.compile(r"(?:(?:https?):\/\/)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\ def link_parse(bot, args, sender, source): matches = parse.findall(args[0]) - br = Browser() - br.set_handle_robots(False) - titles = [] for match in matches: if not dave.config.redis.exists("site:{}".format(match)): - res = br.open(match) - data = res.get_data() + res = get(match, timeout=3) - soup = BeautifulSoup(data, "html.parser") + soup = BeautifulSoup(res.text, "html.parser") title = soup.title if title is not None: - title = re.sub(r"\r?\n|\r", - "", + title = re.sub(r"(\r?\n|\r| )+", + " ", title.string.encode("utf-8").strip()) + title = title[:140] + (title[140:] and '...') dave.config.redis.setex("site:{}".format(match), 300, title) else: title = dave.config.redis.get("site:{}".format(match)) @@ -41,5 +38,8 @@ def link_parse(bot, args, sender, source): titles.append(assembleFormattedText(A.bold[title])) if titles: - bot.msg(source, "Linked: {}".format( - assembleFormattedText(A.normal[" | "]).join(titles))) + # remove duplicates + titles = list(set(titles)) + + bot.msg(source, "Title: {}".format( + assembleFormattedText(A.normal[", "]).join(titles))) diff --git a/requirements.txt b/requirements.txt index 5df71a0..1472ff2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ service_identity==16.0.0 enum34==1.1.6 beautifulsoup4==4.4.1 mechanize==0.2.5 -requests==2.10.0 +requests==2.18.1 hiredis==0.2.0 redis==2.10.5 arrow==0.7.0 @@ -16,4 +16,4 @@ pysocks==1.5.7 wolframalpha==2.4 nltk==3.2.1 markovify==0.4.3 -babel==2.3.4 +babel==2.3.4 \ No newline at end of file -- libgit2 1.7.2