~doyle/dave.git

author	Jordan Doyle <jordan@doyle.wf>	2017-07-22 11:10:04.0 +01:00:00
committer	Jordan Doyle <jordan@doyle.wf>	2017-07-22 11:10:04.0 +01:00:00
commit	95b217b221b230165c8445816e4555f8f4aabc21 [patch]
tree	d8e6f0c0d61c6bc6f09dc73018f4ca86e68391d6
parent	0ade70523a6348f4af23b6df51b1dba083e6bec6
download	95b217b221b230165c8445816e4555f8f4aabc21.tar.gz

switch to requests in the title module and limit line length to 140

Diff

 requirements.txt      |  4 ++--
 dave/modules/title.py | 22 +++++++++++++---------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 5df71a0..1472ff2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
 twisted==17.5.0
 pyopenssl==16.2.0
 service_identity==16.0.0
 enum34==1.1.6
 beautifulsoup4==4.4.1
 mechanize==0.2.5
-requests==2.10.0
+requests==2.18.1
 hiredis==0.2.0
 redis==2.10.5
 arrow==0.7.0
@@ -16,4 +16,4 @@
 wolframalpha==2.4
 nltk==3.2.1
 markovify==0.4.3
-babel==2.3.4
+babel==2.3.4
diff --git a/dave/modules/title.py b/dave/modules/title.py
index 6e54c4c..a6875f3 100644
--- a/dave/modules/title.py
+++ b/dave/modules/title.py
@@ -1,9 +1,9 @@
 # -*- coding: utf-8 -*-
 """Get the title from a link using BeautifulSoup."""
 import re
 import dave.module
 from bs4 import BeautifulSoup
-from mechanize import Browser
+from requests import get
 from twisted.words.protocols.irc import assembleFormattedText, attributes as A
 import dave.config
 import socket
@@ -16,23 +16,20 @@
 def link_parse(bot, args, sender, source):
     matches = parse.findall(args[0])
 
-    br = Browser()
-    br.set_handle_robots(False)
-
     titles = []
 
     for match in matches:
         if not dave.config.redis.exists("site:{}".format(match)):
-            res = br.open(match)
-            data = res.get_data()
+            res = get(match, timeout=3)
 
-            soup = BeautifulSoup(data, "html.parser")
+            soup = BeautifulSoup(res.text, "html.parser")
             title = soup.title
 
             if title is not None:
-                title = re.sub(r"\r?\n|\r",
-                               "",
+                title = re.sub(r"(\r?\n|\r| )+",
+                               " ",
                                title.string.encode("utf-8").strip())
+                title = title[:140] + (title[140:] and '...')
                 dave.config.redis.setex("site:{}".format(match), 300, title)
         else:
             title = dave.config.redis.get("site:{}".format(match))
@@ -41,5 +38,8 @@
             titles.append(assembleFormattedText(A.bold[title]))
 
     if titles:
-        bot.msg(source, "Linked: {}".format(
-                        assembleFormattedText(A.normal[" | "]).join(titles)))
+        # remove duplicates
+        titles = list(set(titles))
+
+        bot.msg(source, "Title: {}".format(
+                        assembleFormattedText(A.normal[", "]).join(titles)))