Social News in 1000 Steps – Step 7

This entry is part 7 of 14 in the series Social News

Apparently Twitter allows you to perform more than 100 queries every 15 minutes. This means that I don’t need to be very selective. Therefore I rewrote about 63% of the Twitter search code.
The new code is as follows:

from nltk.twitter import Query
from nltk.twitter import credsfromfile
from langdetect import detect
from datetime import datetime
import dataset
import pickle
from datetime import timedelta
import dautil as dl
def hrefs_from_text(str):
    """Return the text with every URL token wrapped in an HTML anchor.

    A URL token is a whitespace-delimited word that starts with ``http``.

    Args:
        str: The text to scan. The name shadows the builtin; it is kept
            unchanged so that existing keyword callers keep working.

    Returns:
        A copy of the text in which each URL token is replaced by
        ``<a href="URL">URL</a>``. All other characters, including the
        original whitespace, are left untouched.
    """
    import re

    # One substitution pass links each URL exactly once. Chained
    # ``replace`` calls either dropped earlier links (when restarting from
    # the original text) or rewrote URLs that were already linked (when a
    # URL was repeated or was a prefix of another one).
    return re.sub(r'(?<!\S)http\S*',
                  lambda m: '<a href="{0}">{0}</a>'.format(m.group(0)),
                  str)
def hours_from_now(dt):
    """Return how many hours have elapsed since ``dt``.

    Args:
        dt: A ``datetime``; either naive (interpreted as local time) or
            timezone-aware.

    Returns:
        The elapsed time in hours as a float. The value is negative when
        ``dt`` lies in the future.
    """
    # Take "now" in the same flavour (naive or aware) as ``dt`` so the
    # subtraction is always valid. This also avoids ``strftime('%s')``,
    # which is a platform-specific extension that ignores tzinfo.
    now = datetime.now(dt.tzinfo)
    return (now - dt).total_seconds() / 3600
def write_file():
    """Write the HTML snippets stored in the last day to ``twitter.html``.

    Reads the ``html`` column of every ``twitter_searches`` row whose
    ``search_date`` is later than 24 hours ago (via the module-level ``db``)
    and writes them into a single ordered list.
    """
    yesterday = datetime.now() - timedelta(1)
    # The date is passed as a bound parameter instead of being formatted
    # into the SQL text. It is stringified first so the comparison value is
    # the same text the original ``'"{}"'.format(...)`` produced.
    res = db.query('SELECT html FROM twitter_searches '
                   'WHERE search_date > :since', since=str(yesterday))

    with open('twitter.html', 'w') as html:
        # NOTE(review): the opening markup and the loop body were lost from
        # the published listing; they are reconstructed here to mirror the
        # closing tags that survived. Confirm against the original script.
        html.write('<html><body><ol>')
        for row in res:
            html.write(row['html'])
        html.write('</ol></body></html>')
def search():
    """Search Twitter for each stored term and save matching tweets as HTML.

    For every term loaded from ``terms.pkl`` that has no stored result from
    the last 24 hours, this queries Twitter for up to five English tweets
    containing a link (retweets excluded). Each tweet that has at least one
    favorite, is at most 24 hours old and is detected as English is rendered
    as an ``<li>`` snippet and upserted into the module-level
    ``twitter_searches`` table. At most 100 Twitter queries are issued per
    call.

    NOTE(review): the ``continue``/``break`` statements and the head of the
    ``upsert`` call were lost from the published listing. They are
    reconstructed below from the surviving conditions and log messages;
    confirm against the original script.
    """
    max_searches = 100

    oauth = credsfromfile()
    client = Query(**oauth)

    # NOTE: pickle must only be used on trusted files; ``terms.pkl`` is
    # assumed to be produced locally by an earlier step of this project.
    with open('terms.pkl', 'rb') as f:
        terms = pickle.load(f)

    searches = 0
    li_html = '<li>name={0} created={1} favorited={2} retweeted={3} \
        {4} query={5}</li>'
    uname_html = '<a href="https://twitter.com/{0}">{0}</a>'

    for term in terms:
        row = twitter_searches.find_one(query=term)

        # Skip terms that already have a result from the last 24 hours.
        if row is not None and hours_from_now(row['search_date']) < 24:
            continue

        # Count only terms that actually trigger a Twitter query, so that
        # cached terms do not eat into the query budget.
        searches += 1
        tweets = client.search_tweets(keywords=term + ' http -RT',
                                      lang='en', limit=5)

        for t in tweets:
            if int(t['favorite_count']) == 0:
                log.debug('No favorites')
                continue

            text = t['text']
            dt = datetime.strptime(t['created_at'],
                                   '%a %b %d %H:%M:%S %z %Y')

            if hours_from_now(dt) > 24:
                continue

            if detect(text) != 'en':
                log.debug('Not english: {}'.format(text))
                continue

            log.debug('Searching for {}'.format(term))
            uname = t['user']['screen_name']
            html = li_html.format(uname_html.format(uname), t['created_at'],
                                  t['favorite_count'], t['retweet_count'],
                                  hrefs_from_text(text), term)
            # ``search_date`` is what find_one() above and the report query
            # in write_file() read back.
            twitter_searches.upsert(dict(query=term, html=html,
                                         search_date=datetime.now()),
                                    ['query', 'html'])

        if searches >= max_searches:
            break
if __name__ == "__main__":
    # Module-level names used by search() and write_file().
    log = dl.log_api.conf_logger(__name__)
    # Three slashes: SQLite database file relative to the working directory.
    db = dataset.connect('sqlite:///sonar.db')
    twitter_searches = db['twitter_searches']
    # NOTE(review): the published listing ends before any function is
    # called; running the search and then writing the report is the
    # presumed intent. Confirm against the original script.
    search()
    write_file()
Series Navigation: « Social News in 1000 Steps – Step 6 | Social News in 1000 Steps – Step 8 »
By the author of NumPy Beginner's Guide, NumPy Cookbook and Instant Pygame. If you enjoyed this post, please consider leaving a comment or subscribing to the RSS feed to have future articles delivered to your feed reader.
This entry was posted in programming and tagged . Bookmark the permalink.