# Twitter research platform/twitter streaming es.py
#
# From Federal Burro of Information
# Jump to navigation / Jump to search
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from datetime import datetime
from elasticsearch import Elasticsearch
import json
import time
# Module-level Elasticsearch client used by the stream listener to index tweets.
# Constructed with no arguments, so the client library's default connection
# settings apply (presumably localhost:9200 -- confirm for the installed version).
es = Elasticsearch()

# Twitter API credentials. "XXX" is a placeholder value.
# Application (consumer) key pair, passed to OAuthHandler.
consumer_key = "XXX"
consumer_secret = "XXX"
# Access token pair, passed to OAuthHandler.set_access_token.
access_token = "XXX"
access_token_secret = "XXX"

class StdOutListener(StreamListener):
    """Stream listener that echoes each tweet and indexes it in Elasticsearch.

    Each raw message is parsed as JSON. For messages that carry both an
    ``id`` and a ``created_at`` field, the id, the raw timestamp and the
    parsed timestamp are printed to stdout, and the raw message is stored in
    the ``test-index`` index (doc type ``tweet``) under the tweet id.
    """

    def on_data(self, data):
        """Handle one raw message received from the stream.

        Args:
            data: Raw JSON text of a single stream message.

        Returns:
            True in every case, whether the message was indexed or skipped.
        """
        line_object = json.loads(data)

        # Not every stream message is a tweet: the Streaming API also sends
        # notices (e.g. delete / limit) that have no "id" or "created_at".
        # Skip those instead of letting a KeyError take down the stream.
        tweet_id = line_object.get("id")
        created_at = line_object.get("created_at")
        if tweet_id is None or created_at is None:
            return True

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(tweet_id)
        print(created_at)
        # The format string expects a literal "+0000" offset in the timestamp.
        print(time.strptime(created_at, "%a %b %d %H:%M:%S +0000 %Y"))

        # Index the untouched raw JSON; using the tweet id as the document id
        # means a re-delivered tweet overwrites rather than duplicates.
        es.index(index="test-index", doc_type='tweet', id=tweet_id, body=data)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream.

        Args:
            status: Error status passed in by the streaming client.
        """
        print(status)


if __name__ == '__main__':

    # Authenticate against the Twitter API with the credentials defined above.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    # Connect to the Twitter Streaming API; every received message is handed
    # to the listener.
    listener = StdOutListener()
    stream = Stream(auth, listener)

    # Filter the stream to capture data by these keywords.
    keywords = ['mulcair', 'harper', 'trudeau', 'elxn42']
    stream.filter(track=keywords)