Twitter research platform/twitter streaming es.py

From Federal Burro of Information
Revision as of 13:53, 16 October 2015 by David (talk | contribs) (Created page with "<pre> from tweepy.streaming import StreamListener from tweepy import OAuthHandler from tweepy import Stream from datetime import datetime from elasticsearch import Elasticsear...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation | Jump to search
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from datetime import datetime
from elasticsearch import Elasticsearch
import json
import time
# Elasticsearch client; with no arguments this connects to the default
# local node (http://localhost:9200).
es = Elasticsearch()

# Variables that contain the user credentials to access the Twitter API.
# SECURITY NOTE(review): credentials are hardcoded in source (and published
# on a wiki page) — they should be considered compromised, rotated, and
# loaded from environment variables or a config file instead.
access_token = "177545461-t2y1uQ07y9YV9zYfnfiF86TdMP0zxcum7QYImvrI"
access_token_secret = "ARAnOkZFpwykqiSkmemSPCo5V0FJiWyW2wYE6iHB1Bkmi"
consumer_key = "X2KkJXBaQ4kcosPqYTqBKQ"
consumer_secret = "xOz5dDLLYc8RKOUiYrN71xZyhHe4bffoEG8WdgXMJ8U"


#This is a basic listener that just prints received tweets to stdout.
#A basic listener that echoes tweet metadata to stdout and indexes each
#tweet into Elasticsearch.
class StdOutListener(StreamListener):

    def on_data(self, data):
        """Handle one raw tweet delivered by the stream.

        ``data`` is the raw JSON string from the Twitter Streaming API.
        Prints the tweet id, creation timestamp, and its parsed struct_time
        for monitoring, then indexes the raw payload into Elasticsearch.
        Returns True so tweepy keeps the connection open.
        """
        line_object = json.loads(data)
        # Single-argument print() is valid and identical in Python 2 and 3.
        print(line_object["id"])
        print(line_object["created_at"])
        # Twitter timestamp format, e.g. "Fri Oct 16 13:53:00 +0000 2015".
        print(time.strptime(line_object["created_at"], "%a %b %d %H:%M:%S +0000 %Y"))
        # Use the tweet id as the document id so a re-delivered tweet
        # overwrites its document instead of creating a duplicate.
        # (The previous unused `res =` binding has been dropped.)
        es.index(index="test-index", doc_type='tweet', id=line_object["id"], body=data)
        return True

    def on_error(self, status):
        """Print the HTTP error status reported by the streaming API.

        NOTE(review): implicitly returns None; tweepy presumably applies
        its default reconnect/backoff handling in that case — confirm
        against the installed tweepy version.
        """
        print(status)


if __name__ == '__main__':

    # Handles Twitter authentication and the connection to the Twitter
    # Streaming API. (Renamed `l` -> `listener`: single-letter `l` is
    # ambiguous, PEP 8 E741.)
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    # Filter the stream to tweets matching 2015 Canadian federal election
    # keywords (party leaders plus the #elxn42 hashtag).
    stream.filter(track=['mulcair', 'harper', 'trudeau', 'elxn42'])