Twitter research platform/twitter streaming es.py
From Federal Burro of Information
Jump to navigation | Jump to search
# Stream tweets matching a fixed keyword list from the Twitter Streaming API
# and index each one into a local Elasticsearch instance.

import json
import os
import pprint
import time
from datetime import datetime

from elasticsearch import Elasticsearch
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener

# Client created with the library defaults; shared by the listener below.
es = Elasticsearch()

# Variables that contain the user credentials to access the Twitter API.
#
# SECURITY NOTE(review): these secrets are committed in plain text. They can be
# overridden through the environment variables named below; the literal
# fallbacks are kept only so existing deployments keep working. Rotate the
# keys and remove the literals.
access_token = os.environ.get(
    "TWITTER_ACCESS_TOKEN",
    "177545461-t2y1uQ07y9YV9zYfnfiF86TdMP0zxcum7QYImvrI",
)
access_token_secret = os.environ.get(
    "TWITTER_ACCESS_TOKEN_SECRET",
    "ARAnOkZFpwykqiSkmemSPCo5V0FJiWyW2wYE6iHB1Bkmi",
)
consumer_key = os.environ.get(
    "TWITTER_CONSUMER_KEY",
    "X2KkJXBaQ4kcosPqYTqBKQ",
)
consumer_secret = os.environ.get(
    "TWITTER_CONSUMER_SECRET",
    "xOz5dDLLYc8RKOUiYrN71xZyhHe4bffoEG8WdgXMJ8U",
)


def _to_es_timestamp(created_at):
    """Convert a tweet's ``created_at`` string to ``YYYY-MM-DDTHH:MM:SS.mmmZ``.

    The input looks like ``"Mon Oct 12 19:42:21 +0000 2015"``; the ``+0000``
    offset is matched literally by the format string.
    """
    parsed = datetime.strptime(created_at, "%a %b %d %H:%M:%S +0000 %Y")
    # strptime yields no sub-second part for this format, so the millisecond
    # field is always "000"; integer division keeps %03d exact on Python 3.
    millis = parsed.microsecond // 1000
    return parsed.strftime("%Y-%m-%dT%H:%M:%S") + ".%03d" % millis + "Z"


class StdOutListener(StreamListener):
    """Stream listener that indexes every received tweet into Elasticsearch."""

    def on_data(self, data):
        """Index one raw stream message; always return True to keep streaming.

        ``data`` is the raw JSON text of the message. It is stored unchanged
        as the document body, keyed by the tweet's ``id``.
        """
        line_object = json.loads(data)

        # Messages without these keys (the stream can also carry notices that
        # are not tweets) used to raise KeyError and stop the collector.
        # Skip them instead.
        if "created_at" not in line_object or "id" not in line_object:
            return True

        es.index(
            index="twitterindex_v3",
            doc_type='tweet',
            id=line_object["id"],
            timestamp=_to_es_timestamp(line_object["created_at"]),
            body=data,
        )
        return True

    def on_error(self, status):
        """Report a stream error; disconnect when Twitter is rate limiting us.

        Returns False for HTTP 420 so tweepy closes the stream instead of
        reconnecting; any other status returns True and the stream carries on.
        """
        print(status)
        pprint.PrettyPrinter(indent=4).pprint(self)
        if status == 420:
            return False
        return True


def main():
    """Authenticate against Twitter and start the filtered stream."""
    # This handles Twitter authentication and the connection to the
    # Twitter Streaming API.
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    # Filter the stream to tweets matching these keywords.
    stream.filter(track=['mulcair', 'harper', 'trudeau', 'elxn42'])


if __name__ == '__main__':
    main()