Twitter research platform/twitter_streaming_es.py
From Federal Burro of Information
Jump to navigation · Jump to search
#!/usr/bin/env python
"""twitter_streaming_es.py

Stream live tweets matching 2015 Canadian federal election keywords from the
Twitter Streaming API and index each raw tweet JSON document into a local
Elasticsearch instance.
"""

from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from elasticsearch import Elasticsearch
import json
import time

# Client for a local Elasticsearch node (default http://localhost:9200).
es = Elasticsearch()

# Twitter API credentials.
# SECURITY NOTE(review): live credentials are hard-coded in source here; they
# should be rotated and loaded from environment variables or an untracked
# config file instead of being committed.
access_token = "177545461-t2y1uQ07y9YV9zYfnfiF86TdMP0zxcum7QYImvrI"
access_token_secret = "ARAnOkZFpwykqiSkmemSPCo5V0FJiWyW2wYE6iHB1Bkmi"
consumer_key = "X2KkJXBaQ4kcosPqYTqBKQ"
consumer_secret = "xOz5dDLLYc8RKOUiYrN71xZyhHe4bffoEG8WdgXMJ8U"


class StdOutListener(StreamListener):
    """Basic listener: print tweet metadata to stdout and index the raw JSON."""

    def on_data(self, data):
        """Handle one raw tweet payload from the stream.

        Prints the tweet id, its ``created_at`` string, and the parsed
        ``struct_time``, then indexes the raw JSON body keyed by the tweet id
        (so a re-delivered tweet overwrites rather than duplicates).
        Returns True to keep the stream connection open.
        """
        line_object = json.loads(data)
        print(line_object["id"])
        print(line_object["created_at"])
        # Twitter timestamps look like "Wed Aug 27 13:08:45 +0000 2008".
        print(time.strptime(line_object["created_at"],
                            "%a %b %d %H:%M:%S +0000 %Y"))
        es.index(index="test-index", doc_type='tweet',
                 id=line_object["id"], body=data)
        return True

    def on_error(self, status):
        """Print stream errors; disconnect on HTTP 420 (rate limited).

        Returning False on 420 stops the stream instead of auto-reconnecting,
        which would exponentially increase Twitter's rate-limit backoff.
        """
        print(status)
        if status == 420:
            return False


if __name__ == '__main__':
    # Authenticate and connect to the Twitter Streaming API.
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)
    # Capture tweets mentioning the election keywords / party leaders.
    stream.filter(track=['mulcair', 'harper', 'trudeau', 'elxn42'])