Twitter research platform/twitter streaming es.py: Difference between revisions
From Federal Burro of Information
Jump to navigationJump to search
No edit summary |
No edit summary |
||
Line 7: | Line 7: | ||
import json | import json | ||
import time | import time | ||
import pprint | import pprint | ||
es = Elasticsearch() | es = Elasticsearch() |
Latest revision as of 19:05, 21 March 2024
from tweepy.streaming import StreamListener from tweepy import OAuthHandler from tweepy import Stream from datetime import datetime from elasticsearch import Elasticsearch import json import time import pprint es = Elasticsearch() #Variables that contains the user credentials to access Twitter API #Variables that contains the user credentials to access Twitter API access_token = "XXX" access_token_secret = "XXX" consumer_key = "XXX" consumer_secret = "XXX" #This is a basic listener that just prints received tweets to stdout. class StdOutListener(StreamListener): def on_data(self, data): # print data line_object = json.loads(data) # "created_at":"Mon Oct 12 19:42:21 +0000 2015" mydatetime = datetime.strptime(line_object["created_at"], "%a %b %d %H:%M:%S +0000 %Y") ltime = mydatetime.strftime("%Y-%m-%dT%H:%M:%S") + ".%03d" % (mydatetime.microsecond / 1000) + "Z" res = es.index(index="twitterindex_v3", doc_type='tweet', id=line_object["id"], timestamp=ltime, body=data) return True def on_error(self, status): pp = pprint.PrettyPrinter(indent=4) print status pp.pprint(self) if __name__ == '__main__': #This handles Twitter authetification and the connection to Twitter Streaming API l = StdOutListener() auth = OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) stream = Stream(auth, l) #This line filter Twitter Streams to capture data by the keywords: 'python', 'javascript', 'ruby' stream.filter(track=['mulcair', 'harper', 'trudeau', 'elxn42'])