Twitter research platform/twitter streaming es.py: Difference between revisions
From Federal Burro of Information
Jump to navigationJump to search
(Created page with "<pre> from tweepy.streaming import StreamListener from tweepy import OAuthHandler from tweepy import Stream from datetime import datetime from elasticsearch import Elasticsear...") |
No edit summary |
||
(4 intermediate revisions by the same user not shown) | |||
Line 7: | Line 7: | ||
import json | import json | ||
import time | import time | ||
import pprint | |||
es = Elasticsearch() | es = Elasticsearch() | ||
#Variables that contains the user credentials to access Twitter API | #Variables that contains the user credentials to access Twitter API | ||
access_token = " | #Variables that contains the user credentials to access Twitter API | ||
access_token_secret = " | access_token = "XXX" | ||
consumer_key = " | access_token_secret = "XXX" | ||
consumer_secret = " | consumer_key = "XXX" | ||
consumer_secret = "XXX" | |||
#This is a basic listener that just prints received tweets to stdout. | #This is a basic listener that just prints received tweets to stdout. | ||
Line 22: | Line 23: | ||
# print data | # print data | ||
line_object = json.loads(data) | line_object = json.loads(data) | ||
# "created_at":"Mon Oct 12 19:42:21 +0000 2015" | |||
mydatetime = datetime.strptime(line_object["created_at"], "%a %b %d %H:%M:%S +0000 %Y") | |||
res = es.index(index=" | ltime = mydatetime.strftime("%Y-%m-%dT%H:%M:%S") + ".%03d" % (mydatetime.microsecond / 1000) + "Z" | ||
res = es.index(index="twitterindex_v3", doc_type='tweet', id=line_object["id"], timestamp=ltime, body=data) | |||
return True | return True | ||
def on_error(self, status): | def on_error(self, status): | ||
pp = pprint.PrettyPrinter(indent=4) | |||
print status | print status | ||
pp.pprint(self) | |||
Latest revision as of 19:05, 21 March 2024
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from datetime import datetime
from elasticsearch import Elasticsearch
import json
import time
import pprint

es = Elasticsearch()

# Twitter API credentials (replace the placeholders before running).
access_token = "XXX"
access_token_secret = "XXX"
consumer_key = "XXX"
consumer_secret = "XXX"


class StdOutListener(StreamListener):
    """Stream listener that indexes each received tweet into Elasticsearch."""

    def on_data(self, data):
        """Index one raw tweet (a JSON string from the Streaming API).

        Parses the tweet, converts its creation time to an ISO-8601
        timestamp, and stores the raw JSON in the 'twitterindex_v3'
        index keyed by the tweet id.  Returns True so tweepy keeps
        the stream open.
        """
        line_object = json.loads(data)
        # Twitter's created_at format: "Mon Oct 12 19:42:21 +0000 2015"
        mydatetime = datetime.strptime(line_object["created_at"],
                                       "%a %b %d %H:%M:%S +0000 %Y")
        # ISO-8601 with millisecond precision, e.g. "2015-10-12T19:42:21.000Z".
        # Integer division: "/" would produce a float under Python 3.
        ltime = (mydatetime.strftime("%Y-%m-%dT%H:%M:%S")
                 + ".%03d" % (mydatetime.microsecond // 1000) + "Z")
        # NOTE(review): doc_type and the timestamp kwarg were removed in
        # Elasticsearch 7+ clients — this targets a pre-7 cluster; confirm.
        es.index(index="twitterindex_v3", doc_type='tweet',
                 id=line_object["id"], timestamp=ltime, body=data)
        return True

    def on_error(self, status):
        """Print the error status and dump the listener state for debugging."""
        pp = pprint.PrettyPrinter(indent=4)
        print(status)  # was a Python-2 print statement
        pp.pprint(self)


if __name__ == '__main__':
    # Handle Twitter authentication and the connection to the Streaming API.
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)
    # Filter the stream for 2015 Canadian federal election keywords.
    stream.filter(track=['mulcair', 'harper', 'trudeau', 'elxn42'])