Twitter research platform/twitter streaming es.py: Difference between revisions

From Federal Burro of Information
Jump to navigation | Jump to search
No edit summary
No edit summary
 
(3 intermediate revisions by the same user not shown)
Line 7: Line 7:
import json
import json
import time
import time
import pprint
es = Elasticsearch()
es = Elasticsearch()


#Variables that contains the user credentials to access Twitter API
#Variables that contains the user credentials to access Twitter API
#Variables that contains the user credentials to access Twitter API
access_token = "XXX"
access_token = "XXX"
Line 21: Line 23:
         # print data
         # print data
         line_object = json.loads(data)
         line_object = json.loads(data)
         print line_object["id"]
 
        print line_object["created_at"]
         # "created_at":"Mon Oct 12 19:42:21 +0000 2015"
         print time.strptime(line_object["created_at"],"%a %b %d %H:%M:%S +0000 %Y")
         mydatetime = datetime.strptime(line_object["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
         res = es.index(index="test-index", doc_type='tweet', id=line_object["id"], body=data)
        ltime = mydatetime.strftime("%Y-%m-%dT%H:%M:%S") + ".%03d" % (mydatetime.microsecond / 1000) + "Z"
 
         res = es.index(index="twitterindex_v3", doc_type='tweet', id=line_object["id"], timestamp=ltime, body=data)
         return True
         return True


     def on_error(self, status):
     def on_error(self, status):
        pp = pprint.PrettyPrinter(indent=4)
         print status
         print status
        pp.pprint(self)




if __name__ == '__main__':
if __name__ == '__main__':


     #This handles Twitter authentication and the connection to Twitter Streaming API
     #This handles Twitter authetification and the connection to Twitter Streaming API
     l = StdOutListener()
     l = StdOutListener()
     auth = OAuthHandler(consumer_key, consumer_secret)
     auth = OAuthHandler(consumer_key, consumer_secret)

Latest revision as of 19:05, 21 March 2024

from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from datetime import datetime
from elasticsearch import Elasticsearch
import json
import time
import pprint
# Module-level Elasticsearch client shared by the stream listener below.
# It is constructed with no arguments, so the client library's default
# connection settings are used.
es = Elasticsearch()

# User credentials for the Twitter API: the access token pair and the
# consumer key pair are both passed to the OAuth handler in the __main__
# block. NOTE(review): the "XXX" values look like placeholders that must be
# replaced with real credentials before running -- confirm.
access_token = "XXX"
access_token_secret = "XXX"
consumer_key = "XXX"
consumer_secret = "XXX"

# Stream listener that indexes each received tweet into Elasticsearch.
class StdOutListener(StreamListener):
    """Tweepy stream listener that stores incoming tweets in Elasticsearch.

    Each message is indexed into the "twitterindex_v3" index through the
    module-level ``es`` client, using the tweet's own id as the document id
    and its creation time as the document timestamp.
    """

    def on_data(self, data):
        """Index one raw streaming message into Elasticsearch.

        Args:
            data: Raw JSON text of a single message received from the stream.

        Returns:
            True, both when the message was indexed and when it was skipped,
            so the caller always receives the same "carry on" result.

        Raises:
            ValueError: If ``data`` is not valid JSON, or if "created_at" does
                not match the expected timestamp layout.
        """
        line_object = json.loads(data)

        # The stream can deliver messages that are not tweets (for example
        # delete or limit notices). They carry no "created_at"/"id", so there
        # is nothing to index; skip them instead of dying with a KeyError.
        if "created_at" not in line_object or "id" not in line_object:
            return True

        # "created_at":"Mon Oct 12 19:42:21 +0000 2015"
        mydatetime = datetime.strptime(
            line_object["created_at"], "%a %b %d %H:%M:%S +0000 %Y")

        # Build an ISO-8601 style timestamp with millisecond precision,
        # e.g. "2015-10-12T19:42:21.000Z". Floor division keeps the value an
        # integer on both Python 2 and Python 3.
        ltime = (mydatetime.strftime("%Y-%m-%dT%H:%M:%S")
                 + ".%03d" % (mydatetime.microsecond // 1000)
                 + "Z")

        # The raw JSON text is stored as the document body unchanged.
        es.index(index="twitterindex_v3", doc_type='tweet',
                 id=line_object["id"], timestamp=ltime, body=data)
        return True

    def on_error(self, status):
        """Print the error status from the stream, then dump this listener.

        Args:
            status: Error status handed over by the stream
                (presumably the HTTP status code -- confirm against tweepy).
        """
        pp = pprint.PrettyPrinter(indent=4)
        # print() with a single argument behaves the same on Python 2 and 3.
        print(status)
        pp.pprint(self)


if __name__ == '__main__':

    # Set up Twitter authentication and the connection to the Twitter
    # Streaming API.
    oauth = OAuthHandler(consumer_key, consumer_secret)
    oauth.set_access_token(access_token, access_token_secret)
    listener = StdOutListener()
    twitter_stream = Stream(oauth, listener)

    # Filter the Twitter stream to capture tweets matching these keywords.
    keywords = ['mulcair', 'harper', 'trudeau', 'elxn42']
    twitter_stream.filter(track=keywords)