Twitter research platform/twitter streaming es.py

Latest revision as of 19:05, 21 March 2024

<pre>
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from datetime import datetime
from elasticsearch import Elasticsearch
import json
import time
import pprint
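# Elasticsearch() with no arguments connects to a local node on localhost:9200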
es = Elasticsearch()

# Variables that contain the user credentials to access the Twitter API
access_token = "XXX"
access_token_secret = "XXX"
consumer_key = "XXX"
consumer_secret = "XXX"

# A basic stream listener that indexes each received tweet into Elasticsearch.
class StdOutListener(StreamListener):

    def on_data(self, data):
        # Parse the raw JSON payload for a single tweet
        line_object = json.loads(data)

        # "created_at":"Mon Oct 12 19:42:21 +0000 2015"
        mydatetime = datetime.strptime(line_object["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
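        # Re-format as ISO-8601 with millisecond precision, e.g. "2015-10-12T19:42:21.000Z";
        # created_at has no sub-second part, so the milliseconds are always 000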
        ltime = mydatetime.strftime("%Y-%m-%dT%H:%M:%S") + ".%03d" % (mydatetime.microsecond // 1000) + "Z"

        # Index the raw tweet JSON under the tweet id, passing the formatted
        # time as the document timestamp
        res = es.index(index="twitterindex_v3", doc_type='tweet', id=line_object["id"], timestamp=ltime, body=data)
        return True

    def on_error(self, status):
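        # status is the HTTP status code returned by the streaming endpoint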
        pp = pprint.PrettyPrinter(indent=4)
        print(status)
        pp.pprint(self)


if __name__ == '__main__':

    # This handles Twitter authentication and the connection to the Twitter Streaming API
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)

    # Filter the Twitter stream to tweets matching the keywords:
    # 'mulcair', 'harper', 'trudeau', 'elxn42'
    stream.filter(track=['mulcair', 'harper', 'trudeau', 'elxn42'])
</pre>
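
To check that tweets are actually landing in Elasticsearch, a quick query can be run against the index separately. The sketch below assumes the same local node and the twitterindex_v3 index used by the script above; the "elxn42" match query is only an illustration.

<pre>
from elasticsearch import Elasticsearch

es = Elasticsearch()

# How many tweets have been indexed so far?
print(es.count(index="twitterindex_v3")["count"])

# Pull a few tweets that mention one of the tracked keywords.
res = es.search(
    index="twitterindex_v3",
    body={"size": 3, "query": {"match": {"text": "elxn42"}}},
)
for hit in res["hits"]["hits"]:
    print(hit["_id"], hit["_source"]["text"])
</pre>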