Twitter research platform/twitter streaming es.py: Difference between revisions

From Federal Burro of Information
Jump to navigation · Jump to search
No edit summary
No edit summary
Line 7: Line 7:
import json
import json
import time
import time
from datetime import datetime
import pprint
es = Elasticsearch()
es = Elasticsearch()


#Variables that contains the user credentials to access Twitter API
#Variables that contains the user credentials to access Twitter API
access_token = "XXX"
access_token = "177545461-t2y1uQ07y9YV9zYfnfiF86TdMP0zxcum7QYImvrI"
access_token_secret = "XXX"
access_token_secret = "ARAnOkZFpwykqiSkmemSPCo5V0FJiWyW2wYE6iHB1Bkmi"
consumer_key = "XXX"
consumer_key = "X2KkJXBaQ4kcosPqYTqBKQ"
consumer_secret = "XXX"
consumer_secret = "xOz5dDLLYc8RKOUiYrN71xZyhHe4bffoEG8WdgXMJ8U"
 


#This is a basic listener that just prints received tweets to stdout.
#This is a basic listener that just prints received tweets to stdout.
Line 21: Line 24:
         # print data
         # print data
         line_object = json.loads(data)
         line_object = json.loads(data)
         print line_object["id"]
 
        print line_object["created_at"]
         # "created_at":"Mon Oct 12 19:42:21 +0000 2015"
         print time.strptime(line_object["created_at"],"%a %b %d %H:%M:%S +0000 %Y")
         mydatetime = datetime.strptime(line_object["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
         res = es.index(index="test-index", doc_type='tweet', id=line_object["id"], body=data)
        ltime = mydatetime.strftime("%Y-%m-%dT%H:%M:%S") + ".%03d" % (mydatetime.microsecond / 1000) + "Z"
 
         res = es.index(index="twitterindex_v3", doc_type='tweet', id=line_object["id"], timestamp=ltime, body=data)
         return True
         return True


     def on_error(self, status):
     def on_error(self, status):
        pp = pprint.PrettyPrinter(indent=4)
         print status
         print status
        pp.pprint(self)




if __name__ == '__main__':
if __name__ == '__main__':


     #This handles Twitter authentication and the connection to Twitter Streaming API
     #This handles Twitter authetification and the connection to Twitter Streaming API
     l = StdOutListener()
     l = StdOutListener()
     auth = OAuthHandler(consumer_key, consumer_secret)
     auth = OAuthHandler(consumer_key, consumer_secret)

Revision as of 14:11, 21 October 2015

import json
import os
import pprint
import time
from datetime import datetime

from elasticsearch import Elasticsearch
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
# Module-level Elasticsearch client used by StdOutListener.on_data to index
# tweets.  It is constructed with no arguments, so the client library's
# default connection settings apply.
es = Elasticsearch()

# User credentials for the Twitter API.
#
# The secrets are read from the environment instead of being hard-coded, so
# they never end up in version control or on a published page.  When a
# variable is unset the "XXX" placeholder is used, which will not authenticate.
#
# NOTE(review): any literal token/secret values that previously appeared here
# should be treated as compromised and revoked.
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "XXX")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "XXX")
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "XXX")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "XXX")


# Stream listener that indexes every received tweet into Elasticsearch.
class StdOutListener(StreamListener):
    """Tweepy stream listener that stores incoming tweets in Elasticsearch.

    The class name is kept for backward compatibility; tweets are not printed
    but indexed into ``twitterindex_v3`` through the module-level ``es``
    client.  Only errors are written to stdout.
    """

    # Layout of the tweet "created_at" field, e.g.
    # "Mon Oct 12 19:42:21 +0000 2015".  The "+0000" offset is matched
    # literally, so a value carrying any other offset fails to parse.
    _CREATED_AT_FORMAT = "%a %b %d %H:%M:%S +0000 %Y"

    @staticmethod
    def _to_es_timestamp(created_at):
        """Convert a tweet ``created_at`` string to ``YYYY-MM-DDTHH:MM:SS.mmmZ``.

        Raises ``ValueError`` when ``created_at`` does not match
        ``_CREATED_AT_FORMAT``.
        """
        parsed = datetime.strptime(created_at, StdOutListener._CREATED_AT_FORMAT)
        # Floor division keeps the millisecond part an integer on both
        # Python 2 and Python 3.
        millis = parsed.microsecond // 1000
        return parsed.strftime("%Y-%m-%dT%H:%M:%S") + ".%03dZ" % millis

    def on_data(self, data):
        """Index one raw stream message into Elasticsearch.

        ``data`` is the JSON text of the message.  Messages that do not carry
        both ``id`` and ``created_at`` are skipped rather than raising
        ``KeyError``, so a non-tweet message (e.g. a delete or limit notice)
        does not abort the stream.

        Always returns ``True``.
        """
        tweet = json.loads(data)

        if not isinstance(tweet, dict) or "created_at" not in tweet or "id" not in tweet:
            return True

        # The original JSON text is stored unchanged as the document body;
        # the parsed copy is only used for the id and the timestamp.
        es.index(
            index="twitterindex_v3",
            doc_type='tweet',
            id=tweet["id"],
            timestamp=self._to_es_timestamp(tweet["created_at"]),
            body=data,
        )
        return True

    def on_error(self, status):
        """Print the error ``status`` followed by a dump of this listener."""
        pp = pprint.PrettyPrinter(indent=4)
        # Parenthesised single-argument form behaves the same on Python 2
        # and Python 3.
        print(status)
        pp.pprint(self)


if __name__ == '__main__':

    # Keywords a tweet must match to be delivered by the stream.
    tracked_terms = ['mulcair', 'harper', 'trudeau', 'elxn42']

    # Build the listener that receives the stream's messages.
    listener = StdOutListener()

    # Twitter authentication: consumer key/secret first, then the access
    # token pair.
    oauth = OAuthHandler(consumer_key, consumer_secret)
    oauth.set_access_token(access_token, access_token_secret)

    # Connect to the Twitter Streaming API and filter by the tracked keywords.
    Stream(oauth, listener).filter(track=tracked_terms)