Twitter research platform/twitter streaming es.py: Difference between revisions

From Federal Burro of Information
Jump to navigation | Jump to search
(Created page with "<pre> from tweepy.streaming import StreamListener from tweepy import OAuthHandler from tweepy import Stream from datetime import datetime from elasticsearch import Elasticsear...")
 
No edit summary
Line 10: Line 10:


#Variables that contains the user credentials to access Twitter API
#Variables that contains the user credentials to access Twitter API
access_token = "177545461-t2y1uQ07y9YV9zYfnfiF86TdMP0zxcum7QYImvrI"
access_token = "XXX"
access_token_secret = "ARAnOkZFpwykqiSkmemSPCo5V0FJiWyW2wYE6iHB1Bkmi"
access_token_secret = "XXX"
consumer_key = "X2KkJXBaQ4kcosPqYTqBKQ"
consumer_key = "XXX"
consumer_secret = "xOz5dDLLYc8RKOUiYrN71xZyhHe4bffoEG8WdgXMJ8U"
consumer_secret = "XXX"
 


#This is a basic listener that just prints received tweets to stdout.
#This is a basic listener that just prints received tweets to stdout.
Line 34: Line 33:
if __name__ == '__main__':
if __name__ == '__main__':


     #This handles Twitter authetification and the connection to Twitter Streaming API
     #This handles Twitter authentication and the connection to Twitter Streaming API
     l = StdOutListener()
     l = StdOutListener()
     auth = OAuthHandler(consumer_key, consumer_secret)
     auth = OAuthHandler(consumer_key, consumer_secret)

Revision as of 13:54, 16 October 2015

from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from datetime import datetime
from elasticsearch import Elasticsearch
import json
import time
# Module-level Elasticsearch client shared by the stream listener below.
# It is constructed with no arguments, so the client library's default
# connection settings apply (presumably a local node -- confirm for your setup).
es = Elasticsearch()

# Variables that contain the user credentials used to access the Twitter API.
# The "XXX" values are placeholders; substitute real credentials before running
# and keep them out of any published copy of this script.
access_token = "XXX"
access_token_secret = "XXX"
consumer_key = "XXX"
consumer_secret = "XXX"

# Listener that prints a short summary of each received tweet to stdout and
# indexes the raw tweet JSON into Elasticsearch.
class StdOutListener(StreamListener):
    """Echo each streamed tweet's id/timestamp and store the tweet in Elasticsearch."""

    def on_data(self, data):
        """Handle one raw message from the stream.

        data -- the raw JSON text of the message.

        Prints the tweet's id, its "created_at" string and the parsed
        timestamp, then indexes the raw JSON into the "test-index" index
        using the tweet id as the document id.

        Always returns True so that the stream keeps running.
        """
        line_object = json.loads(data)

        # Not every stream message is a tweet: notices such as limit or
        # delete messages carry no top-level "id"/"created_at".  Skip them
        # instead of letting a KeyError terminate the whole stream.
        if "id" not in line_object or "created_at" not in line_object:
            return True

        # Single-argument print(...) behaves identically on Python 2 and 3.
        print(line_object["id"])
        print(line_object["created_at"])
        print(time.strptime(line_object["created_at"], "%a %b %d %H:%M:%S +0000 %Y"))

        # Using the tweet id as the document id means a tweet delivered twice
        # overwrites its earlier copy rather than creating a duplicate.
        es.index(index="test-index", doc_type='tweet', id=line_object["id"], body=data)
        return True

    def on_error(self, status):
        """Report a stream error status.

        status -- the status code passed in by the stream.

        Returns False on 420 (rate limited) so the stream disconnects instead
        of reconnecting and lengthening the rate-limit penalty; returns True
        for every other status so the stream carries on as before.
        """
        print(status)
        if status == 420:
            return False
        return True


if __name__ == '__main__':

    # Build the listener, authenticate against Twitter with OAuth, and wire
    # both into a Streaming API connection.
    listener = StdOutListener()
    credentials = OAuthHandler(consumer_key, consumer_secret)
    credentials.set_access_token(access_token, access_token_secret)
    tweet_stream = Stream(credentials, listener)

    # Filter the Twitter stream down to statuses matching the tracked
    # keywords: 'mulcair', 'harper', 'trudeau', 'elxn42'.
    tracked_terms = ['mulcair', 'harper', 'trudeau', 'elxn42']
    tweet_stream.filter(track=tracked_terms)