Twitter research platform/twitter streaming es.py

from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from datetime import datetime
from elasticsearch import Elasticsearch
import json
import time
# Module-level Elasticsearch client used by the stream listener's on_data().
# It is created with no arguments, so it relies on the client library's
# default connection settings.
es = Elasticsearch()

# User credentials for accessing the Twitter API.
# NOTE(review): the "XXX" values look like placeholders -- replace them with
# real keys before running, and avoid committing real secrets to this file.
access_token = "XXX"
access_token_secret = "XXX"
consumer_key = "XXX"
consumer_secret = "XXX"

#Listener that echoes each received tweet's id and timestamp to stdout and
#indexes the raw tweet JSON into Elasticsearch.
class StdOutListener(StreamListener):
    """Stream listener that logs incoming tweets and stores them in Elasticsearch."""

    def on_data(self, data):
        """Handle one raw message delivered by the stream.

        data -- the raw JSON text of the message.

        Messages carrying both "id" and "created_at" are printed and indexed
        into the "test-index" index, using the tweet id as the document id.
        Any other message is skipped, so that a missing key cannot raise and
        stop the stream.

        Always returns True.
        """
        line_object = json.loads(data)

        # The stream can deliver messages that are not tweets (e.g. deletion
        # or rate-limit notices); those have no top-level id/created_at.
        if (not isinstance(line_object, dict)
                or "id" not in line_object
                or "created_at" not in line_object):
            print("Skipping non-tweet stream message")
            return True

        tweet_id = line_object["id"]
        created_at = line_object["created_at"]

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(tweet_id)
        print(created_at)
        # Parsing doubles as a sanity check of the timestamp format.
        print(time.strptime(created_at, "%a %b %d %H:%M:%S +0000 %Y"))

        # Store the untouched JSON payload; re-delivery of the same tweet
        # reuses the same document id.
        es.index(index="test-index", doc_type='tweet', id=tweet_id, body=data)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream.

        Returns None.
        NOTE(review): presumably this leaves reconnect/backoff handling to
        tweepy's defaults -- confirm against the installed tweepy version.
        """
        print(status)


if __name__ == '__main__':

    # Keywords whose matching tweets are captured from the Streaming API.
    tracked_terms = ['mulcair', 'harper', 'trudeau', 'elxn42']

    # Build the listener that will receive every streamed message.
    tweet_listener = StdOutListener()

    # Authenticate against Twitter with the module-level credentials.
    oauth = OAuthHandler(consumer_key, consumer_secret)
    oauth.set_access_token(access_token, access_token_secret)

    # Open the stream and start filtering on the tracked keywords.
    tweet_stream = Stream(oauth, tweet_listener)
    tweet_stream.filter(track=tracked_terms)

Twitter research platform/twitter streaming es.py

Navigation menu

Page actions

Page actions

Personal tools

Navigation

Search

Tools