Twitter research platform/twitter streaming es.py: Difference between revisions

Revision as of 14:12, 21 October 2015

from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from datetime import datetime
from elasticsearch import Elasticsearch
import json
import time
from datetime import datetime
import pprint
es = Elasticsearch()

#Variables that contains the user credentials to access Twitter API
#Variables that contains the user credentials to access Twitter API
access_token = "XXX"
access_token_secret = "XXX"
consumer_key = "XXX"
consumer_secret = "XXX"

#This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):

    def on_data(self, data):
        # print data
        line_object = json.loads(data)

        # "created_at":"Mon Oct 12 19:42:21 +0000 2015"
        mydatetime = datetime.strptime(line_object["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
        ltime = mydatetime.strftime("%Y-%m-%dT%H:%M:%S") + ".%03d" % (mydatetime.microsecond / 1000) + "Z"

        res = es.index(index="twitterindex_v3", doc_type='tweet', id=line_object["id"], timestamp=ltime, body=data)
        return True

    def on_error(self, status):
        pp = pprint.PrettyPrinter(indent=4)
        print status
        pp.pprint(self)


if __name__ == '__main__':

    #This handles Twitter authetification and the connection to Twitter Streaming API
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)

    #This line filter Twitter Streams to capture data by the keywords: 'python', 'javascript', 'ruby'
    stream.filter(track=['mulcair', 'harper', 'trudeau', 'elxn42'])

Twitter research platform/twitter streaming es.py: Difference between revisions

Revision as of 14:12, 21 October 2015

Navigation menu

Page actions

Page actions

Personal tools

Navigation

Search

Tools