parsing - How to separate text from Twitter streaming JSON responses and run analysis on the text with Python?
I am trying to use the Twitter API to run sentiment analysis on tweet text. I am running into the issue of not understanding how to separate the text of each tweet and run the sentiment polarity analysis provided in the TextBlob library. Furthermore, I would like to pull only English tweets. The output is in JSON.
Here is my code, which produces tweets based on keywords (in this case "usd", "euro", "loonie"), and my lame attempt at storing the text and using the result in a variable:
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json
import re
import pandas as pd
import matplotlib.pyplot as plt

# Variables that contain the user credentials to access the Twitter API
access_token = "xxxx"
access_token_secret = "xxxx"
consumer_key = "xxxx"
consumer_secret = "xxxx"

# This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
    def on_data(self, data):
        print data
        return True

    def on_error(self, status):
        print status

if __name__ == '__main__':
    # This handles Twitter authentication and the connection to the Twitter Streaming API
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)

    # This line filters Twitter streams to capture data by the keywords: 'euro', 'dollar', 'loonie'
    stream.filter(track=['euro', 'dollar', 'loonie', ])

tweets_data_path = stream.filter
tweets_data = []
tweets_file = open(tweets_data_path, "r")
for line in tweets_file:
    try:
        tweet = json.loads(line)
        tweets_data.append(tweet)
    except:
        continue

print len(tweets_data)
tweets['text'] = map(lambda tweet: tweet['text'], tweets_data)
wiki = TextBlob(tweets['text'])
r = wiki.sentiment.polarity
print r
This is what the output looks like:
{"created_at":"sun jun 14 23:43:31 +0000 2015","id":610231121016524801,"id_str":"610231121016524801","text":"rt @amirulimannn: rm6 diperlukan utk tukar kpd 1pound.\nrm3 diperlukan utk tukar kpd 1s'pore dollar.\n\ngraf matawang jatuh. tak sedih ke? htt\u2026","source":"\u003ca href=\"http://twitter.com/download/iphone\" rel=\"nofollow\"\u003etwitter iphone\u003c/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":42642877,"id_str":"42642877","name":"wny","screen_name":"waaannnyyy","location":"dirgahayu darul makmur","url":null,"description":"aku serba tiada, aku kekurangan.","protected":false,"verified":false,"followers_count":320,"friends_count":239,"listed_count":1,"favourites_count":4344,"statuses_count":34408,"created_at":"tue may 26 15:10:28 +0000 2009","utc_offset":28800,"time_zone":"kuala lumpur","geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"ffffff","profile_background_image_url":"http://pbs.twimg.com/profile_background_images/433201191825047553/pm76m-v2.jpeg","profile_background_image_url_https":"https://pbs.twimg.com/profile_background_images/433201191825047553/pm76m-v2.jpeg","profile_background_tile":true,"profile_link_color":"dd2e44","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"efefef","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/609402965795835904/mm6jjrro_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/609402965795835904/mm6jjrro_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/42642877/1415486321","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"sat jun 13 03:33:29 +0000 2015","id":609564219495706624,"id_str":"609564219495706624","text":"rm6 diperlukan utk tukar kpd 1pound.\nrm3 diperlukan utk tukar kpd 1s'pore dollar.\n\ngraf matawang jatuh. tak sedih ke? 
http://t.co/dum4skb6uk","source":"\u003ca href=\"http://twitter.com/download/android\" rel=\"nofollow\"\u003etwitter android\u003c/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":481856658,"id_str":"481856658","name":"seorang iman","screen_name":"amirulimannn","location":"+06my","url":"http://instagram.com/amirulimannn","description":"i wanna drown myself in bottle of perfume","protected":false,"verified":false,"followers_count":723,"friends_count":834,"listed_count":2,"favourites_count":4810,"statuses_count":50981,"created_at":"fri feb 03 07:49:55 +0000 2012","utc_offset":28800,"time_zone":"kuala lumpur","geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"ad0a20","profile_background_image_url":"http://pbs.twimg.com/profile_background_images/378800000139426816/61dhbnyy.jpeg","profile_background_image_url_https":"https://pbs.twimg.com/profile_background_images/378800000139426816/61dhbnyy.jpeg","profile_background_tile":false,"profile_link_color":"e36009","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"24210e","profile_text_color":"89b5a2","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/592744790283911169/dw7s73wa_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/592744790283911169/dw7s73wa_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/481856658/1428379855","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweet_count":1321,"favorite_count":229,"entities":{"hashtags":[],"trends":[],"urls":[],"user_mentions":[],"symbols":[],"media":[{"id":609564142886760448,"id_str":"609564142886760448","indices":[118,140],"media_url":"http://pbs.twimg.com/media/chwbw7yusaayaew.jpg","media_url_https":"https://pbs.twimg.com/media/chwbw7yusaayaew.jpg","url":"http://t.co/dum4skb6uk","display_url":"pic.twitter.com/dum4skb6uk","expanded_url":"http://twitter.com/amirulimannn/status/609564219495706624/photo/1","type":"photo","sizes":{"small":{"w":340,"h":340,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":600,"h":600,"resize":"fit"},"large":{"w":1024,"h":1024,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":609564142886760448,"id_str":"609564142886760448","indices":[118,140],"media_url":"http://pbs.twimg.com/media/chwbw7yusaayaew.jpg","media_url_https":"https://pbs.twimg.com/media/chwbw7yusaayaew.jpg","url":"http://t.co/dum4skb6uk","display_url":"pic.twitter.com/dum4skb6uk","expanded_url":"http://twitter.com/amirulimannn/status/609564219495706624/photo/1","type":"photo","sizes":{"small":{"w":340,"h":340,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":600,"h":600,"resize":"fit"},"large":{"w":1024,"h":1024,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"in"},"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"trends":[],"urls":[],"user_mentions":[{"screen_name":"amirulimannn","name":"seorang 
iman","id":481856658,"id_str":"481856658","indices":[3,16]}],"symbols":[],"media":[{"id":609564142886760448,"id_str":"609564142886760448","indices":[139,140],"media_url":"http://pbs.twimg.com/media/chwbw7yusaayaew.jpg","media_url_https":"https://pbs.twimg.com/media/chwbw7yusaayaew.jpg","url":"http://t.co/dum4skb6uk","display_url":"pic.twitter.com/dum4skb6uk","expanded_url":"http://twitter.com/amirulimannn/status/609564219495706624/photo/1","type":"photo","sizes":{"small":{"w":340,"h":340,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":600,"h":600,"resize":"fit"},"large":{"w":1024,"h":1024,"resize":"fit"}},"source_status_id":609564219495706624,"source_status_id_str":"609564219495706624"}]},"extended_entities":{"media":[{"id":609564142886760448,"id_str":"609564142886760448","indices":[139,140],"media_url":"http://pbs.twimg.com/media/chwbw7yusaayaew.jpg","media_url_https":"https://pbs.twimg.com/media/chwbw7yusaayaew.jpg","url":"http://t.co/dum4skb6uk","display_url":"pic.twitter.com/dum4skb6uk","expanded_url":"http://twitter.com/amirulimannn/status/609564219495706624/photo/1","type":"photo","sizes":{"small":{"w":340,"h":340,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":600,"h":600,"resize":"fit"},"large":{"w":1024,"h":1024,"resize":"fit"}},"source_status_id":609564219495706624,"source_status_id_str":"609564219495706624"}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"in","timestamp_ms":"1434325411453"}
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json

# Variables that contain the user credentials to access the Twitter API
access_token = ''
access_token_secret = ''
consumer_key = ''
consumer_secret = ''

# A basic listener that prints the text of received tweets to stdout.
class StdOutListener(StreamListener):
    def on_data(self, data):
        json_load = json.loads(data)
        texts = json_load['text']
        coded = texts.encode('utf-8')
        s = str(coded)
        print(s[2:-1])
        return True

    def on_error(self, status):
        print(status)

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

stream = Stream(auth, StdOutListener())

# This line filters Twitter streams to capture data by the keywords: 'euro', 'dollar', 'loonie'
stream.filter(track=['euro', 'dollar', 'loonie'], languages=['en'])
For your original question about the JSON: you can load the data as it streams in using json.loads(). The rest of the encoding work is there so that you don't get a charmap error when you're extracting the data from Twitter into Python; the s[2:-1] gets rid of the characters left over from encoding to UTF-8. For English-only tweets, you can filter the stream directly with languages=['en'].
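To make the encoding step concrete, here is a small sketch (the sample string is made up) of what each line in on_data produces in Python 3:

texts = 'graf matawang jatuh\u2026'  # hypothetical tweet text with a non-ASCII ellipsis
coded = texts.encode('utf-8')        # bytes: b'graf matawang jatuh\xe2\x80\xa6'
s = str(coded)                       # the string "b'graf matawang jatuh\xe2\x80\xa6'"
print(s[2:-1])                       # slicing drops the leading b' and the trailing '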
I'm not familiar with the TextBlob library, but you can store the text in multiple ways; for example, write the information to a file and, when you run TextBlob, read it directly from that file. You can replace print(s[2:-1]) with the following, or add to it:
myfile = open('text.csv', 'a')
myfile.write(s[2:-1])
myfile.write('\n')  # adds a line between tweets
myfile.close()
You can then read the file back with file = open('text.csv', 'r') for the sentiment analysis. Don't forget to add file.close() any time you open a file.
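As a minimal sketch of that read-back step (assuming the tweets were saved to text.csv as above and TextBlob is installed), the analysis could look like:

from textblob import TextBlob

file = open('text.csv', 'r')
for line in file:
    blob = TextBlob(line)
    # sentiment.polarity is a float in [-1.0, 1.0], from negative to positive
    print(blob.sentiment.polarity)
file.close()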