Identifying bias in the media with sentiment analysis

07 February 2017

data-science


There is an age-old debate in the US about how the press and other media outlets may be politically biased. This has been particularly salient during the recent Clinton-Trump presidential campaign and since the election took place at the end of 2016. I have personally followed the whole campaign quite closely and have tried to regularly watch several TV news channels in order to absorb different views and perspectives. I've also developed my own subjective sense of what side of the political spectrum each channel tends to lean toward. Yet, most channels claim to be independent and unbiased (e.g. Fox News' slogan is "Fair & Balanced"). So I thought it'd be interesting to try and find objective ways to measure how biased media outlets actually are.

In this article, I describe a case study that I've recently worked on. First, I explain the study's methodology, which is based on the sentiment analysis of videos published on Youtube by a number of prominent American TV channels. I then present some details about the acquired dataset before laying out the study's results. Finally, I discuss some limitations of the study and possible areas for improvement.

All the code that I've written to support this study has been published on Github (https://github.com/jphalip/media-bias). Feel free to download that code if you're interested in running similar studies yourself.

Methodology

For this study I've decided to analyze videos posted on Youtube by some of the most prominent American TV news channels, including the so-called "Big Three" of cable news (Fox News, CNN & MSNBC) and CBS News. One may argue that not all content aired on a given TV channel necessarily ends up being published on that channel's Youtube account, and that this may therefore skew the results. My counter-argument is that, by curating videos on their Youtube account for online consumption, a TV channel likely exposes even greater bias, making the Youtube data all the more pertinent for the purposes of this study.

It was fairly easy to download the video metadata for all channels (titles, descriptions, publication dates, etc.) using the Youtube API: https://github.com/jphalip/media-bias/blob/master/code/youtube_api.py
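
To give a sense of how that works, here is a minimal sketch (assuming the google-api-python-client library and a hypothetical API key; the repository's actual code is linked above) that pages through a channel's "uploads" playlist:

from googleapiclient.discovery import build

# Hypothetical API key; the code linked above manages its own credentials.
youtube = build('youtube', 'v3', developerKey='YOUR_API_KEY')

def fetch_video_metadata(username):
    # Look up the channel's "uploads" playlist, which lists every published video.
    channel = youtube.channels().list(
        part='contentDetails', forUsername=username).execute()
    uploads_id = channel['items'][0]['contentDetails']['relatedPlaylists']['uploads']

    # Page through the playlist, collecting each video's ID, title,
    # description and publication date.
    videos, page_token = [], None
    while True:
        response = youtube.playlistItems().list(
            part='snippet', playlistId=uploads_id,
            maxResults=50, pageToken=page_token).execute()
        for item in response['items']:
            snippet = item['snippet']
            videos.append({
                'youtube_id': snippet['resourceId']['videoId'],
                'title': snippet['title'],
                'description': snippet['description'],
                'published_at': snippet['publishedAt'],
            })
        page_token = response.get('nextPageToken')
        if page_token is None:
            return videos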

Once all the video metadata was acquired, I selected a range of political topics ("Obama", "Clinton", "Trump", "Democrats", "Republicans", "Conservatives", "Liberals") and extracted the relevant videos that mentioned those topics. Titles that contained different variants of the same topic's word were flagged for that topic ("Mr. Obama Goes To Washington" and "The Obamas Vacation in N.C." were both flagged for the "Obama" topic). This approach also means that, for example, videos about Melania Trump were flagged for the "Trump" topic and videos about Bill Clinton were flagged for the "Clinton" topic. Also, people's first names were not used for filtering and flagging videos, as they are too generic and could have yielded false positives (e.g. "Donald" could have yielded videos about Donald Rumsfeld or Donald Sterling).
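
For illustration, here is a rough sketch of that flagging logic (the variant lists below are hypothetical examples; the study's actual lists and logic live in topics.csv and the create_topic_columns() function in the repository):

import re

# Hypothetical variant lists for two of the topics.
TOPIC_VARIANTS = {
    'obama': ['obama', 'obamas', "obama's"],
    'trump': ['trump', 'trumps', "trump's"],
}

def mentions_topic(title, variants):
    # True if any variant appears as a whole word in the title.
    pattern = r'\b(%s)\b' % '|'.join(re.escape(v) for v in variants)
    return re.search(pattern, title.lower()) is not None

for slug, variants in TOPIC_VARIANTS.items():
    all_videos[slug] = all_videos.title.apply(
        lambda title: mentions_topic(title, variants))
all_videos['relevant'] = all_videos[list(TOPIC_VARIANTS)].any(axis=1)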

Then I analyzed the sentiment of all relevant video titles. Oxford Dictionaries defines "sentiment analysis" as follows:

The process of computationally identifying and categorizing opinions expressed in a piece of text, especially in order to determine whether the writer's attitude towards a particular topic, product, etc. is positive, negative, or neutral. (Source: Oxford Dictionaries)

The goal is to find out whether sentiment analysis can help identify differences in how channels treat contentious political topics. This isn't the first time that sentiment analysis has been used for that purpose (see for example this study showcased in the Washington Post). However, I think that my study is somewhat unique in the way it breaks the data down into multiple topics.

When looking through the data I noticed that the video descriptions often contained some generic or irrelevant text (e.g. "Follow msnbc on Tumblr" or "Check out FOX 411 for more entertainment news and gossip"). I wanted to prevent that irrelevant text from influencing the sentiment analysis results. Cleaning up that data would have been extremely time-consuming and tedious, so I ignored all the video descriptions and only analyzed the sentiment of the video titles. I would posit that Youtube users generally pay much more attention to video titles than to their descriptions anyway.

The sentiment analysis was performed using the Google Natural Language API. It's pretty simple and straightforward: you send a piece of text to the API server, which then analyzes that text and returns a sentiment score between -1.0 (negative sentiment) and 1.0 (positive sentiment) as determined by Google's machine learning algorithms. The complete code for downloading sentiment scores for all videos and for saving the data to disk can be found here: https://github.com/jphalip/media-bias/blob/master/code/language_api.py
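
For reference, here is a minimal sketch of such a call, assuming a recent version of the google-cloud-language client library (which may differ from the client used in the linked code):

from google.cloud import language_v1

client = language_v1.LanguageServiceClient()

def sentiment_score(text):
    # Send the text to the API and return the document-level sentiment
    # score, a float between -1.0 (negative) and 1.0 (positive).
    document = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(request={'document': document})
    return response.document_sentiment.score

print(sentiment_score("Trump: It's amazing that I did so well in SC"))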

Note that the Google Natural Language API is not free. The sentiment analysis was run once for each of the ~30K relevant videos that I had collected, which in the end cost me around US$30. If you're interested in running similar experiments, make sure to first refer to the API's pricing page for adequate budgeting.

Alright, now we're ready to do some exploration. Let's jump right in!

Data exploration

First, let's import all the code libraries that we need in order to perform our work:

In [26]:
from __future__ import division
from IPython.display import display
from datetime import datetime
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from code.utils import show_videos
from code.plotting import plot_channel_stats, plot_compressed_channel_stats, plot_sentiment_series
%matplotlib inline

Here are the channels considered for this study:

In [3]:
channels = pd.read_csv('channels.csv')
channels[['title', 'url', 'color']]
Out[3]:
title url color
0 Fox News https://www.youtube.com/user/FoxNewsChannel #5975a4
1 CNN https://www.youtube.com/user/CNN #b55d60
2 MSNBC https://www.youtube.com/user/msnbcleanforward #5f9e6e
3 CBS News https://www.youtube.com/user/CBSNewsOnline #666666

(Note: The color attributes are just arbitrary colors that will be used to visually differentiate the channels in all the graphs featured later in this article.)

Here are the topics that I've chosen to focus on:

In [4]:
topics = pd.read_csv('topics.csv')
topics[['title', 'slug']]
Out[4]:
title slug
0 Obama obama
1 Clinton clinton
2 Trump trump
3 Democrats democrats
4 Republicans republicans
5 Liberals liberals
6 Conservatives conservatives

(Note: The slug attributes are the names used for the columns in the video dataset that flag videos relating to specific topics — See the video dataset in question right below.)

Now let's take a look at the video data. Here's a small sample below:

In [5]:
all_videos = pd.read_csv('videos.csv', parse_dates=['published_at'])
all_videos.head(3)
Out[5]:
channel_youtube_id description published_at title youtube_id channel obama clinton trump democrats conservatives relevant republicans liberals
0 UCXIJgqnII2ZOINSWNOGFThA Fox News contributor explains 2017-02-03 16:59:45 Turner: New sanctions show Trump's change of h... mMwcBRjOhTE Fox News False False True False False True False False
1 UCXIJgqnII2ZOINSWNOGFThA Police arrested protesters during the event 2017-02-03 16:59:19 Violent protests erupt over conservative speak... RJCvUuoyJvE Fox News False False False False True True False False
2 UCXIJgqnII2ZOINSWNOGFThA Controversy over president's call with Austral... 2017-02-03 16:00:55 Schlapp, Williams debate Trump's tone with for... v9cx1l6GJdI Fox News False False True False False True False False

The youtube_id is the unique ID assigned by Youtube to each video (Tip: you may watch any video by visiting the URL https://www.youtube.com/watch?v=[insert the video youtube_id here]). Note that there is a separate column for each topic that is named after the topic's slug (e.g. obama, democrats, conservatives) and that flags videos as being relevant to the corresponding topic (i.e. the value is True if the topic is mentioned in the video title, or False otherwise). The relevant column is a flag that is True only if at least one of the topics is mentioned in the video title — that column allows us to easily extract videos that are directly relevant to this study. You can see how all those columns were calculated and pre-processed by referring to the create_topic_columns() function published here: https://github.com/jphalip/media-bias/blob/master/code/utils.py

Now let's look at some general statistics about our dataset:

In [6]:
num_relevant = all_videos.relevant.sum()
num_total = all_videos.shape[0]
print('Number of relevant videos: %s' % num_relevant)
print('Total number of videos: %s' % num_total)
print('Percentage of relevant videos: %0.2f%%' % (100*num_relevant/num_total))
Number of relevant videos: 33710
Total number of videos: 186571
Percentage of relevant videos: 18.07%

So the chosen topics are covered in about 18% of all videos ever published by the selected channels, which I'd argue is sufficiently significant for the purposes of this study. Let's now drill down a bit further and see to what extent those topics are covered overall by each channel:

In [7]:
channel_stats = pd.DataFrame({
    'relevant': all_videos.groupby('channel').relevant.sum().astype(int),
    'total': all_videos.groupby('channel').size()
})
channel_stats['percentage_relevant'] = (100*channel_stats.relevant/channel_stats.total).round(2)
channel_stats.sort_values('percentage_relevant', ascending=False)
Out[7]:
relevant total percentage_relevant
channel
MSNBC 2606 6120 42.58
Fox News 10753 28231 38.09
CNN 14855 100000 14.86
CBS News 5496 52220 10.52

Fox News and MSNBC both cover those topics quite extensively (in about 40% of all their published videos), whereas CBS News and CNN both seem to cover many other kinds of topics as well (probably sports, science, entertainment, etc.). This indicates that Fox News and MSNBC are both quite focused on politics.

We can now see quantitatively how much (in absolute numbers) each individual topic is covered by those channels:

In [10]:
absolutes = all_videos.groupby('channel')[topics.slug].sum().astype(int)
display(absolutes)
obama clinton trump democrats republicans liberals conservatives
channel
CBS News 3141 913 1545 101 175 15 34
CNN 8235 2799 4123 149 216 19 72
Fox News 987 3304 6754 273 526 73 129
MSNBC 250 663 1773 74 82 1 16

Some initial observations can be made from the above table:

  • CNN talks a lot about Obama, a lot more so than other channels.
  • The term "Liberals" in video titles seems to be mostly used by Fox News. MSNBC almost never mentions it.
  • Trump has been covered about twice as much as Clinton.

One way to evaluate how important each topic is to each channel is to calculate the percentage of its videos covering that topic relative to the total number of videos that the channel has published:

In [11]:
totals = all_videos.groupby('channel').size()
relatives = 100 * absolutes.divide(totals, axis=0)
display(relatives)
obama clinton trump democrats republicans liberals conservatives
channel
CBS News 6.014937 1.748372 2.958637 0.193412 0.335121 0.028725 0.065109
CNN 8.235000 2.799000 4.123000 0.149000 0.216000 0.019000 0.072000
Fox News 3.496157 11.703447 23.924055 0.967022 1.863200 0.258581 0.456944
MSNBC 4.084967 10.833333 28.970588 1.209150 1.339869 0.016340 0.261438

Those percentages can be illustrated as follows:

In [12]:
plot_channel_stats(relatives, topics, channels, title='Relative topic coverage\n(% of total # of each channel\'s videos)')

Based on those graphs we can draw a couple more observations:

  • Obama is mentioned in 3-8% of videos from all channels. That is not too surprising given that he's been president for 8 years.
  • Fox News and MSNBC have mentioned Trump in about a quarter of their videos; they've mentioned Clinton only about half as often.

(Note: If you're interested in checking out the code written to generate those graphs, please refer to: https://github.com/jphalip/media-bias/blob/master/code/plotting.py)

Sentiment analysis

Overall sentiments

Alright, this is where things get much more interesting. As mentioned earlier, the sentiment of all relevant videos was calculated using the Google Natural Language API. The sentiment scores were stored in a separate CSV file. Here is a small sample:

In [13]:
sentiments = pd.read_csv('sentiments.csv')
sentiments[['youtube_id', 'sentiment_score']].head()
Out[13]:
youtube_id sentiment_score
0 rkLZEHl6gtc -0.7
1 v9zqWRzaE0c 0.2
2 Yv2OzJoZtzw -0.6
3 d9CdoVvG72U 0.4
4 fcCZunx-Ayw 0.3

The youtube_id column contains the unique video IDs. The sentiment_score column contains the scores (between -1 and 1) for all relevant videos. A score of 0 would correspond to neutral sentiment, a score of -1 to extremely negative sentiment, and a score of 1 to extremely positive sentiment.

Let's merge the sentiment scores into the main video dataset and then look at a small sample of videos with positive and negative sentiments:

In [14]:
videos = all_videos[all_videos.relevant].merge(sentiments, on='youtube_id')
In [16]:
# Some videos with negative sentiment:
videos.sort_values('sentiment_score')[['channel', 'title', 'sentiment_score', 'youtube_id']].head(4)
Out[16]:
channel title sentiment_score youtube_id
15112 CNN OBAMA STATEMENT ON FORCED BUDGET CUTS- WALK UP -0.9 oy1NAgE4j2U
11081 CNN Spicer: Russia-Trump report is disgraceful -0.9 -3xtbsxOnRo
32072 CBS News Huntsman: Obama has weakened the United States -0.9 LNIYsmIoYh8
16873 CNN WH BRIEFING-OBAMA WON'T HOLD US HOSTAGE -0.9 Q_1_l8IAO1I
In [17]:
# Some videos with positive sentiment:
videos.sort_values('sentiment_score', ascending=False)[['channel', 'title', 'sentiment_score', 'youtube_id']].head(4)
Out[17]:
channel title sentiment_score youtube_id
20521 CNN OBAMA CABINET MTG - BUDGET DEAL-VERY PLEASED 0.9 casIztwAQCc
20645 CNN OBAMA W COLOMBIAN PRES- MOVING BEYOND SECURITY 0.9 sr1tlS8pltY
11527 CNN Trump's Supreme Court pick coming right after ... 0.9 MSeQieB5PnM
8165 Fox News Trump: It's amazing that I did so well in SC 0.9 1f8PKgMCFbQ

Here is how the sentiment scores are distributed across the dataset:

In [21]:
sns.distplot(videos.sentiment_score, axlabel=False, ax=plt.gca())
plt.title('Sentiment scores distribution')
plt.gca().get_yaxis().set_visible(False)
plt.xlim(-1,1)
plt.show()

Note that a majority of video titles tend to have a positive sentiment in the 0-0.5 range.

We're now in a position to visualize the average sentiment of each channel toward each topic:

In [29]:
scores = pd.DataFrame(index=channels.sort_values('title').title, columns=topics.slug, )
for channel, group in videos.groupby('channel'):
    for topic in topics.slug:
        scores.loc[channel, topic] = group[group[topic]].sentiment_score.mean()
scores = scores.rename_axis('Topic', axis=1)
scores = scores.rename_axis('Channel', axis=0)
display(scores)
plot_channel_stats(scores, topics, channels, fig_height=10, y_center=True, title='Average sentiment by topic')
Topic obama clinton trump democrats republicans liberals conservatives
Channel
CBS News 0.0765998 -0.0346112 0.181424 -0.00594059 -0.0194286 -0.1 0.185294
CNN 0.0554827 -0.0035727 0.177813 0 -0.0180556 -0.173684 0.0347222
Fox News -0.0420466 -0.0848063 0.173912 -0.0355311 -0.0524715 -0.216438 0.0806202
MSNBC 0.1216 0.0307692 0.156458 0.108108 0.1 0 0.125

For a different perspective, the same data can be compressed into the following graph:

In [28]:
plot_compressed_channel_stats(scores, y_center=True, title='Average sentiment by topic')

The graphs above allow us to make a few interesting observations:

  • MSNBC seems to have a positive tone overall.
  • Obama has been generally spoken of in positive terms everywhere except on Fox News.
  • Clinton has been spoken of in negative terms everywhere except on MSNBC.
  • Trump has overall clearly been spoken of in positive terms across the board.
  • Conservatives have generally been spoken of in positive terms.
  • Each channel seems to cover Democrats with roughly the same tone as it does Republicans.
  • Liberals have been covered in negative terms (except on MSNBC, which as mentioned before hardly uses that word at all).

Evolution through time

The statistics that I've presented so far were all averages. While those give an idea of the overall sentiment on each topic, it would be quite interesting to also see how the sentiment has evolved through time, in particular over the past two years during the presidential campaign. Let's take a look at this evolution with the time-series graphs below:

In [32]:
plot_sentiment_series(videos, topics, channels, start_date=datetime(2015, 1, 1), title='Sentiment evolution during the presidential campaign')

A few observations can be made from the above graphs:

  • MSNBC appears to consistently cover all topics in a fairly positive tone.
  • The sentiment about Republicans and Democrats regularly oscillates between positive and negative for all channels (except MSNBC).
  • Obama is consistently spoken of negatively by Fox News, positively by MSNBC, and with mixed sentiment by CBS News and CNN.
  • Sentiment about Trump is starkly positive throughout.
  • Clinton was mostly spoken of in negative terms during the campaign. Even the sentiment on MSNBC, which is generally mostly positive, was just above neutral on Clinton.

Left-wing vs Right-wing

For better or worse, it is quite common in the US to consider the political spectrum as bi-modal: left-wing and right-wing. In other words: "Liberals vs Conservatives", "Democrats vs Republicans" or "Obama & Clinton vs Trump". To see how sentiments from our dataset are distributed across this bi-modal spectrum, we can separate left-oriented topics from right-oriented topics and then calculate the averages for each channel:

In [65]:
# Separate left-oriented topics from right-oriented topics
left_topics = ['obama', 'clinton', 'democrats', 'liberals']
right_topics = ['trump', 'republicans', 'conservatives']

# Create two new flag columns, one for each mode
videos['left'] = np.any(videos[left_topics], axis=1)
videos['right'] = np.any(videos[right_topics], axis=1)

# Calculate average sentiments for each channel
modes = ['left', 'right']
scores = pd.DataFrame(index=channels.sort_values('title').title, columns=modes)
for channel, group in videos.groupby('channel'):
    for mode in modes:
        scores.loc[channel, mode] = group[group[mode]].sentiment_score.mean()
scores = scores.rename_axis('Topic', axis=1)
scores = scores.rename_axis('Channel', axis=0)
display(scores)
Topic left right
Channel
CBS News 0.0495623 0.161105
CNN 0.0397833 0.166934
Fox News -0.0737636 0.158329
MSNBC 0.0597092 0.154019

The same results can be represented graphically as follows:

In [66]:
plot_compressed_channel_stats(scores, color=['#50AFE8', '#E61B23'], y_center=True, title='Average sentiment: Left-wing vs Right-wing')

If sentiment analysis is to be trusted, then those channels all appear to lean fairly conservative. The overall tone is also generally positive, with one notable exception: Fox News tends to cover left-oriented topics in negative terms.

Limitations and future improvements

I will be the first to admit that sentiment analysis, as conducted in this study, isn't perfect and does have some limitations.

On subjectivity

Sentiment analysis is sometimes criticized for its subjectivity, as two people may disagree on whether a given sentence is negative or positive.

See for example the following videos that were estimated by the Google API to have a positive sentiment score:

In [36]:
show_videos(videos, ['NDq3Ojmk0mI', 'nLU12dCJpZ8'])
Out[36]:
title sentiment_score channel published_at youtube_id
Can DC adjust to President Trump's swift speed? 0.5 Fox News 2017-01-28 23:59:53 NDq3Ojmk0mI
Did President-elect Trump inherit a divided America? 0.5 Fox News 2016-12-07 21:03:11 nLU12dCJpZ8

... and those videos with negative sentiment scores:

In [37]:
show_videos(videos, ['qwws4b22NIk', 'INNBVixrAgc'])
Out[37]:
title sentiment_score channel published_at youtube_id
How delegates felt about the Republican National Convention -0.5 Fox News 2016-07-23 22:46:41 INNBVixrAgc
"Obama Did Not Let Me Down" -0.5 CBS News 2009-12-27 17:17:29 qwws4b22NIk

You may have your own opinion on whether the videos above were accurately rated. Some level of faith must be placed in the algorithm used (in this case Google's Natural Language API) to produce more objective results overall when given a sufficiently large dataset. As data scientist Matthew Russell says:

It’s critical to mine a large — and relevant — sample of data when attempting to measure sentiment. No particular data point is necessarily relevant. It’s the aggregate that matters. (Source)

Here the hope is that the 30K videos that were analyzed constitute a large-enough dataset. But results could certainly be improved by integrating more videos from more channels into the research.

On context

The sentiment analysis was performed for each video title in a complete vacuum, without any context. Yet, context can sometimes be critical in accurately estimating sentiment, as entrepreneur AJ Bruno puts it:

Context also plays a big role in understanding a writer’s feelings on a subject. “That movie was bad!” is definitely negative sentiment from a 50 year old film critic, but it might be glowing praise from a 17 year old boy. (Source)

In this study, it's reasonable to assume that context is somewhat narrow anyway, as all data comes from news channels (i.e. not from comedy channels that may have used sarcasm, for example). However, the approach could perhaps be improved by taking notable political events into consideration in order to apply specific weights to certain sentiment scores.

On mixed topics

Sometimes the same video title may mention multiple opposite topics (e.g. Trump and Clinton, or Democrats and Republicans). When that occurs, the same sentiment score gets applied to each of the topics covered. Sometimes that is fine, if the topics are treated the same way, for example in the following videos where Trump and Clinton are both criticized, or both praised, at the same time:

In [38]:
show_videos(videos, ['qmNDKUk-JuE', 'pL3HOH2YG9I'])
Out[38]:
title sentiment_score channel published_at youtube_id
Why are Clinton and Trump doing so poorly with Millennials? -0.7 Fox News 2016-09-21 19:01:54 qmNDKUk-JuE
Polls show supporters of Clinton, Trump equally enthusiastic 0.7 Fox News 2016-09-16 14:02:32 pL3HOH2YG9I

This is more problematic if one topic is treated more favorably than the other in the same sentence. For example in these videos:

In [39]:
show_videos(videos, ['Wjtm14fDAjQ', 'JmCmNHEGWWs'])
Out[39]:
title sentiment_score channel published_at youtube_id
Trump slams Clinton's involvement in Wisconsin recount 0.7 Fox News 2016-11-28 15:00:29 JmCmNHEGWWs
Hillary Clinton: Happy to put my record against Trump's lies 0.6 CNN 2016-06-03 20:38:38 Wjtm14fDAjQ

For the above videos, a positive score will be applied to both topics "Clinton" and "Trump". Similarly a negative score will be applied to both topics for the following videos:

In [40]:
show_videos(videos, ['f5NSJStPxEw', 'tRQm6tGyGbs'])
Out[40]:
title sentiment_score channel published_at youtube_id
Trump: Clinton scandal worse than Watergate -0.7 Fox News 2016-10-18 13:59:05 tRQm6tGyGbs
Clinton: Trump promotes bigotry and paranoia -0.6 CBS News 2016-08-25 18:59:05 f5NSJStPxEw

Ideally, in each of those problematic instances, different scores should be applied to each topic. To implement this we could perhaps automatically analyze the syntax of each video title and determine how each topic is affected by the title's overall sentiment. The Google Natural Language API actually also provides a syntax analysis service that is worth looking into for that purpose.
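
As a related option, the API also provides entity sentiment analysis, which attempts to assign a separate score to each entity mentioned in the text. Here is a hedged sketch (assuming a recent google-cloud-language client; this is a suggested alternative, not the study's method) of how it might separate "Trump" from "Clinton" in a single title:

from google.cloud import language_v1

client = language_v1.LanguageServiceClient()

def entity_sentiments(text):
    # Return one (entity name, sentiment score) pair per entity the API
    # detects in the text, instead of a single document-level score.
    document = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
    response = client.analyze_entity_sentiment(request={'document': document})
    return [(entity.name, entity.sentiment.score) for entity in response.entities]

print(entity_sentiments("Trump slams Clinton's involvement in Wisconsin recount"))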

Currently, in our dataset, about 8% of video titles cover more than one topic at the same time:

In [42]:
(np.sum(videos[topics.slug], axis=1) > 1).sum() * 100 / len(videos)
Out[42]:
8.2198688855269797

Similarly, about 6% of video titles cover both left-oriented and right-oriented topics at the same time:

In [44]:
np.sum(videos.left & videos.right) * 100 / len(videos)
Out[44]:
6.422236065379253

If analyzing the syntax proves to be too difficult or tedious, then a simple solution would be to delete all problematic records in order to completely remove potential ambiguities.
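
With the flag columns defined earlier, that filtering would be straightforward, for example:

# Keep only titles that don't mix left- and right-oriented topics.
unambiguous_videos = videos[~(videos.left & videos.right)]

# Or, more strictly, keep only titles covering at most one topic.
single_topic_videos = videos[np.sum(videos[topics.slug], axis=1) <= 1]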

Other possible improvements

Other improvements could be considered in future revisions of this study:

  • In order to more accurately make comparisons between different channels, the data could be normalized by taking into account the overall sentiment of each channel, i.e. the average sentiment score for all videos ever published by each channel, even for videos that do not cover the studied topics (a rough sketch follows this list).
  • The algorithm I wrote to flag videos for each topic was fairly naive in that it only looked for exact matches. It's possible that some video titles contained typos and were therefore missed. The filtering algorithm could be improved by using fuzzy search instead (see the second sketch after this list).
  • Last but not least, it'd be interesting to extend the study to other news channels, for example: Bloomberg, ABC News, NBC News, Fusion, Free Speech TV, or the Blaze.
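
To make the first two ideas above more concrete, here are two rough sketches. First, baseline normalization (with the caveat that this study only collected sentiment scores for the relevant videos, whereas the idea above calls for first scoring all of a channel's videos):

# Subtract each channel's average sentiment from its sentiment scores.
# Caveat: this baseline is computed from the relevant videos only; the
# improvement described above would score ALL of a channel's videos first.
channel_baseline = videos.groupby('channel').sentiment_score.mean()
normalized_scores = scores.astype(float).sub(channel_baseline, axis=0)
display(normalized_scores)

Second, fuzzy topic matching, sketched here with the standard library's difflib (a dedicated fuzzy-matching library would likely perform better):

import difflib

def fuzzy_mentions_topic(title, variants, cutoff=0.85):
    # True if any word in the title closely resembles one of the variants.
    words = title.lower().split()
    return any(
        difflib.get_close_matches(variant, words, n=1, cutoff=cutoff)
        for variant in variants
    )

print(fuzzy_mentions_topic('Obma speaks at rally', ['obama']))  # True despite the typo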

Conclusion

This study was an attempt at objectively evaluating bias in the media by analyzing the sentiment of video titles published on a few prominent TV channels' Youtube accounts. The approach of using sentiment analysis is certainly not perfect. It is questionable whether positive or negative sentiment on a topic necessarily represents bias toward or against that topic. Similarly, neutral sentiment may not necessarily indicate neutral political bias. My hope, however, is that this study offers some direction and inspiration for future research in this field.

Most of all, I hope you enjoyed reading this article as much as I enjoyed working on it. Any feedback is welcome and appreciated!