import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go
import numpy as np
from IPython.display import IFrame
# Initialise Plotly's offline notebook mode before any iplot() call below.
pyo.init_notebook_mode(connected=True)

# Load the dataset (CSV file) into a DataFrame.
csv_path = "Suicides in India 2001-2012 Updated.csv"
df = pd.read_csv(csv_path)

# Report the (rows, columns) shape, then preview the first few rows.
print(df.shape)
df.head()
# Total suicide count for each state.
# Only the 'Total' column is aggregated. Summing the whole grouped frame would
# also add up 'Year' and, with pandas >= 2.0 (numeric_only defaults to False),
# attempt to sum every non-numeric column as well.
total_suicides = df.groupby('State')[['Total']].sum()
count = list(total_suicides['Total'])
states = list(total_suicides.index)

import random

# Assign a random color to each state. Consecutive states alternate between
# two palettes (low-blue and high-blue) so neighbouring bars are easy to tell
# apart. The draw order (red, green, blue) matches the original code.
colors = {}
for position, state in enumerate(states):
    red = random.randint(10, 125)
    if position % 2 == 0:
        green = random.randint(128, 200)
        blue = random.randint(0, 15)
    else:
        green = random.randint(128, 190)
        blue = random.randint(192, 255)
    colors[state] = 'rgb({},{},{})'.format(red, green, blue)

# Bar plot of the per-state totals; colors are looked up per state so each
# bar is guaranteed to get the color assigned to its own state.
label = "Suicides count in Indian States"
bars = [
    go.Bar(
        x=states,
        y=count,
        name=label,
        marker={'color': [colors[state] for state in states]},
    )
]
layout = go.Layout(
    xaxis=dict(title=dict(text="Indian States"), automargin=True),
    yaxis=dict(title=dict(text="Suicides count"), automargin=True),
)
fig = go.Figure(data=bars, layout=layout)
pyo.iplot(fig)
The plot above clearly shows that Maharashtra has the highest suicide count, followed by West Bengal in second place and Tamil Nadu in third.
# Suicides count by year.
# Aggregate only 'Total': summing the whole grouped frame would, with
# pandas >= 2.0 (numeric_only defaults to False), also try to sum every
# non-numeric column even though nothing but 'Total' is plotted.
s_year = df.groupby('Year')[['Total']].sum()

# Line plot with scatter points: one point per year.
x = list(s_year.index)
y = list(s_year['Total'])
line = go.Scatter(
    x=x,
    y=y,
    mode='lines+markers',
    name='lines+markers',
)
layout = go.Layout(
    xaxis=dict(title=dict(text="Year"), automargin=True),
    yaxis=dict(title=dict(text="Suicides count"), automargin=True),
)
fig = go.Figure(data=[line], layout=layout)
pyo.iplot(fig)
The plot above clearly shows how the number of suicides grew over the years.
# Suicides count by gender.
# Aggregate only 'Total' so that 'Year' (and, with pandas >= 2.0, every
# non-numeric column) is not summed needlessly; this also removes the need to
# drop 'Year' afterwards.
s_gender = df.groupby('Gender')[['Total']].sum()

# Pie chart of the suicide counts per gender. Slices show the absolute value;
# the hover text shows the label and its percentage share.
pie = go.Pie(
    labels=list(s_gender.index),
    values=list(s_gender['Total']),
    hoverinfo='label+percent',
    textinfo='value',
    textfont=dict(size=20),
    title=dict(text="Suicide count : 2001-2012"),
    marker=dict(
        colors=['#ff00ff', '#00ffff'],
        line=dict(color='#ffffff', width=2),
    ),
)
pyo.iplot([pie])
Two things can be inferred from the pie chart above.
# Male/female suicide counts for every year.
# Only 'Total' is aggregated. The original summed the whole frame and then
# called int() on the row's .values, which only works when exactly one column
# survives the sum; with pandas >= 2.0 the row holds several values and int()
# raises.
yg = df.groupby(['Year', 'Gender'])[['Total']].sum()

# Pivot to one row per year and one column per gender. fill_value=0 keeps a
# year that lacks one gender from raising or producing NaN.
by_gender = yg['Total'].unstack('Gender', fill_value=0)
year = list(by_gender.index)
gender = list(by_gender.columns)
female = [int(value) for value in by_gender['Female']]
male = [int(value) for value in by_gender['Male']]

# Grouped bar plot: one bar per gender for each year.
bar1 = go.Bar(
    x=year,
    y=female,
    name='Female',
    marker=dict(color='#ff00ff'),
)
bar2 = go.Bar(
    x=year,
    y=male,
    name='Male',
    marker=dict(color='#00ffff'),
)
final = [bar1, bar2]
layout = go.Layout(
    xaxis=dict(tickangle=-45, title=dict(text='Years'), automargin=True),
    yaxis=dict(title=dict(text='Suicide count')),
    barmode='group',
)
fig = go.Figure(data=final, layout=layout)
pyo.iplot(fig)
Finally, we can conclude that in the years 2001-2012 a large number of males committed suicide, and that the number has only increased over time.