Suicide statistics analysis of Indian states (2001 - 2012)

import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go
import numpy as np
from IPython.display import IFrame
pyo.init_notebook_mode(connected=True)

Importing dataset and checking its dimensions and data

# Load the CSV export into a DataFrame, then report its (rows, columns) shape
# and preview the first few records
csv_path = "Suicides in India 2001-2012 Updated.csv"
df = pd.read_csv(csv_path)
print(df.shape)
df.head()

(236583, 7)

# Calculating total suicide count for each state.
# Only the 'Total' column is summed: a bare groupby(...).sum() would also
# aggregate 'Year' and, on pandas versions where numeric_only defaults to
# False, every text column, none of which is used below.
total_suicides = df.groupby('State')[['Total']].sum()

# Per-state totals and the matching state names, both in index order
count = list(total_suicides['Total'])

states = list(total_suicides.index)

import random

# Assigning a random 'rgb(r,g,b)' color code to each state.
# States at even positions draw a low blue channel (0-15), states at odd
# positions a high one (192-255), so consecutive bars alternate in tone.
colors = {}
for position, state in enumerate(states):
    red = random.randint(10, 125)
    if position % 2 == 0:
        green = random.randint(128, 200)
        blue = random.randint(0, 15)
    else:
        green = random.randint(128, 190)
        blue = random.randint(192, 255)
    colors[state] = f'rgb({red},{green},{blue})'

# Bar chart of the total suicide count per state, one colored bar per state
label = "Suicides count in Indian States"
bars = [
    go.Bar(
        x=states,
        y=count,
        name=label,
        # colors was filled in the same order as states, so values line up
        marker=dict(color=list(colors.values())),
    )
]

layout = go.Layout(
    xaxis={'title': {'text': "Indian States"}, 'automargin': True},
    yaxis={'title': {'text': "Suicides count"}, 'automargin': True},
)
fig = go.Figure(data=bars, layout=layout)
pyo.iplot(fig)

The above plot tells us clearly that Maharashtra stands first in the suicide count.

West Bengal takes the second place in this category.

Tamil Nadu takes the third place in this category.

# Suicides count by year.
# Aggregate only the 'Total' column; the remaining columns are not needed
# and summing them is wasted work.
s_year = df.groupby('Year')[['Total']].sum()

# Line chart (with point markers) of the suicide count for each year
layout = go.Layout(
    xaxis={'title': {'text': "Year"}, 'automargin': True},
    yaxis={'title': {'text': "Suicides count"}, 'automargin': True},
)
x = list(s_year.index)
y = list(s_year['Total'])
line = go.Scatter(x=x, y=y, mode='lines+markers', name='lines+markers')
fig = go.Figure(data=[line], layout=layout)
pyo.iplot(fig)

The above plot clearly shows how the suicide count has grown over the years.

# Suicides count by gender.
# Aggregate only the 'Total' column, so there is no summed 'Year' column
# to drop afterwards and no text column is aggregated.
s_gender = df.groupby('Gender')[['Total']].sum()

# Pie chart of the suicide count per gender: each slice is labelled with its
# raw count, while hovering shows the gender label and its percentage
pie = go.Pie(
    labels=list(s_gender.index),
    values=list(s_gender['Total']),
    title={'text': "Suicide count : 2001-2012"},
    textinfo='value',
    hoverinfo='label+percent',
    textfont={'size': 20},
    marker={
        'colors': ['#ff00ff', '#00ffff'],
        'line': {'color': '#ffffff', 'width': 2},
    },
)
pyo.iplot([pie])

Two things can be inferred from the above pie chart.

Men in India commit more suicides than women.
This may suggest that women are mentally more resilient than men, although the chart alone cannot establish that.

# Calculating male-female suicide counts for every year.
# Only 'Total' is aggregated, so each (Year, Gender) row holds one number.
yg = df.groupby(['Year', 'Gender'])[['Total']].sum()

# Distinct values of each index level: the years and the gender labels
year = list(yg.index.levels[0])
gender = list(yg.index.levels[1])

# Read the single 'Total' cell for each (year, gender) pair. Converting a
# whole row array with int(...) fails when the row has more than one column
# and is deprecated by NumPy even for a one-element array.
female = [int(yg.loc[(i, 'Female'), 'Total']) for i in year]
male = [int(yg.loc[(i, 'Male'), 'Total']) for i in year]

# Grouped bar chart of suicide counts by year: one female bar and one male
# bar side by side for every year
layout = go.Layout(
    barmode='group',
    xaxis={'tickangle': -45, 'title': {'text': 'Years'}, 'automargin': True},
    yaxis={'title': {'text': 'Suicide count'}},
)
bar1 = go.Bar(name='Female', x=year, y=female, marker={'color': '#ff00ff'})
bar2 = go.Bar(name='Male', x=year, y=male, marker={'color': '#00ffff'})
final = [bar1, bar2]
fig = go.Figure(data=final, layout=layout)
pyo.iplot(fig)

Finally, we can conclude that in the years 2001-2012 a large number of men committed suicide, and that this number only increased over the period.

	State	Year	Type_code	Type	Gender	Age_group
0	A & N Islands	2001	Causes	Illness (Aids/STD)	Female	0-14
1	A & N Islands	2001	Causes	Bankruptcy or Sudden change in Economic	Female	0-14
2	A & N Islands	2001	Causes	Cancellation/Non-Settlement of Marriage	Female	0-14
3	A & N Islands	2001	Causes	Physical Abuse (Rape/Incest Etc.)	Female	0-14
4	A & N Islands	2001	Causes	Dowry Dispute	Female	0-14