Learn by Coding Examples in Applied Machine Learning

How to Visualize Machine Learning Data in Python using Pandas?

In [7]:
# ignore warnings
import warnings
warnings.filterwarnings("ignore")

Univariate Histograms

In [8]:
import matplotlib.pyplot as plt
import pandas

# Remote CSV to load (Pima Indians diabetes data, per the file name).
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.csv"

# Column labels supplied by hand; with `names=` given, read_csv treats the
# first line of the file as data rather than as a header.
names = [
    'preg', 'plas', 'pres',
    'skin', 'test', 'mass',
    'pedi', 'age', 'class',
]

data = pandas.read_csv(url, header=None, names=names)

# One histogram per column, laid out on a single 10x8 inch figure.
data.hist(figsize=(10, 8))
plt.show()

Univariate Density Plots

In [9]:
import matplotlib.pyplot as plt
import pandas

# Remote CSV to load (Pima Indians diabetes data, per the file name).
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.csv"

# Column labels supplied by hand; with `names=` given, read_csv treats the
# first line of the file as data rather than as a header.
names = [
    'preg', 'plas', 'pres',
    'skin', 'test', 'mass',
    'pedi', 'age', 'class',
]

data = pandas.read_csv(url, header=None, names=names)

# One density curve per column on a 3x3 grid. sharex=False gives each
# panel its own x-axis, since the columns are not on a common scale.
data.plot.density(
    subplots=True,
    layout=(3, 3),
    sharex=False,
    figsize=(10, 8),
)
plt.show()

Box and Whisker Plots

In [10]:
import matplotlib.pyplot as plt
import pandas

# Remote CSV to load (Pima Indians diabetes data, per the file name).
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.csv"

# Column labels supplied by hand; with `names=` given, read_csv treats the
# first line of the file as data rather than as a header.
names = [
    'preg', 'plas', 'pres',
    'skin', 'test', 'mass',
    'pedi', 'age', 'class',
]

data = pandas.read_csv(url, header=None, names=names)

# One box-and-whisker plot per column on a 3x3 grid. Axes are not shared
# in either direction, so every panel is scaled to its own column.
data.plot.box(
    subplots=True,
    layout=(3, 3),
    sharex=False,
    sharey=False,
    figsize=(10, 8),
)
plt.show()

Correlation Matrix Plot

In [11]:
import matplotlib.pyplot as plt
import pandas
import numpy

# Remote CSV to load (Pima Indians diabetes data, per the file name).
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.csv"

# Column labels supplied by hand; with `names=` given, read_csv treats the
# first line of the file as data rather than as a header.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']

data = pandas.read_csv(url, names=names)

# Pairwise correlation between the columns of the frame.
correlations = data.corr()

# plot correlation matrix
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111)
# Pin the colour scale to the full [-1, 1] range of a correlation
# coefficient so colours are comparable across datasets.
cax = ax.matshow(correlations, vmin=-1, vmax=1)
fig.colorbar(cax)

# Take tick positions and labels from the correlation matrix itself rather
# than a hard-coded count and the `names` list, so the axes stay aligned
# with the plotted cells if the column set ever changes.
ticks = numpy.arange(len(correlations.columns))
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(correlations.columns)
ax.set_yticklabels(correlations.columns)

plt.show()

Scatterplot Matrix

In [12]:
import matplotlib.pyplot as plt
import pandas
from pandas.plotting import scatter_matrix

# Remote CSV to load (Pima Indians diabetes data, per the file name).
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.csv"

# Column labels supplied by hand; with `names=` given, read_csv treats the
# first line of the file as data rather than as a header.
names = [
    'preg', 'plas', 'pres',
    'skin', 'test', 'mass',
    'pedi', 'age', 'class',
]

data = pandas.read_csv(url, header=None, names=names)

# Grid of pairwise scatter plots for every combination of columns,
# drawn on a single 10x8 inch figure.
scatter_matrix(frame=data, figsize=(10, 8))
plt.show()
In [ ]: