# import modules
from bokeh.plotting import figure, output_notebook, show
from bokeh.sampledata.penguins import data
from bokeh.transform import factor_cmap, factor_mark
data.head(10)
species | island | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | sex | |
---|---|---|---|---|---|---|---|
0 | Adelie | Torgersen | 39.1 | 18.7 | 181.0 | 3750.0 | MALE |
1 | Adelie | Torgersen | 39.5 | 17.4 | 186.0 | 3800.0 | FEMALE |
2 | Adelie | Torgersen | 40.3 | 18.0 | 195.0 | 3250.0 | FEMALE |
3 | Adelie | Torgersen | NaN | NaN | NaN | NaN | NaN |
4 | Adelie | Torgersen | 36.7 | 19.3 | 193.0 | 3450.0 | FEMALE |
5 | Adelie | Torgersen | 39.3 | 20.6 | 190.0 | 3650.0 | MALE |
6 | Adelie | Torgersen | 38.9 | 17.8 | 181.0 | 3625.0 | FEMALE |
7 | Adelie | Torgersen | 39.2 | 19.6 | 195.0 | 4675.0 | MALE |
8 | Adelie | Torgersen | 34.1 | 18.1 | 193.0 | 3475.0 | NaN |
9 | Adelie | Torgersen | 42.0 | 20.2 | 190.0 | 4250.0 | NaN |
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 344 entries, 0 to 343 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 species 344 non-null object 1 island 344 non-null object 2 bill_length_mm 342 non-null float64 3 bill_depth_mm 342 non-null float64 4 flipper_length_mm 342 non-null float64 5 body_mass_g 342 non-null float64 6 sex 333 non-null object dtypes: float64(4), object(3) memory usage: 18.9+ KB
data.describe()
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |
---|---|---|---|---|
count | 342.000000 | 342.000000 | 342.000000 | 342.000000 |
mean | 43.921930 | 17.151170 | 200.915205 | 4201.754386 |
std | 5.459584 | 1.974793 | 14.061714 | 801.954536 |
min | 32.100000 | 13.100000 | 172.000000 | 2700.000000 |
25% | 39.225000 | 15.600000 | 190.000000 | 3550.000000 |
50% | 44.450000 | 17.300000 | 197.000000 | 4050.000000 |
75% | 48.500000 | 18.700000 | 213.000000 | 4750.000000 |
max | 59.600000 | 21.500000 | 231.000000 | 6300.000000 |
SPECIES = sorted(data.species.unique())
MARKERS = ['hex', 'circle_x', 'triangle']
p = figure(title = "Penguin size", background_fill_color="#fafafa", plot_width = 600, plot_height = 400)
p.xaxis.axis_label = 'Flipper Length (mm)'
p.yaxis.axis_label = 'Body Mass (g)'
p.scatter("flipper_length_mm", "body_mass_g", source=data,
legend_group="species", fill_alpha=0.4, size=12,
marker=factor_mark('species', MARKERS, SPECIES),
color=factor_cmap('species', 'Category10_3', SPECIES))
p.legend.location = "top_left"
p.legend.title = "Species"
show(p)
p = figure(title = "Penguin size", background_fill_color="#fafafa")
p.xaxis.axis_label = 'Flipper Length (mm)'
p.yaxis.axis_label = 'Body Mass (g)'
p.scatter("flipper_length_mm", "body_mass_g", source=data,
legend_group="species", fill_alpha=0.4, size=12,
marker=factor_mark('species', MARKERS, SPECIES),
color=factor_cmap('species', 'Category10_3', SPECIES))
p.legend.location = "top_left"
p.legend.title = "Species"
show(p)
data.sex = data.sex.fillna('UNKNOWN')
SEX = sorted(data.sex.unique())
MARKERS = ['hex', 'circle_x', 'triangle']
p = figure(title = "Penguin size", background_fill_color="#fafafa",
# plot_width = 600, plot_height = 400
)
p.xaxis.axis_label = 'Flipper Length (mm)'
p.yaxis.axis_label = 'Body Mass (g)'
p.scatter("flipper_length_mm", "body_mass_g", source=data,
legend_group="sex", fill_alpha=0.4, size=12,
marker=factor_mark('sex', MARKERS, SEX),
color=factor_cmap('sex', 'Category10_3', SEX))
p.legend.location = "top_left"
p.legend.title = "SEX"
show(p)
data.island = data.island.fillna('UNKNOWN')
ISLAND = sorted(data.island.unique())
MARKERS = ['hex', 'circle_x', 'triangle']
p = figure(title = "Penguin size", background_fill_color="#fafafa")
p.xaxis.axis_label = 'Bill Length (mm)'
p.yaxis.axis_label = 'Body Mass (g)'
p.scatter("bill_length_mm", "body_mass_g", source=data,
legend_group="island", fill_alpha=0.4, size=12,
marker=factor_mark('island', MARKERS, ISLAND),
color=factor_cmap('island', 'Category10_3', ISLAND))
p.legend.location = "top_left"
p.legend.title = "ISLAND"
show(p)