Python Pandas Exercises¶
Programming for Data Science Bootcamp
Set Up¶
import pandas as pd
import numpy as np
import seaborn as sns
np.__version__
'1.26.4'
pd.__version__
'2.2.2'
Exercise 1¶
Create a data frame called df by passing a dictionary of inputs with the following keys and values:
- feature, containing a list of $5$ floats.
- labels, containing a list of $5$ integers between $0$ and $2$ inclusive.
Hint: Pass the dict to pd.DataFrame().
Then show the data frame.
df = pd.DataFrame({
    'feature': [0.2, -1.1, 1.6, 5.4, 3.3],
    'labels': [1, 1, 0, 2, 2]
})
df
feature | labels | |
---|---|---|
0 | 0.2 | 1 |
1 | -1.1 | 1 |
2 | 1.6 | 0 |
3 | 5.4 | 2 |
4 | 3.3 | 2 |
Exercise 2¶
Rename the labels column in df to label.
df = df.rename(columns={'labels': 'label'})
df
feature | label | |
---|---|---|
0 | 0.2 | 1 |
1 | -1.1 | 1 |
2 | 1.6 | 0 |
3 | 5.4 | 2 |
4 | 3.3 | 2 |
Exercise 3¶
Import the iris dataset from Seaborn and then sort by species, descending.
Hint: iris = sns.load_dataset('iris')
iris = sns.load_dataset('iris')
iris.sort_values('species', ascending=False)
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
111 | 6.4 | 2.7 | 5.3 | 1.9 | virginica |
122 | 7.7 | 2.8 | 6.7 | 2.0 | virginica |
121 | 5.6 | 2.8 | 4.9 | 2.0 | virginica |
120 | 6.9 | 3.2 | 5.7 | 2.3 | virginica |
... | ... | ... | ... | ... | ... |
31 | 5.4 | 3.4 | 1.5 | 0.4 | setosa |
30 | 4.8 | 3.1 | 1.6 | 0.2 | setosa |
29 | 4.7 | 3.2 | 1.6 | 0.2 | setosa |
28 | 5.2 | 3.4 | 1.4 | 0.2 | setosa |
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
150 rows × 5 columns
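sort_values also accepts a list of columns with a matching list of per-key directions. A minimal sketch on a toy frame (the values here are made up, not taken from iris):

```python
import pandas as pd

# Toy frame with made-up values, just to show multi-key sorting
toy = pd.DataFrame({
    'species': ['b', 'a', 'b', 'a'],
    'petal_length': [4.0, 1.4, 3.5, 1.6],
})

# Sort species descending, then petal_length ascending within each species
out = toy.sort_values(['species', 'petal_length'], ascending=[False, True])
```

The same call works on iris, e.g. iris.sort_values(['species', 'petal_length'], ascending=[False, True]).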
Exercise 4¶
Use .value_counts() to find out how many records of each species the dataset has.
iris.species.value_counts().to_frame('n')
n | |
---|---|
species | |
setosa | 50 |
versicolor | 50 |
virginica | 50 |
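.value_counts() can also report proportions instead of counts via normalize=True. A small self-contained sketch on a toy species column standing in for iris.species:

```python
import pandas as pd

# Toy stand-in for iris.species
s = pd.Series(['setosa', 'setosa', 'virginica', 'virginica'])

# normalize=True divides each count by the total number of records
props = s.value_counts(normalize=True)
```

On the real iris.species this would return one third for each of the three species.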
iris
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
... | ... | ... | ... | ... | ... |
145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
150 rows × 5 columns
Exercise 5¶
Show the number of records in iris where petal_length $<= 1.4$ or petal_length $>= 1.6$.
Hint: Remember how to implement "or" in Pandas.
Hint: Subset using boolean indexing, and count the number of resulting records with len() or .shape[0].
len(iris[(iris.petal_length <= 1.4) | (iris.petal_length >= 1.6)])
137
iris[(iris.petal_length <= 1.4) | (iris.petal_length >= 1.6)].shape[0]
137
Exercise 6¶
Use .apply() to append a new column that displays the minimum of (petal_length, petal_width) for each observation.
Display the head and tail of the new data frame to check that things look correct.
Hint: Use a lambda function to compute the minimum.
iris['min_petal'] = iris.apply(lambda x: min(x.petal_length, x.petal_width), axis=1)
iris.head()
sepal_length | sepal_width | petal_length | petal_width | species | min_petal | |
---|---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa | 0.2 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa | 0.2 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa | 0.2 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa | 0.2 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa | 0.2 |
iris.tail()
sepal_length | sepal_width | petal_length | petal_width | species | min_petal | |
---|---|---|---|---|---|---|
145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica | 2.3 |
146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica | 1.9 |
147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica | 2.0 |
148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica | 2.3 |
149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica | 1.8 |
We can see that in all cases petal length is greater than petal width.
sum(iris.petal_length <= iris.petal_width)
0
iris[['petal_length','petal_width']].idxmin(axis=1).value_counts()
petal_width    150
Name: count, dtype: int64
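.apply(..., axis=1) calls a Python function once per row, which is slow on large frames. np.minimum computes the same element-wise minimum in one vectorized call. A sketch on a toy frame with made-up values:

```python
import numpy as np
import pandas as pd

toy = pd.DataFrame({'petal_length': [1.4, 5.1], 'petal_width': [0.2, 1.8]})

# Row-wise apply, as in the exercise solution above
slow = toy.apply(lambda x: min(x.petal_length, x.petal_width), axis=1)

# Vectorized equivalent: one ufunc call over whole columns
fast = np.minimum(toy.petal_length, toy.petal_width)

assert (slow == fast).all()
```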
Exercise 7¶
Use a pivot table to compute the following statistics on sepal_width and petal_width grouped by species:
- median
- mean
These can be computed together in a single call to pd.pivot_table().
pd.pivot_table(iris,
values = ["sepal_width", "petal_width"],
columns = ["species"],
aggfunc = {'mean', 'median'}) # A set (braces) groups the stats as rows
species | setosa | versicolor | virginica | |
---|---|---|---|---|
petal_width | mean | 0.246 | 1.326 | 2.026 |
median | 0.200 | 1.300 | 2.000 | |
sepal_width | mean | 3.428 | 2.770 | 2.974 |
median | 3.400 | 2.800 | 3.000 |
pd.pivot_table(iris,
values = ["sepal_width", "petal_width"],
columns = ["species"],
aggfunc = ['mean', 'median']) # A list (brackets) groups the stats as cols
mean | median | |||||
---|---|---|---|---|---|---|
species | setosa | versicolor | virginica | setosa | versicolor | virginica |
petal_width | 0.246 | 1.326 | 2.026 | 0.2 | 1.3 | 2.0 |
sepal_width | 3.428 | 2.770 | 2.974 | 3.4 | 2.8 | 3.0 |
pd.pivot_table(iris,
values = ["sepal_width", "petal_width"],
columns = ["species"],
aggfunc = ['mean', 'median']).T # Transposed: stats back as rows
petal_width | sepal_width | ||
---|---|---|---|
species | |||
mean | setosa | 0.246 | 3.428 |
versicolor | 1.326 | 2.770 | |
virginica | 2.026 | 2.974 | |
median | setosa | 0.200 | 3.400 |
versicolor | 1.300 | 2.800 | |
virginica | 2.000 | 3.000 |
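The same statistics can also be produced with groupby/agg, which puts species on the row index rather than the columns. A sketch on a tiny stand-in frame (the values are made up):

```python
import pandas as pd

# Tiny stand-in for iris with hypothetical values
toy = pd.DataFrame({
    'species': ['setosa', 'setosa', 'virginica', 'virginica'],
    'petal_width': [0.2, 0.4, 1.8, 2.2],
})

# One row per species, one column per statistic
stats = toy.groupby('species')['petal_width'].agg(['mean', 'median'])
```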
Exercise 8¶
Create two data frames with a common index column, first_name, and a single feature column.
In the first, name the feature column age and have two observations.
In the second, name the feature column height and have three observations.
Make sure two of the index values in the two data frames are shared.
Also, remember that indexes should have unique values.
Hint: Pass dictionaries to the data frame constructor.
left = pd.DataFrame({"key": ["jamie", "bill"], "age": [15, 22]})
left = left.set_index('key')
left
age | |
---|---|
key | |
jamie | 15 |
bill | 22 |
right = pd.DataFrame({"key": ["jamie", "bill", "asher"], "height": [6, 5, 7]})
right = right.set_index('key')
right
height | |
---|---|
key | |
jamie | 6 |
bill | 5 |
asher | 7 |
Exercise 9¶
With the data frames you just created, use pd.merge() to join the tables using an inner join.
Then use df.join() to join the tables using an outer join.
inner = pd.merge(left, right, on="key", how="inner")
inner
age | height | |
---|---|---|
key | ||
jamie | 15 | 6 |
bill | 22 | 5 |
outer = left.join(right, how='outer')
outer
age | height | |
---|---|---|
key | ||
asher | NaN | 7 |
bill | 22.0 | 5 |
jamie | 15.0 | 6 |
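pd.merge can produce the outer join as well; passing indicator=True adds a _merge column recording whether each row came from the left frame, the right frame, or both. A sketch that rebuilds the two frames, keeping key as an ordinary column:

```python
import pandas as pd

left = pd.DataFrame({'key': ['jamie', 'bill'], 'age': [15, 22]})
right = pd.DataFrame({'key': ['jamie', 'bill', 'asher'], 'height': [6, 5, 7]})

# Outer join keeps all keys; _merge shows 'both', 'left_only', or 'right_only'
outer = pd.merge(left, right, on='key', how='outer', indicator=True)
```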
Exercise 10¶
Create a series with the data $[1, 1, 2, 3, 5, 8]$.
Extract the data from the series into a NumPy array and reshape to $2 \times 3$.
Print both the reshaped data and its dimensions.
ser = pd.Series([1, 1, 2, 3, 5, 8])
vals = ser.values
resh = vals.reshape(2,3)
resh
array([[1, 1, 2],
       [3, 5, 8]])
resh.shape
(2, 3)
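.values works, but the pandas documentation recommends .to_numpy() for extracting the underlying array. A sketch of the same reshape with that method:

```python
import pandas as pd

ser = pd.Series([1, 1, 2, 3, 5, 8])

# .to_numpy() returns the data as a NumPy array; reshape views it as 2x3
resh = ser.to_numpy().reshape(2, 3)
```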
Exercise 11¶
The data frame below contains two categoricals.
Dummify each of them, giving them an appropriate prefix and dropping the first level from each.
cats = pd.DataFrame({
'breed':['persian','persian','siamese','himalayan','burmese'],
'color':['calico','white','seal point','cream','sable']
})
Hint: You can pass a list to the prefix parameter to apply prefixes to more than one column.
cats = pd.DataFrame({
'breed':['persian', 'persian', 'siamese', 'himalayan', 'burmese'],
'color':['calico', 'white', 'seal point', 'cream', 'sable']
})
cats
breed | color | |
---|---|---|
0 | persian | calico |
1 | persian | white |
2 | siamese | seal point |
3 | himalayan | cream |
4 | burmese | sable |
cats_combo = pd.get_dummies(cats, prefix=['b','c'], drop_first=True)
cats_combo
b_himalayan | b_persian | b_siamese | c_cream | c_sable | c_seal point | c_white | |
---|---|---|---|---|---|---|---|
0 | False | True | False | False | False | False | False |
1 | False | True | False | False | False | False | True |
2 | False | False | True | False | False | True | False |
3 | True | False | False | True | False | False | False |
4 | False | False | False | False | True | False | False |
No prefix
pd.get_dummies(cats)
breed_burmese | breed_himalayan | breed_persian | breed_siamese | color_calico | color_cream | color_sable | color_seal point | color_white | |
---|---|---|---|---|---|---|---|---|---|
0 | False | False | True | False | True | False | False | False | False |
1 | False | False | True | False | False | False | False | False | True |
2 | False | False | False | True | False | False | False | True | False |
3 | False | True | False | False | False | True | False | False | False |
4 | True | False | False | False | False | False | True | False | False |
pd.get_dummies(cats).T
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
breed_burmese | False | False | False | False | True |
breed_himalayan | False | False | False | True | False |
breed_persian | True | True | False | False | False |
breed_siamese | False | False | True | False | False |
color_calico | True | False | False | False | False |
color_cream | False | False | False | True | False |
color_sable | False | False | False | False | True |
color_seal point | False | False | True | False | False |
color_white | False | True | False | False | False |
Exercise 12¶
Create two Series of floats of unequal length, with letters for index names.
In the first, s1, use these arguments in the constructor pd.Series():
[7.3, -2.5, 3.4, 1.5] and index=['a', 'c', 'd', 'e']
In the second, s2, use these:
[-2.1, 3.6, -1.5, 4, 3.1] and index=['a', 'c', 'e', 'f', 'g']
Then add the two together with the plus + operator. What do you see?
s1 = pd.Series([7.3, -2.5, 3.4, 1.5], \
index=['a', 'c', 'd', 'e'])
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1], \
index=['a', 'c', 'e', 'f', 'g'])
s1
a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64
s2
a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64
s1 + s2
a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64
Note that Pandas assumes the indexes belong to a common domain.
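The NaNs appear because labels present in only one series have nothing to pair with. If missing entries should count as zero instead, the .add method takes a fill_value:

```python
import pandas as pd

s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=['a', 'c', 'd', 'e'])
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])

# Labels missing from one side are treated as 0, so no NaNs are produced
total = s1.add(s2, fill_value=0)
```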
Demonstration¶
We demonstrate the same principle of index alignment with data frames.
df1 = pd.DataFrame(np.arange(9.).reshape((3, 3)), \
columns=list('bcd'), index=['Ohio', 'Texas', 'Colorado'])
df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), \
columns=list('bde'), index=['Utah', 'Ohio', 'Texas', 'Oregon'])
df1
b | c | d | |
---|---|---|---|
Ohio | 0.0 | 1.0 | 2.0 |
Texas | 3.0 | 4.0 | 5.0 |
Colorado | 6.0 | 7.0 | 8.0 |
df2
b | d | e | |
---|---|---|---|
Utah | 0.0 | 1.0 | 2.0 |
Ohio | 3.0 | 4.0 | 5.0 |
Texas | 6.0 | 7.0 | 8.0 |
Oregon | 9.0 | 10.0 | 11.0 |
df1 + df2
b | c | d | e | |
---|---|---|---|---|
Colorado | NaN | NaN | NaN | NaN |
Ohio | 3.0 | NaN | 6.0 | NaN |
Oregon | NaN | NaN | NaN | NaN |
Texas | 9.0 | NaN | 12.0 | NaN |
Utah | NaN | NaN | NaN | NaN |
(df1 + df2).fillna(0).style\
.background_gradient(axis=None, cmap='YlGnBu')
b | c | d | e | |
---|---|---|---|---|
Colorado | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
Ohio | 3.000000 | 0.000000 | 6.000000 | 0.000000 |
Oregon | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
Texas | 9.000000 | 0.000000 | 12.000000 | 0.000000 |
Utah | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
Exercise 13¶
Using the penguins dataset from Seaborn, generate a correlation table of the numeric features in the table.
Then create a thin table using .unstack().
Name the index columns f1 and f2 and the data column r.
As a bonus, only display rows where f1 and f2 are not identical and where there are no reversals. For example, if ('a','b') exists in the index, then ('b', 'a') does not.
penguins = sns.load_dataset('penguins')
W = penguins.corr(numeric_only=True)
W
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |
---|---|---|---|---|
bill_length_mm | 1.000000 | -0.235053 | 0.656181 | 0.595110 |
bill_depth_mm | -0.235053 | 1.000000 | -0.583851 | -0.471916 |
flipper_length_mm | 0.656181 | -0.583851 | 1.000000 | 0.871202 |
body_mass_g | 0.595110 | -0.471916 | 0.871202 | 1.000000 |
N = W.unstack().to_frame('r')
N.index.names = ['f1','f2']
N
r | ||
---|---|---|
f1 | f2 | |
bill_length_mm | bill_length_mm | 1.000000 |
bill_depth_mm | -0.235053 | |
flipper_length_mm | 0.656181 | |
body_mass_g | 0.595110 | |
bill_depth_mm | bill_length_mm | -0.235053 |
bill_depth_mm | 1.000000 | |
flipper_length_mm | -0.583851 | |
body_mass_g | -0.471916 | |
flipper_length_mm | bill_length_mm | 0.656181 |
bill_depth_mm | -0.583851 | |
flipper_length_mm | 1.000000 | |
body_mass_g | 0.871202 | |
body_mass_g | bill_length_mm | 0.595110 |
bill_depth_mm | -0.471916 | |
flipper_length_mm | 0.871202 | |
body_mass_g | 1.000000 |
Show unique and non-identical pairs.
X = N.query("f1 < f2").sort_values('r', ascending=False)
X
r | ||
---|---|---|
f1 | f2 | |
body_mass_g | flipper_length_mm | 0.871202 |
bill_length_mm | flipper_length_mm | 0.656181 |
body_mass_g | 0.595110 | |
bill_depth_mm | bill_length_mm | -0.235053 |
body_mass_g | -0.471916 | |
flipper_length_mm | -0.583851 |
X.r.plot.barh();
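An equivalent way to keep each correlation pair exactly once is to mask the matrix with its strict upper triangle before stacking. A sketch on a small made-up correlation matrix:

```python
import numpy as np
import pandas as pd

# Toy symmetric correlation matrix (hypothetical values)
W = pd.DataFrame(
    [[1.0, 0.5, -0.3],
     [0.5, 1.0, 0.8],
     [-0.3, 0.8, 1.0]],
    index=list('abc'), columns=list('abc'))

# Keep only entries strictly above the diagonal; stack() drops the NaNs
mask = np.triu(np.ones(W.shape, dtype=bool), k=1)
pairs = W.where(mask).stack()
```

This removes both the diagonal and the reversed duplicates, just like the f1 < f2 query.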