Conditional Selection (Boolean Arrays)
In [1]:
import pandas as pd
import numpy as np
In [11]:
# Sample table of country-level indicators: one row per country, one column
# per indicator. Row labels are supplied directly at construction time.
df = pd.DataFrame(
    {
        'Population': [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
        'GDP': [1785387, 2833687, 3874437, 2167744, 4602367, 2950039, 17348075],
        'Surface Area': [9984670, 640679, 357114, 301336, 377930, 242495, 9525067],
        'HDI': [0.913, 0.888, 0.916, 0.873, 0.891, 0.907, 0.915],
        'Continent': ['America', 'Europe', 'Europe', 'Europe', 'Asia', 'Europe', 'America'],
    },
    # Fix the column order explicitly.
    columns=['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'],
    index=['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom', 'United States'],
)
In [12]:
# A bare name as the last expression of a cell is rendered as the cell's output.
df
Out[12]:
In [13]:
# Element-wise comparison: one boolean per row (equivalent to `df['Population'] > 70`).
df['Population'].gt(70)
Out[13]:
In [14]:
# Build the boolean mask first, then pass it to .loc to keep only the rows marked True.
population_over_70 = df['Population'] > 70
df.loc[population_over_70]
Out[14]:
In [15]:
# Row mask plus a single column label: the matching rows of 'Population' only.
df.loc[df['Population'].gt(70), 'Population']
Out[15]:
In [16]:
# Row mask plus a list of column labels: the matching rows, restricted to those columns.
selected_columns = ['Population', 'GDP']
df.loc[df['Population'].gt(70), selected_columns]
Out[16]:
Dropping Rows and Columns
In [17]:
# Drop a row by its index label. The result is a new frame; `df` is not reassigned here.
df.drop(index='Canada')
Out[17]:
In [18]:
# Drop several columns by label, using the labels + axis spelling of drop().
df.drop(['Surface Area', 'HDI'], axis='columns')
Out[18]:
Operations
In [20]:
# Double brackets select a one-column DataFrame; every value is then divided by 10.
df[['Population']].div(10)
Out[20]:
Modifying DataFrames
In [21]:
# Language per country, keyed by country name; only three countries are listed.
langs = pd.Series(
    {'France': 'French', 'Germany': 'German', 'Italy': 'Italian'},
    name='Language',
)
In [22]:
# Add a 'Language' column from the Series. Values are matched to rows by index
# label, so countries missing from `langs` (it only lists France, Germany and
# Italy) end up with NaN.
df['Language'] = langs
In [23]:
# Display the frame to inspect the 'Language' column.
df
Out[23]:
Replacing values per column
In [25]:
# Overwrite every value of the existing 'Language' column: a scalar on the
# right-hand side is broadcast to all rows. The label must match the existing
# column exactly; a different label (e.g. 'Languages') would add a new column
# instead of replacing values.
df['Language'] = 'English'
In [26]:
# Display the frame to check the result of the assignment.
df
Out[26]:
Renaming columns and index labels
In [27]:
# Relabel rows and columns in a single call; the result is not assigned back,
# so `df` keeps its original labels. Mapping keys that match no existing label
# ('Anual Popcorn Consumption', 'Argentina') are skipped without raising.
df.rename(
    index={'United States': 'USA', 'United Kingdom': 'UK', 'Argentina': 'AR'},
    columns={'HDI': 'Human Development Index', 'Anual Popcorn Consumption': 'APC'},
)
Out[27]:
In [28]:
# A callable mapper is applied to each index label; here every row label is upper-cased.
df.rename(str.upper, axis='index')
Out[28]: