In [ ]:
import numpy as np
import pandas as pd
Exercise 1¶
Using Pandas, create a DataFrame with the following data of Twitter accounts:
username | followers | following | tweets |
---|---|---|---|
katyperry | 106,862,412 | 215 | 9,375 |
justinbieber | 104,998,286 | 298,668 | 30,501 |
BarackObama | 104,576,505 | 615,858 | 15,579 |
rihanna | 89,454,314 | 1,091 | 10,238 |
taylorswift13 | 83,293,215 | 0 | 103 |
ladygaga | 77,793,824 | 125,436 | 8,938 |
In [ ]:
# One record per account: (username, followers, following, tweets).
accounts = [
    ('katyperry', 106_862_412, 215, 9_375),
    ('justinbieber', 104_998_286, 298_668, 30_501),
    ('BarackObama', 104_576_505, 615_858, 15_579),
    ('rihanna', 89_454_314, 1_091, 10_238),
    ('taylorswift13', 83_293_215, 0, 103),
    ('ladygaga', 77_793_824, 125_436, 8_938),
]

# Transpose the records into four parallel column lists.
username, followers, following, tweets = (list(column) for column in zip(*accounts))
In [ ]:
# your code goes here
In [ ]:
# Map each column label to its list of values; the dict's insertion order
# (username, followers, following, tweets) becomes the column order.
account_columns = {
    'username': username,
    'followers': followers,
    'following': following,
    'tweets': tweets,
}
df = pd.DataFrame(account_columns)
df
In [ ]:
# your code goes here
In [ ]:
# Use the usernames as row labels; drop=False keeps the 'username' column too.
df = df.set_index('username', drop=False)
df
In [ ]:
# your code goes here
In [ ]:
# Remove the 'username' column, rebinding df to the result instead of
# mutating it in place.
df = df.drop(columns='username')
df
Exercise 4¶
Add a new record (row) with Cristiano Ronaldo data:
- index: Cristiano
- followers: 76_575_668
- following: 103
- tweets: 3_347
In [ ]:
# your code goes here
In [ ]:
# DataFrame.append was removed in pandas 2.0, so build a one-row frame and
# concatenate it instead. The new row's index reuses the existing index name
# so that name survives the concatenation (mismatched names would reset it).
cristiano = pd.DataFrame(
    {'followers': [76_575_668], 'following': [103], 'tweets': [3_347]},
    index=pd.Index(['Cristiano'], name=df.index.name),
)
df = pd.concat([df, cristiano])
df
In [ ]:
# Assigning to a label through .loc writes that row (adding it when the label
# is new); the Series index lines each value up with the matching column.
cristiano_stats = pd.Series(
    [76_575_668, 103, 3_347],
    index=['followers', 'following', 'tweets'],
)
df.loc['Cristiano'] = cristiano_stats
df
In [ ]:
# One entry per row, in row order.
sex = [
    'female',  # katyperry
    'male',    # justinbieber
    'male',    # BarackObama
    'female',  # rihanna
    'female',  # taylorswift13
    'female',  # ladygaga
    'male',    # Cristiano
]
In [ ]:
# your code goes here
In [ ]:
# Attach the list as a new 'sex' column; values are matched to rows by position.
df = df.assign(sex=sex)
df
In [ ]:
# your code goes here
In [ ]:
# Plain [] with an integer slice selects rows by position: rows 3 and 4.
df[3:5]
In [ ]:
# Explicit positional indexing: rows at positions 3 and 4 (the end is exclusive).
df.iloc[3:5]
In [ ]:
# Label-based slice: unlike positional slices, .loc includes both endpoints.
df.loc['rihanna': 'taylorswift13']
In [ ]:
# your code goes here
In [ ]:
# Rows at positions 3-4 of the column at position 2 ('tweets', given the
# columns built above). A single column comes back as a Series, so to_frame()
# turns it back into a one-column DataFrame.
df.iloc[3:5, 2].to_frame()
In [ ]:
# Same selection by label: rows 'rihanna' through 'taylorswift13' (inclusive),
# column 'tweets'; to_frame() converts the resulting Series to a DataFrame.
df.loc['rihanna': 'taylorswift13', 'tweets'].to_frame()
In [ ]:
# Two-step version: slice the rows by position first, then pick the 'tweets'
# column from that slice. Fine for reading; avoid this chained form for assignment.
df[3:5]['tweets'].to_frame()
In [ ]:
# your code goes here
In [ ]:
# `in` on a DataFrame tests membership among the column labels.
'following' in df
In [ ]:
# your code goes here
In [ ]:
# To test for a row label, check membership in the index instead.
'shakira' in df.index
In [ ]:
# your code goes here
In [ ]:
# The comparison yields a boolean Series; .loc keeps the rows where it is True.
df.loc[df['tweets'] > 9_000]
In [ ]:
# Shorthand: a boolean Series inside plain [] filters rows the same way.
df[df['tweets'] > 9_000]
In [ ]:
# your code goes here
In [ ]:
# Combine element-wise conditions with & (not `and`); each comparison needs its
# own parentheses because & binds tighter than > and ==.
df.loc[(df['tweets'] > 9_000) & (df['sex'] == 'female')]
In [ ]:
# Same combined boolean mask, applied through plain [] instead of .loc.
df[(df['tweets'] > 9_000) & (df['sex'] == 'female')]
Exercise 12¶
Let's add an activity index to our data using the following formula:
$$ activity = \frac{\frac{tweets}{5} + \frac{followers}{100000} + \frac{following}{1000}}{1000} $$
In [ ]:
# your code goes here
In [ ]:
# Numerator of the activity formula: each raw count scaled by its own divisor.
weighted_total = df['tweets'] / 5 + df['followers'] / 100_000 + df['following'] / 1_000
# Final division by 1000 gives the activity index, stored as a new column.
df['activity'] = weighted_total / 1_000
df
In [ ]:
# your code goes here
In [ ]:
# Select every row of the 'tweets' column via .loc, then average it.
df.loc[:,'tweets'].mean()
In [ ]:
# Equivalent shorthand: plain [] column selection followed by mean().
df['tweets'].mean()