網頁

2017年1月8日 星期日

資料結構2

pandas data frame
還記得 dict 這個資料結構嗎?
將一個 dict 的資料結構轉換為 data frame 是最方便的方法

import pandas as pd  # import the package under the alias pd

# Two parallel lists: position i in each list describes the same crew member.
name = ["Monkey D. Luffy", "Roronoa Zoro", "Nami", "Usopp", "Vinsmoke Sanji", "Tony Tony Chopper", "Nico Robin", "Franky", "Brook"]
age = [19, 21, 20, 19, 21, 17, 30, 36, 90]

# Each dict key becomes a column name and each list becomes that column's values.
straw_hat_dict = {"name": name, "age": age}
straw_hat_dict_df = pd.DataFrame(straw_hat_dict)

print(type(straw_hat_dict_df))
print(straw_hat_dict_df.dtypes)  # one dtype per column
straw_hat_dict_df  # last expression of a notebook cell: renders the data frame

out:
<class 'pandas.core.frame.DataFrame'>
age      int64
name    object
dtype: object
   age               name
0   19    Monkey D. Luffy
1   21       Roronoa Zoro
2   20               Nami
3   19              Usopp
4   21     Vinsmoke Sanji
5   17  Tony Tony Chopper
6   30         Nico Robin
7   36             Franky
8   90              Brook
ps. 索引值起始是0

data frame 可以包含多種資料類型,不會像 ndarray 僅容納單一資料類型

import pandas as pd # 引用套件並縮寫為 pd

# Three parallel lists: position i in each list describes the same crew member.
name = ["Monkey D. Luffy", "Roronoa Zoro", "Nami", "Usopp", "Vinsmoke Sanji", "Tony Tony Chopper", "Nico Robin", "Franky", "Brook"]
age = [19, 21, 20, 19, 21, 17, 30, 36, 90]
is_male = [True, True, False, True, True, True, False, True, True]

# Columns of different types (str, int, bool) live side by side in one data frame.
straw_hat_dict = {
    "name": name,
    "age": age,
    "is_male": is_male,
}

straw_hat_df = pd.DataFrame(straw_hat_dict)

# `.ix` was removed from pandas; `.loc` does the same label-based selection.
# The index here is the default 0..8, so the label 0 is the first row.
print(straw_hat_df.loc[0, :])  # select the first observation (row labelled 0)
print("---")
print(straw_hat_df.loc[:, "name"])  # select the "name" column
print("---")
print(straw_hat_df.loc[0, "name"])  # select "name" of the first observation
print("---")
straw_hat_df  # last expression of a notebook cell: renders the data frame

out:
age                     19
is_male               True
name       Monkey D. Luffy
Name: 0, dtype: object
---
0      Monkey D. Luffy
1         Roronoa Zoro
2                 Nami
3                Usopp
4       Vinsmoke Sanji
5    Tony Tony Chopper
6           Nico Robin
7               Franky
8                Brook
Name: name, dtype: object
---
Monkey D. Luffy
---
Out[46]:
   age  is_male               name
0   19     True    Monkey D. Luffy
1   21     True       Roronoa Zoro
2   20    False               Nami
3   19     True              Usopp
4   21     True     Vinsmoke Sanji
5   17     True  Tony Tony Chopper
6   30    False         Nico Robin
7   36     True             Franky
8   90     True              Brook

import pandas as pd # 引用套件並縮寫為 pd


# Three parallel lists: position i in each list describes the same crew member.
name = ["Monkey D. Luffy", "Roronoa Zoro", "Nami", "Usopp", "Vinsmoke Sanji", "Tony Tony Chopper", "Nico Robin", "Franky", "Brook"]
age = [19, 21, 20, 19, 21, 17, 30, 36, 90]
is_male = [True, True, False, True, True, True, False, True, True]

straw_hat_dict = {
    "name": name,
    "age": age,
    "is_male": is_male,
}

straw_hat_df = pd.DataFrame(straw_hat_dict)

# Build one boolean mask per condition; `.ix` was removed from pandas, so use `.loc`.
# `~` negates the boolean column: True marks the rows where is_male is False.
filter_gender = ~straw_hat_df.loc[:, "is_male"]
filter_age = straw_hat_df.loc[:, "age"] <= 30

# `&` combines the masks element-wise; indexing with the result keeps only the
# rows where both conditions hold. As the last expression of a notebook cell,
# the filtered data frame is rendered as the cell output.
straw_hat_df[filter_gender & filter_age]

out:
   age  is_male        name
2   20    False        Nami
6   30    False  Nico Robin
data frame 的概觀

import pandas as pd # 引用套件並縮寫為 pd

# Three parallel lists: position i in each list describes the same crew member.
name = ["Monkey D. Luffy", "Roronoa Zoro", "Nami", "Usopp", "Vinsmoke Sanji", "Tony Tony Chopper", "Nico Robin", "Franky", "Brook"]
age = [19, 21, 20, 19, 21, 17, 30, 36, 90]
is_male = [True, True, False, True, True, True, False, True, True]

straw_hat_dict = {"name": name, "age": age, "is_male": is_male}
straw_hat_df = pd.DataFrame(straw_hat_dict)

# Print each overview of the data frame in turn, with a "---" line between them.
print(
    straw_hat_df.shape,       # number of rows and number of columns
    straw_hat_df.describe(),  # descriptive statistics
    straw_hat_df.head(3),     # first three observations
    straw_hat_df.tail(3),     # last three observations
    straw_hat_df.columns,     # column names
    straw_hat_df.index,       # index values
    sep="\n---\n",
)



沒有留言:

張貼留言