1. csv
(i) df = pd.read_csv("./input/ecoli.csv", delim_whitespace=True)
(ii) df = pd.read_csv(filepath, names=['sentence', 'label'], sep='\t')
(iii) url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/molecular-biology/promoter-gene-sequences/promoters.data'
names = ['Class', 'id', 'Sequence']
df = pd.read_csv(url, names = names)
2. assign column
df.columns = ['seq_name', 'mcg', 'gvh', 'lip', 'chg', 'aac', 'alm1', 'alm2', 'site']
3. drop
(i) df = df.drop('seq_name', axis=1)
(ii) df = df.drop(columns=['seq_name'])
4. replace
df.replace(('cp', 'im', 'pp', 'imU', 'om', 'omL', 'imL', 'imS'),(1,2,3,4,5,6,7,8), inplace=True)
5. Statistical Description
df.describe()
6. shape
df.shape
7. type
df.dtypes
8. correlation coefficient
df.corr(method='pearson')
9. value (type=numpy)
df = df.values
10. loc
df = df.loc[:, 'Class']
11. transpose
df = df.transpose()
12. rename
(i) df.rename(columns = {57: 'Class'}, inplace = True)
(ii) sequences.rename("sequences")
13. value_counts()
df[name].value_counts()
14. one-hot encoder
df = pd.get_dummies(df)
15. head
df.head()
16. txt
df = pd.read_table('chimp_data.txt')
17. apply
df['words'] = df.apply(lambda x: getKmers(x['sequence']), axis=1)
18. merge
df = pd.read_csv('./input/pdb_data_no_dups.csv').merge(pd.read_csv('./input/pdb_data_seq.csv'), how='inner', on='structureId')
19. reset_index
df.reset_index()
sequences.reset_index(drop=True, inplace=True)
20. dropna
sequences=df[0].dropna()
21. concat
df = pd.concat([df1,df2,df3], axis=1)
22. modify column value
df['label']='1'
23. df[condition]
df_yelp = df[df['source'] == 'yelp']
24. unique
pd.Series([2, 1, 3, 3], name='A').unique()
df['source'].unique()
沒有留言:
張貼留言