1
2
3
# 导入相关包
import pandas as pd
import numpy as np

将字典创建为 DataFrame

1
2
# Column-oriented source data; np.nan marks the missing entries.
data = {
    "grammer": ["Python", "C++", "Java", "Go", np.nan, "SQL", "PHP", "Python"],
    "score": [1, 2, np.nan, 4, 5, 6, 7, 10],
}

# Each dictionary key becomes a column of the frame.
df = pd.DataFrame.from_dict(data)
df
   grammer  score
0   Python    1.0
1      C++    2.0
2     Java    NaN
3       Go    4.0
4      NaN    5.0
5      SQL    6.0
6      PHP    7.0
7   Python   10.0

取得含有"Python"的行

1
# Select the rows whose 'grammer' value equals "Python".
df.loc[df['grammer'].eq('Python')]
   grammer  score
0   Python    1.0
7   Python   10.0

输出所有列名

1
# Print the column labels of the frame.
print(df.columns)
Index(['grammer', 'score'], dtype='object')

修改第二列列名为 fraction

1
# Rename the 'score' column to 'fraction', modifying df in place.
df.rename(columns={'score': 'fraction'}, inplace=True)

统计 grammer 列中每种编程语言出现的次数

1
# Count how often each language occurs in 'grammer'
# (missing values are left out of the counts by default).
df['grammer'].value_counts()
Python    2
C++       1
Java      1
Go        1
SQL       1
PHP       1
Name: grammer, dtype: int64

将 fraction 列的空值用平均值填充

1
# Replace missing 'fraction' values with the mean of the values that are present.
fraction_mean = df['fraction'].mean()
df['fraction'] = df['fraction'].fillna(fraction_mean)
df
   grammer  fraction
0   Python       1.0
1      C++       2.0
2     Java       5.0
3       Go       4.0
4      NaN       5.0
5      SQL       6.0
6      PHP       7.0
7   Python      10.0

提取 fraction 列中值大于3的行

1
# Boolean mask: True where 'fraction' is greater than 3.
df['fraction'].gt(3)
0    False
1    False
2     True
3     True
4     True
5     True
6     True
7     True
Name: fraction, dtype: bool
1
# Keep only the rows whose 'fraction' exceeds 3.
over_three = df['fraction'] > 3
df[over_three]
   grammer  fraction
2     Java       5.0
3       Go       4.0
4      NaN       5.0
5      SQL       6.0
6      PHP       7.0
7   Python      10.0

按照 fraction 列进行去重复值操作

1
# De-duplicate on 'fraction'; rows are scanned top to bottom, so the first occurrence is kept.
df.drop_duplicates(subset=['fraction'], keep='first')
   grammer  fraction
0   Python       1.0
1      C++       2.0
2     Java       5.0
3       Go       4.0
5      SQL       6.0
6      PHP       7.0
7   Python      10.0

计算 fraction 列的平均值

1
# Mean of the 'fraction' column.
df['fraction'].mean()
5.0

将 grammer 列转换为list

1
# Convert the 'grammer' column to a plain Python list (missing values stay as nan).
df['grammer'].tolist()
['Python', 'C++', 'Java', 'Go', nan, 'SQL', 'PHP', 'Python']

将 DataFrame 保存为csv

1
# Write the frame to 'data.csv' (relative path).
# to_csv also writes the index as the first column unless index=False is passed.
df.to_csv('data.csv')

查看数据行列数

1
# (number of rows, number of columns) of the frame.
df.shape
(8, 2)

提取 fraction 列 值大于 2 且 小于 7 的行

1
# Keep rows whose 'fraction' lies strictly between 2 and 7.
above_two = df['fraction'] > 2
below_seven = df['fraction'] < 7
df[above_two & below_seven]
   grammer  fraction
2     Java       5.0
3       Go       4.0
4      NaN       5.0
5      SQL       6.0

提取 fraction 列最大值所在的行

1
# Largest value in the 'fraction' column.
df['fraction'].max()
10.0
1
# Select every row whose 'fraction' equals the column maximum.
top_fraction = df['fraction'].max()
df[df['fraction'] == top_fraction]
   grammer  fraction
7   Python      10.0

查看最后5行

1
# tail(n) returns the last n rows; n defaults to 5.
df.tail(5)
   grammer  fraction
3       Go       4.0
4      NaN       5.0
5      SQL       6.0
6      PHP       7.0
7   Python      10.0

查看前五行

1
# head(n) returns the first n rows; n defaults to 5.
df.head(5)
   grammer  fraction
0   Python       1.0
1      C++       2.0
2     Java       5.0
3       Go       4.0
4      NaN       5.0

删除最后一行

1
2
# Method 1: drop by label, deriving the last label from the row count.
# NOTE: this relies on the default 0..n-1 integer index.
last_label = len(df) - 1
df.drop(index=[last_label])
   grammer  fraction
0   Python       1.0
1      C++       2.0
2     Java       5.0
3       Go       4.0
4      NaN       5.0
5      SQL       6.0
6      PHP       7.0
1
2
# Method 2: same idea, taking the row count from df.shape.
# NOTE: this relies on the default 0..n-1 integer index.
df.drop(index=df.shape[0] - 1)
   grammer  fraction
0   Python       1.0
1      C++       2.0
2     Java       5.0
3       Go       4.0
4      NaN       5.0
5      SQL       6.0
6      PHP       7.0
1
2
# Method 3: read the last label directly from the index and drop that label.
df.drop(labels=df.index[-1], axis=0)
   grammer  fraction
0   Python       1.0
1      C++       2.0
2     Java       5.0
3       Go       4.0
4      NaN       5.0
5      SQL       6.0
6      PHP       7.0

添加一行数据 ['ASP', 8.28]

1
# The record to append, keyed by column name.
s = pd.Series({'grammer': 'ASP', 'fraction': 8.28})
# DataFrame.append() is deprecated (and removed in pandas 2.0), so use pandas.concat.
# Build the one-row frame from a dict rather than s.to_frame().T: the transposed
# Series has object dtype in every column, which would silently upcast the
# float 'fraction' column of df to object when concatenated.
new_row = pd.DataFrame([s.to_dict()])
df = pd.concat([df, new_row], ignore_index=True)
df
   grammer  fraction
0   Python       1.0
1      C++       2.0
2     Java       5.0
3       Go       4.0
4      NaN       5.0
5      SQL       6.0
6      PHP       7.0
7   Python      10.0
8      ASP      8.28

对数据进行排序,按照"fraction"列值的大小

1
2
3
4
# Sort the rows by the 'fraction' column.
# Relevant parameters:
#   ascending (default True): True sorts ascending, False sorts descending.
#   ignore_index (default False): True relabels the result 0..n-1.
df.sort_values(by="fraction")
   grammer  fraction
0   Python       1.0
1      C++       2.0
3       Go       4.0
2     Java       5.0
4      NaN       5.0
5      SQL       6.0
6      PHP       7.0
8      ASP      8.28
7   Python      10.0

统计 grammer 列每个字符串的长度

1
# Replace the missing language name so every entry is a string before measuring it.
df['grammer'] = df['grammer'].fillna('Lisp')
# Vectorised string length; no need to wrap len() in a per-row lambda.
df['len_str'] = df['grammer'].str.len()
df
   grammer  fraction  len_str
0   Python       1.0        6
1      C++       2.0        3
2     Java       5.0        4
3       Go       4.0        2
4     Lisp       5.0        4
5      SQL       6.0        3
6      PHP       7.0        3
7   Python      10.0        6
8      ASP      8.28        3