详解 Pandas 的 reset_index 函数
Pandas 的 reset_index 函数分为 Series 对象的 reset_index() 和 DataFrame 对象的 reset_index() 两种,功能都是重置索引,但二者的用法有所不同。
一、Series 的 reset_index()
1. 数据准备
import pandas as pd

examinations = pd.DataFrame({
    "student_id": [1, 1, 1, 2, 1, 1, 13, 13, 13, 2, 1],
    "subject_name": ["Math", "Physics", "Programming", "Programming", "Physics", "Math",
                     "Math", "Programming", "Physics", "Math", "Math"],
})
print(examinations)
student_id subject_name
0 1 Math
1 1 Physics
2 1 Programming
3 2 Programming
4 1 Physics
5 1 Math
6 13 Math
7 13 Programming
8 13 Physics
9 2 Math
10 1 Math
2. 方法声明
def reset_index(
    self,
    level: IndexLabel | None = None,  # 复合索引时,指定重置(移出索引)哪一级,默认重置所有级
    *,
    drop: bool = False,  # 是否丢弃原索引(为 True 时原索引不会作为列保留)
    name: Level = lib.no_default,  # 重命名 Series 数值列
    inplace: bool = False,  # 是否在原对象上操作
    allow_duplicates: bool = False,  # 是否允许出现重复的列名
) -> DataFrame | Series | None
3. 基本使用
# 1.将 DataFrame 聚合成 Series
res = examinations.groupby(by=["student_id", "subject_name"]).size()
print(res, type(res)) # MultiIndex(names=['student_id', 'subject_name'])
student_id  subject_name
1           Math            3
            Physics         2
            Programming     1
2           Math            1
            Programming     1
13          Math            1
            Physics         1
            Programming     1
dtype: int64 <class 'pandas.core.series.Series'>
# 2.重置索引,使用默认参数
print(res.reset_index(), type(res.reset_index()))
student_id subject_name 0
0 1 Math 3
1 1 Physics 2
2 1 Programming 1
3 2 Math 1
4 2 Programming 1
5 13 Math 1
6 13 Physics 1
7 13 Programming 1 <class 'pandas.core.frame.DataFrame'>
# 3.重置索引,并命名数值列
print(res.reset_index(name="attended_exams"))
student_id subject_name attended_exams
0 1 Math 3
1 1 Physics 2
2 1 Programming 1
3 2 Math 1
4 2 Programming 1
5 13 Math 1
6 13 Physics 1
7 13 Programming 1
# 4.重置索引,并删除原索引列
print(res.reset_index(drop=True), type(res.reset_index(drop=True)))
0 3
1 2
2 1
3 1
4 1
5 1
6 1
7 1
dtype: int64 <class 'pandas.core.series.Series'>
# 5.指定重置复合索引中的某一级索引
print(res.reset_index(level="student_id"))
student_id 0
subject_name
Math 1 3
Physics 1 2
Programming 1 1
Math 2 1
Programming 2 1
Math 13 1
Physics 13 1
Programming 13 1
二、DataFrame 的 reset_index()
1. 数据准备
import pandas as pd

df = pd.DataFrame(data={
    "Jan": [1, 2, 3],
    "Feb": [4, 5, 6],
    "Mar": [7, 8, 9],
}, index=["a", "b", "c"])
print(df)
Jan Feb Mar
a 1 4 7
b 2 5 8
c 3 6 9
2. 方法声明
def reset_index(
    self,
    level: IndexLabel | None = None,  # 复合索引时,指定重置(移出索引)哪一级,默认重置所有级
    *,
    drop: bool = False,  # 是否丢弃原索引(为 True 时原索引不会作为列保留)
    inplace: bool = False,  # 是否在原对象上操作
    col_level: Hashable = 0,
    col_fill: Hashable = "",
    allow_duplicates: bool | lib.NoDefault = lib.no_default,
    names: Hashable | Sequence[Hashable] | None = None,  # 重命名原索引列,默认名为 index
) -> DataFrame | None
3. 基本使用
# 1.重置索引,使用默认参数
print(df.reset_index())
index Jan Feb Mar
0 a 1 4 7
1 b 2 5 8
2 c 3 6 9
# 2.重置索引,并删除原索引列
print(df.reset_index(drop=True))
Jan Feb Mar
0 1 4 7
1 2 5 8
2 3 6 9
# 3.重置索引,并重命名原索引列
print(df.reset_index(names="A"))
A Jan Feb Mar
0 a 1 4 7
1 b 2 5 8
2 c 3 6 9