支持的 pandas API¶
下表列出了 Spark 上的 pandas API 中已实现和尚未实现的 pandas API。部分 pandas API 并未实现全部参数，因此第三列列出了每个 API 缺少的参数。
第二列中的“Y”表示已实现，且支持其全部参数。
“N”表示尚未实现。
“P”表示已部分实现，缺少某些参数。
下列所有 API 均以分布式方式执行计算，但那些在设计上就需要本地执行的 API 除外。例如，DataFrame.to_numpy() 需要将数据收集到驱动程序端。
如果您需要任何尚未实现的 pandas API 或参数，可以在 Apache Spark JIRA 上创建工单提出请求，也可以自行贡献实现。
API 列表依据最新的 pandas 官方 API 参考进行更新。
CategoricalIndex API¶
API |
已实现 |
缺少参数 |
---|---|---|
Y |
||
|
Y |
|
|
Y |
|
|
Y |
|
|
P |
|
|
P |
|
argsort |
N |
|
Y |
||
Y |
||
|
Y |
|
asof_locs |
N |
|
|
P |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
P |
|
|
Y |
|
|
Y |
|
|
Y |
|
duplicated |
N |
|
|
Y |
|
|
P |
|
|
P |
|
format |
N |
|
get_indexer |
N |
|
get_indexer_for |
N |
|
get_indexer_non_unique |
N |
|
|
Y |
|
get_loc |
N |
|
get_slice_bound |
N |
|
groupby |
N |
|
|
Y |
|
|
Y |
|
infer_objects |
N |
|
|
Y |
|
|
P |
|
is_ |
N |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
P |
|
|
Y |
|
|
Y |
|
|
Y |
|
join |
N |
|
Y |
||
|
Y |
|
memory_usage |
N |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
putmask |
N |
|
ravel |
N |
|
reindex |
N |
|
Y |
||
Y |
||
|
Y |
|
Y |
||
Y |
||
|
P |
|
searchsorted |
N |
|
Y |
||
|
Y |
|
|
P |
|
slice_indexer |
N |
|
slice_locs |
N |
|
|
Y |
|
|
P |
|
sortlevel |
N |
|
|
Y |
|
|
P |
|
to_flat_index |
N |
|
|
Y |
|
|
Y |
|
|
P |
|
|
P |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
where |
N |
DataFrame API¶
API |
已实现 |
缺少参数 |
---|---|---|
Y |
||
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
P |
|
|
asfreq |
N |
|
asof |
N |
|
Y |
||
P |
|
|
Y |
||
P |
|
|
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
combine |
N |
|
Y |
||
compare |
N |
|
convert_dtypes |
N |
|
Y |
||
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
Y |
||
P |
|
|
|
P |
|
Y |
||
P |
|
|
Y |
||
Y |
||
P |
|
|
Y |
||
P |
|
|
Y |
||
Y |
||
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
Y |
||
Y |
||
Y |
||
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
P |
|
|
infer_objects |
N |
|
P |
|
|
Y |
||
P |
|
|
isetitem |
N |
|
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
P |
|
|
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
P |
|
|
P |
|
|
P |
|
|
Y |
||
Y |
||
Y |
||
P |
|
|
memory_usage |
N |
|
P |
|
|
Y |
||
P |
|
|
Y |
||
P |
|
|
|
P |
|
P |
|
|
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
P |
|
|
P |
|
|
Y |
||
Y |
||
P |
|
|
Y |
||
P |
|
|
Y |
||
Y |
||
P |
|
|
Y |
||
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
reorder_levels |
N |
|
Y |
||
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
P |
|
|
P |
|
|
Y |
||
Y |
||
set_axis |
N |
|
set_flags |
N |
|
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
Y |
||
P |
|
|
Y |
||
P |
|
|
|
P |
|
Y |
||
P |
|
|
Y |
||
Y |
||
Y |
||
Y |
||
P |
|
|
P |
|
|
P |
|
|
to_feather |
N |
|
to_gbq |
N |
|
to_hdf |
N |
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
to_period |
N |
|
to_pickle |
N |
|
Y |
||
to_sql |
N |
|
to_stata |
N |
|
P |
|
|
to_timestamp |
N |
|
to_xarray |
N |
|
to_xml |
N |
|
Y |
||
P |
|
|
P |
|
|
Y |
||
tz_convert |
N |
|
tz_localize |
N |
|
P |
|
|
P |
|
|
value_counts |
N |
|
P |
|
|
P |
|
|
P |
|
DatetimeIndex API¶
API |
已实现 |
缺少参数 |
---|---|---|
|
Y |
|
|
Y |
|
|
Y |
|
|
P |
|
|
P |
|
argsort |
N |
|
as_unit |
N |
|
|
Y |
|
asof_locs |
N |
|
|
P |
|
Y |
||
|
Y |
|
Y |
||
|
Y |
|
|
Y |
|
|
P |
|
|
Y |
|
|
Y |
|
|
Y |
|
duplicated |
N |
|
|
Y |
|
|
P |
|
|
P |
|
Y |
||
format |
N |
|
get_indexer |
N |
|
get_indexer_for |
N |
|
get_indexer_non_unique |
N |
|
|
Y |
|
get_loc |
N |
|
get_slice_bound |
N |
|
groupby |
N |
|
|
Y |
|
|
Y |
|
Y |
||
Y |
||
infer_objects |
N |
|
|
Y |
|
|
P |
|
is_ |
N |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
P |
|
|
Y |
|
|
Y |
|
isocalendar |
N |
|
|
Y |
|
join |
N |
|
|
Y |
|
|
P |
|
mean |
N |
|
memory_usage |
N |
|
|
P |
|
Y |
||
Y |
||
|
Y |
|
|
Y |
|
|
Y |
|
putmask |
N |
|
ravel |
N |
|
reindex |
N |
|
|
Y |
|
|
P |
|
Y |
||
searchsorted |
N |
|
|
Y |
|
|
P |
|
slice_indexer |
N |
|
slice_locs |
N |
|
snap |
N |
|
|
Y |
|
|
P |
|
sortlevel |
N |
|
std |
N |
|
Y |
||
|
Y |
|
|
P |
|
to_flat_index |
N |
|
|
Y |
|
to_julian_date |
N |
|
|
Y |
|
|
P |
|
to_period |
N |
|
to_pydatetime |
N |
|
|
P |
|
|
Y |
|
|
Y |
|
tz_convert |
N |
|
tz_localize |
N |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
where |
N |
Index API¶
API |
已实现 |
缺少参数 |
---|---|---|
Y |
||
Y |
||
Y |
||
P |
|
|
P |
|
|
argsort |
N |
|
Y |
||
asof_locs |
N |
|
P |
|
|
Y |
||
Y |
||
Y |
||
P |
|
|
Y |
||
Y |
||
Y |
||
duplicated |
N |
|
Y |
||
P |
|
|
P |
|
|
format |
N |
|
get_indexer |
N |
|
get_indexer_for |
N |
|
get_indexer_non_unique |
N |
|
|
Y |
|
get_loc |
N |
|
get_slice_bound |
N |
|
groupby |
N |
|
|
Y |
|
Y |
||
infer_objects |
N |
|
Y |
||
P |
|
|
is_ |
N |
|
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
P |
|
|
Y |
||
|
Y |
|
Y |
||
join |
N |
|
Y |
||
P |
|
|
memory_usage |
N |
|
P |
|
|
Y |
||
|
Y |
|
Y |
||
putmask |
N |
|
ravel |
N |
|
reindex |
N |
|
Y |
||
P |
|
|
searchsorted |
N |
|
Y |
||
P |
|
|
slice_indexer |
N |
|
slice_locs |
N |
|
|
Y |
|
P |
|
|
sortlevel |
N |
|
Y |
||
P |
|
|
to_flat_index |
N |
|
Y |
||
Y |
||
P |
|
|
P |
|
|
|
Y |
|
|
Y |
|
Y |
||
Y |
||
Y |
||
Y |
||
where |
N |
MultiIndex API¶
API |
已实现 |
缺少参数 |
---|---|---|
|
Y |
|
|
Y |
|
Y |
||
|
P |
|
|
P |
|
argsort |
N |
|
|
Y |
|
asof_locs |
N |
|
P |
|
|
P |
|
|
Y |
||
Y |
||
P |
|
|
|
Y |
|
Y |
||
Y |
||
duplicated |
N |
|
Y |
||
Y |
||
|
P |
|
P |
|
|
format |
N |
|
get_indexer |
N |
|
get_indexer_for |
N |
|
get_indexer_non_unique |
N |
|
|
Y |
|
get_loc |
N |
|
get_loc_level |
N |
|
get_locs |
N |
|
get_slice_bound |
N |
|
groupby |
N |
|
|
Y |
|
Y |
||
infer_objects |
N |
|
Y |
||
P |
|
|
is_ |
N |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
P |
|
|
Y |
|
|
Y |
|
Y |
||
join |
N |
|
|
Y |
|
P |
|
|
memory_usage |
N |
|
P |
|
|
|
Y |
|
|
Y |
|
|
Y |
|
putmask |
N |
|
ravel |
N |
|
reindex |
N |
|
remove_unused_levels |
N |
|
P |
|
|
reorder_levels |
N |
|
P |
|
|
searchsorted |
N |
|
set_codes |
N |
|
set_levels |
N |
|
|
Y |
|
|
P |
|
slice_indexer |
N |
|
slice_locs |
N |
|
|
Y |
|
P |
|
|
sortlevel |
N |
|
Y |
||
Y |
||
P |
|
|
to_flat_index |
N |
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
|
Y |
|
|
Y |
|
truncate |
N |
|
Y |
||
Y |
||
Y |
||
Y |
||
where |
N |
Series API¶
API |
已实现 |
缺少参数 |
---|---|---|
Y |
||
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
Y |
||
Y |
||
P |
|
|
asfreq |
N |
|
P |
|
|
P |
|
|
Y |
||
Y |
||
P |
|
|
Y |
||
P |
|
|
P |
|
|
Y |
||
P |
|
|
combine |
N |
|
Y |
||
P |
|
|
convert_dtypes |
N |
|
Y |
||
Y |
||
Y |
||
Y |
||
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
Y |
||
P |
|
|
|
P |
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
P |
|
|
P |
|
|
Y |
||
P |
|
|
Y |
||
P |
|
|
P |
|
|
P |
|
|
P |
|
|
|
P |
|
P |
|
|
Y |
||
Y |
||
Y |
||
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
P |
|
|
infer_objects |
N |
|
info |
N |
|
P |
|
|
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
Y |
||
P |
|
|
P |
|
|
Y |
||
P |
|
|
Y |
||
Y |
||
Y |
||
memory_usage |
N |
|
Y |
||
P |
|
|
Y |
||
P |
|
|
|
P |
|
P |
|
|
P |
|
|
Y |
||
Y |
||
P |
|
|
Y |
||
P |
|
|
P |
|
|
Y |
||
Y |
||
P |
|
|
Y |
||
Y |
||
P |
|
|
P |
|
|
P |
|
|
ravel |
N |
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
reorder_levels |
N |
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
Y |
||
set_axis |
N |
|
set_flags |
N |
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
Y |
||
Y |
||
P |
|
|
|
P |
|
Y |
||
P |
|
|
Y |
||
Y |
||
P |
|
|
Y |
||
P |
|
|
Y |
||
P |
|
|
Y |
||
to_hdf |
N |
|
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
to_period |
N |
|
to_pickle |
N |
|
to_sql |
N |
|
P |
|
|
to_timestamp |
N |
|
to_xarray |
N |
|
|
Y |
|
Y |
||
|
Y |
|
P |
|
|
Y |
||
tz_convert |
N |
|
tz_localize |
N |
|
Y |
||
P |
|
|
Y |
||
Y |
||
P |
|
|
view |
N |
|
P |
|
|
P |
|
TimedeltaIndex API¶
API |
已实现 |
缺少参数 |
---|---|---|
|
Y |
|
|
Y |
|
|
Y |
|
|
P |
|
|
P |
|
argsort |
N |
|
as_unit |
N |
|
|
Y |
|
asof_locs |
N |
|
|
P |
|
ceil |
N |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
P |
|
|
Y |
|
|
Y |
|
|
Y |
|
duplicated |
N |
|
|
Y |
|
|
P |
|
|
P |
|
floor |
N |
|
format |
N |
|
get_indexer |
N |
|
get_indexer_for |
N |
|
get_indexer_non_unique |
N |
|
|
Y |
|
get_loc |
N |
|
get_slice_bound |
N |
|
groupby |
N |
|
|
Y |
|
|
Y |
|
infer_objects |
N |
|
|
Y |
|
|
P |
|
is_ |
N |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
P |
|
|
Y |
|
|
Y |
|
|
Y |
|
join |
N |
|
|
Y |
|
|
P |
|
mean |
N |
|
median |
N |
|
memory_usage |
N |
|
|
P |
|
|
Y |
|
|
Y |
|
|
Y |
|
putmask |
N |
|
ravel |
N |
|
reindex |
N |
|
|
Y |
|
|
P |
|
round |
N |
|
searchsorted |
N |
|
|
Y |
|
|
P |
|
slice_indexer |
N |
|
slice_locs |
N |
|
|
Y |
|
|
P |
|
sortlevel |
N |
|
std |
N |
|
sum |
N |
|
|
Y |
|
|
P |
|
to_flat_index |
N |
|
|
Y |
|
|
Y |
|
|
P |
|
to_pytimedelta |
N |
|
|
P |
|
|
Y |
|
total_seconds |
N |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
Y |
|
where |
N |
通用函数 API¶
API |
已实现 |
缺少参数 |
---|---|---|
array |
N |
|
bdate_range |
N |
|
P |
|
|
crosstab |
N |
|
cut |
N |
|
P |
|
|
eval |
N |
|
factorize |
N |
|
from_dummies |
N |
|
Y |
||
infer_freq |
N |
|
interval_range |
N |
|
Y |
||
Y |
||
json_normalize |
N |
|
lreshape |
N |
|
P |
|
|
P |
|
|
Y |
||
merge_ordered |
N |
|
Y |
||
Y |
||
period_range |
N |
|
pivot |
N |
|
pivot_table |
N |
|
qcut |
N |
|
P |
|
|
P |
|
|
P |
|
|
read_feather |
N |
|
read_fwf |
N |
|
read_gbq |
N |
|
read_hdf |
N |
|
P |
|
|
P |
|
|
P |
|
|
P |
|
|
read_pickle |
N |
|
read_sas |
N |
|
read_spss |
N |
|
P |
|
|
P |
|
|
P |
|
|
read_stata |
N |
|
P |
|
|
read_xml |
N |
|
set_eng_float_format |
N |
|
show_versions |
N |
|
test |
N |
|
P |
|
|
P |
|
|
P |
|
|
to_pickle |
N |
|
Y |
||
unique |
N |
|
value_counts |
N |
|
wide_to_long |
N |
扩展 API¶
API |
已实现 |
缺少参数 |
---|---|---|
agg |
N |
|
aggregate |
N |
|
apply |
N |
|
corr |
N |
|
P |
|
|
cov |
N |
|
|
P |
|
P |
|
|
P |
|
|
median |
N |
|
P |
|
|
P |
|
|
rank |
N |
|
sem |
N |
|
|
P |
|
|
P |
|
P |
|
|
|
P |
|
扩展 Groupby API¶
API |
已实现 |
缺少参数 |
---|---|---|
agg |
N |
|
aggregate |
N |
|
apply |
N |
|
corr |
N |
|
|
P |
|
cov |
N |
|
|
P |
|
|
P |
|
|
P |
|
median |
N |
|
|
P |
|
|
P |
|
rank |
N |
|
sem |
N |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
滚动 API¶
API |
已实现 |
缺少参数 |
---|---|---|
agg |
N |
|
aggregate |
N |
|
apply |
N |
|
corr |
N |
|
P |
|
|
cov |
N |
|
|
P |
|
P |
|
|
P |
|
|
median |
N |
|
P |
|
|
P |
|
|
rank |
N |
|
sem |
N |
|
|
P |
|
|
P |
|
P |
|
|
|
P |
|
滚动 Groupby API¶
API |
已实现 |
缺少参数 |
---|---|---|
agg |
N |
|
aggregate |
N |
|
apply |
N |
|
corr |
N |
|
|
P |
|
cov |
N |
|
|
P |
|
|
P |
|
|
P |
|
median |
N |
|
|
P |
|
|
P |
|
rank |
N |
|
sem |
N |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
窗口 API¶
API |
已实现 |
缺少参数 |
---|---|---|
agg |
N |
|
aggregate |
N |
|
mean |
N |
|
std |
N |
|
sum |
N |
|
var |
N |
DataFrameGroupBy API¶
API |
已实现 |
缺少参数 |
---|---|---|
P |
|
|
P |
|
|
|
Y |
|
|
P |
|
|
Y |
|
|
Y |
|
boxplot |
N |
|
corr |
N |
|
corrwith |
N |
|
|
Y |
|
cov |
N |
|
|
Y |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
P |
|
|
|
P |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
P |
|
|
P |
|
|
Y |
|
|
P |
|
|
Y |
|
hist |
N |
|
|
P |
|
|
P |
|
|
Y |
|
|
P |
|
|
P |
|
|
Y |
|
|
P |
|
ngroup |
N |
|
|
Y |
|
ohlc |
N |
|
pct_change |
N |
|
pipe |
N |
|
|
Y |
|
|
P |
|
|
P |
|
resample |
N |
|
|
Y |
|
sample |
N |
|
|
P |
|
|
P |
|
|
Y |
|
|
P |
|
|
P |
|
|
P |
|
|
Y |
|
take |
N |
|
|
P |
|
value_counts |
N |
|
|
P |
|
GroupBy API¶
API |
已实现 |
缺少参数 |
---|---|---|
|
P |
|
|
P |
|
Y |
||
P |
|
|
Y |
||
Y |
||
Y |
||
Y |
||
P |
|
|
P |
|
|
P |
|
|
P |
|
|
describe |
N |
|
P |
|
|
Y |
||
|
Y |
|
Y |
||
Y |
||
P |
|
|
Y |
||
Y |
||
P |
|
|
P |
|
|
Y |
||
P |
|
|
ngroup |
N |
|
ohlc |
N |
|
pct_change |
N |
|
pipe |
N |
|
Y |
||
P |
|
|
P |
|
|
resample |
N |
|
|
Y |
|
sample |
N |
|
P |
|
|
P |
|
|
Y |
||
P |
|
|
P |
|
|
Y |
||
P |
|
SeriesGroupBy API¶
API |
已实现 |
缺少参数 |
---|---|---|
|
P |
|
|
P |
|
|
Y |
|
|
P |
|
|
Y |
|
|
Y |
|
corr |
N |
|
|
Y |
|
cov |
N |
|
|
Y |
|
|
P |
|
|
P |
|
|
P |
|
|
P |
|
describe |
N |
|
|
P |
|
|
Y |
|
|
Y |
|
|
Y |
|
|
P |
|
|
P |
|
|
Y |
|
|
P |
|
|
Y |
|
hist |
N |
|
|
P |
|
|
P |
|
|
Y |
|
|
P |
|
|
P |
|
|
Y |
|
|
P |
|
ngroup |
N |
|
P |
|
|
P |
|
|
|
Y |
|
ohlc |
N |
|
pct_change |
N |
|
pipe |
N |
|
|
Y |
|
|
P |
|
|
P |
|
resample |
N |
|
|
Y |
|
sample |
N |
|
|
P |
|
|
P |
|
|
Y |
|
|
P |
|
|
P |
|
|
P |
|
|
Y |
|
take |
N |
|
|
P |
|
Y |
||
P |
|
|
|
P |
|