pandas 处理缺失数据:

import pandas as pd
import numpy as np

# Build a 4x4 demo frame holding the integers 0..15, indexed by four
# consecutive days starting at 2019-05-29.
dates = pd.date_range('20190529', periods=4)
df = pd.DataFrame(np.arange(16).reshape((4, 4)), index=dates, columns=['A', 'B', 'C', 'D'])

# Punch two holes into the data.  NaN cannot live in an integer column, so
# columns B and C are cast to float explicitly first instead of relying on an
# implicit upcast during assignment.  `np.nan` is the canonical spelling; the
# `np.NAN` alias no longer exists in NumPy 2.x.
df = df.astype({'B': float, 'C': float})
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
print(df)
"""
                A     B     C   D
2019-05-29   0   NaN   2.0   3
2019-05-30   4   5.0   NaN   7
2019-05-31   8   9.0  10.0  11
2019-06-01  12  13.0  14.0  15
"""

# === Drop ===
# axis=0 removes rows; how='any' removes a row as soon as it holds one NaN
# (how='all' would only remove rows made up entirely of NaN).
print(df.dropna(axis=0, how='any'))  # how={'any', 'all'}
"""
                A     B     C   D
2019-05-31   8   9.0  10.0  11
2019-06-01  12  13.0  14.0  15
"""

# axis=1 applies the same rule to columns: B and C each hold a NaN and go away.
print(df.dropna(axis=1, how='any'))
"""
                A   D
2019-05-29   0   3
2019-05-30   4   7
2019-05-31   8  11
2019-06-01  12  15
"""

# === Fill ===
# Replace every NaN with 0.  The result is a new frame; df keeps its NaNs,
# which is why the check below still reports True.
print(df.fillna(value=0))
"""
                 A     B     C   D
2019-05-29   0   0.0   2.0   3
2019-05-30   4   5.0   0.0   7
2019-05-31   8   9.0  10.0  11
2019-06-01  12  13.0  14.0  15
"""

# === Check ===
# Is there any NaN anywhere in df?  Reducing the boolean mask as a plain
# ndarray always yields a single bool, regardless of how NumPy dispatches
# `any` to a DataFrame.
print(df.isnull().values.any())  # True
最后修改:2019 年 05 月 29 日 02:10 PM
如果觉得我的文章对你有用,请随意赞赏