打比賽時,遇到了一個問題。填充空白值的時候,如果使用 固定值,均值啥的都沒問題。
但是我想用
.fillna(method='pad',axis=0,inplace=True)
但是每次都是報錯
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-191-1252788aaf79> in <module>
----> 1 sh_car1.fillna(method='pad',axis=0,inplace=True)
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in fillna(self, value, method, axis, inplace, limit, downcast, **kwargs)
4242 limit=limit,
4243 downcast=downcast,
-> 4244 **kwargs
4245 )
4246
C:\Anaconda3\lib\site-packages\pandas\core\generic.py in fillna(self, value, method, axis, inplace, limit, downcast)
6235 inplace=inplace,
6236 coerce=True,
-> 6237 downcast=downcast,
6238 )
6239 else:
C:\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in interpolate(self, **kwargs)
567
568 def interpolate(self, **kwargs):
--> 569 return self.apply("interpolate", **kwargs)
570
571 def shift(self, **kwargs):
C:\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
436 kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
437
--> 438 applied = getattr(b, f)(**kwargs)
439 result_blocks = _extend_blocks(applied, result_blocks)
440
C:\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in interpolate(self, method, axis, index, values, inplace, limit, limit_direction, limit_area, fill_value, coerce, downcast, **kwargs)
1172 fill_value=fill_value,
1173 coerce=coerce,
-> 1174 downcast=downcast,
1175 )
1176 # validate the interp method
C:\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in _interpolate_with_fill(self, method, axis, inplace, limit, fill_value, coerce, downcast)
1226 limit=limit,
1227 fill_value=fill_value,
-> 1228 dtype=self.dtype,
1229 )
1230 values = self._try_coerce_result(values)
C:\Anaconda3\lib\site-packages\pandas\core\missing.py in interpolate_2d(values, method, axis, limit, fill_value, dtype)
481 method = clean_fill_method(method)
482 if method == "pad":
--> 483 values = transf(pad_2d(transf(values), limit=limit, mask=mask, dtype=dtype))
484 else:
485 values = transf(
C:\Anaconda3\lib\site-packages\pandas\core\missing.py in pad_2d(values, limit, mask, dtype)
546
547 if np.all(values.shape):
--> 548 algos.pad_2d_inplace(values, mask, limit=limit)
549 else:
550 # for test coverage
pandas\_libs\algos.pyx in pandas._libs.algos.__pyx_fused_cpdef()
TypeError: No matching signature found
經過千辛萬苦終於找到了問題的根源。
原來,我在加載數據的時候使用了一個 壓縮內存的函數
# Reduce memory usage
def reduce_mem_usage(df, verbose=True, use_float16=False):
    """Downcast the numeric columns of ``df`` to the smallest dtype that fits.

    Each column whose dtype is one of int16/int32/int64/float16/float32/
    float64 is inspected; its min and max decide the narrowest integer or
    float dtype able to hold every value. Columns are replaced in place on
    ``df`` and the same object is returned.

    Args:
        df: DataFrame to shrink. It is modified in place.
        verbose: When true, print the name of every float column processed
            and a final summary of the memory saved.
        use_float16: Allow float columns to be downcast to ``np.float16``.
            Off by default: float16 keeps only ~3 significant digits, and
            pandas fill routines such as ``fillna(method='pad')`` / ``ffill``
            fail on it with "TypeError: No matching signature found".

    Returns:
        The same DataFrame object, with numeric columns downcast.
    """
    numerics = ('int16', 'int32', 'int64', 'float16', 'float32', 'float64')
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    # float32 is the smallest float dtype that pandas' algorithms handle.
    if use_float16:
        float_candidates = (np.float16, np.float32)
    else:
        float_candidates = (np.float32,)

    bytes_per_mb = 1024 ** 2
    start_mem = df.memory_usage().sum() / bytes_per_mb

    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if str(col_type)[:3] == 'int':
            # Inclusive bounds: a column holding exactly -128..127 fits int8.
            # If min/max are NaN (empty column) nothing matches and the
            # column is left untouched.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            if verbose:
                print('column name :', col)
            # Fall back to float64 when no narrower dtype covers the range
            # (including all-NaN or infinite columns, where the comparisons
            # below are false).
            target = np.float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    target = candidate
                    break
            df[col] = df[col].astype(target)

    end_mem = df.memory_usage().sum() / bytes_per_mb
    if verbose:
        # Guard against a zero-byte frame so the percentage stays defined.
        if start_mem:
            reduction = 100 * (start_mem - end_mem) / start_mem
        else:
            reduction = 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df
這里面產生了一種新的數據類型 np.float16
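而這種類型,pandas 雖然可以存放,但其底層的 Cython 填充算法(pad / backfill)並沒有針對 float16 的實現,所以才會出現 "No matching signature found"。
pandas 的這些算法只支持 float32、float64 這樣的 float 類型。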
np.float32類型 都沒有問題。
所以在填充的時候就會報錯。