date_df["rank_num"] = date_df.groupby("issuer_id").report_date.agg("rank", **{"ascending": 1, "method": "min"}) File "D:\python_virtualenv\es_env\lib\site-packages\pandas\core\groupby\groupby.py", line 3479, in aggregate return getattr(self, func_or_funcs)(*args, **kwargs) File "D:\python_virtualenv\es_env\lib\site-packages\pandas\core\groupby\groupby.py", line 1906, in rank na_option=na_option, pct=pct, axis=axis) File "D:\python_virtualenv\es_env\lib\site-packages\pandas\core\groupby\groupby.py", line 1025, in _cython_transform **kwargs) File "D:\python_virtualenv\es_env\lib\site-packages\pandas\core\groupby\groupby.py", line 2630, in transform return self._cython_operation('transform', values, how, axis, **kwargs) File "D:\python_virtualenv\es_env\lib\site-packages\pandas\core\groupby\groupby.py", line 2590, in _cython_operation **kwargs) File "D:\python_virtualenv\es_env\lib\site-packages\pandas\core\groupby\groupby.py", line 2664, in _transform transform_func(result, values, comp_ids, is_datetimelike, **kwargs) File "D:\python_virtualenv\es_env\lib\site-packages\pandas\core\groupby\groupby.py", line 2479, in wrapper return f(afunc, *args, **kwargs) File "D:\python_virtualenv\es_env\lib\site-packages\pandas\core\groupby\groupby.py", line 2430, in <lambda> kwargs.get('na_option', 'keep') TypeError: 'NoneType' object is not callable
在使用pandas對一列日期進行分組排序(groupby後調用rank)時報出了上面的錯誤,下面是排查過程:
1. 根據錯誤提示 File "D:\python_virtualenv\es_env\lib\site-packages\pandas\core\groupby\groupby.py", line 2430, in <lambda> kwargs.get('na_option', 'keep') 可知,是因為pandas模塊的groupby.py文件的下面代碼中func函數傳入為None導致的。
'f': lambda func, a, b, c, d, **kwargs: func( a, b, c, d, kwargs.get('ties_method', 'average'), kwargs.get('ascending', True), kwargs.get('pct', False), kwargs.get('na_option', 'keep') )
2. 根據錯誤提示
File "D:\python_virtualenv\es_env\lib\site-packages\pandas\core\groupby\groupby.py", line 2479, in wrapper return f(afunc, *args, **kwargs)
可知afunc就是傳入的函數。這個afunc是通過get_func函數一步步查找得到的:最終是看pandas的_libs\groupby模塊(Cython編譯的擴展模塊,即下面代碼中的libgroupby)裡是否存在一個名為group_rank_object的函數;由於該模塊中沒有這個函數,所以獲得的是None。
def _get_cython_function(self, kind, how, values, is_numeric): # 這一步查看values中的數據類型,date無法識別,datetime識別為int dtype_str = values.dtype.name def get_func(fname): # see if there is a fused-type version of function # only valid for numeric # 這一步看libgroupby中是不是有fname對應的函數 f = getattr(libgroupby, fname, None) if f is not None and is_numeric: return f # otherwise find dtype-specific version, falling back to object # 再看是不是有group_rank_object函數,因為沒有,所以最后返回的結果是None for dt in [dtype_str, 'object']: f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None) if f is not None: return f ftype = self._cython_functions[kind][how] if isinstance(ftype, dict): # 這一步獲取傳入的函數afunc func = afunc = get_func(ftype['name']) # a sub-function f = ftype.get('f') if f is not None: def wrapper(*args, **kwargs): return f(afunc, *args, **kwargs) # need to curry our sub-function func = wrapper
3.結論
(1).0.23.4的pandas沒有針對object類型的分組排名(rank)實現,只存在針對int和float的實現。
(2).0.23.4的pandas無法識別date類型,是作為object類型。但是可以識別datetime類型,會把datetime類型識別為int來處理。
(3).所以要對日期列進行分組排序,需要先把該列轉換成datetime類型才行,例如:date_df["report_date"] = pd.to_datetime(date_df["report_date"]),然後再執行groupby和rank。
0.23版本的pandas存在這個問題,但是0.22版本沒有這個問題。