使用python計算vintage


from hinnc,添加了后面的

if __name__ == '__main__'
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 14 18:57:19 2019

@author: hinnc

"""


import numpy as np
import pandas as pd
#from pandas.tseries.offsets import DateOffset
from datetime import timedelta, datetime


def vintageCreation(contract, sjhk, release_ym = '放款月份', dpd_m = 30, 
                    contract_key = '申請編號', sjhk_key = '單號', repay_dt = '賬單日',
                    act_repay_dt = '結清日期', sjhk_need_amt = '實還本息', 
                    contract_need_amt = '本息和', start_dt = '2015-04-01',
                    end_dt = '2019-01-01', file_name = 'Vintage'):
    '''
    contract: DataFrame, 合同信息表, 需包含release_ym, contract_key和contract_need_amt              
    sjhk: DataFrame, 實際還款表, 需包含 sjhk_key, repay_dt, act_repay_dt和sjhk_need_amt
    release_ym: str, 放款年月
    dpd_m: int, 逾期定義的分界點, 函數按大於此天數計算逾期情況
    contract_key: str, contract表的唯一標示,需要與sjhk的sjhk_key匹配
    sjhk_key: str, 還款計划表的唯一標示,需要與contract表的contract_key匹配
    repay_dt: str, 應還款日期
    act_repay_dt: str, 實際還款日期
    sjhk_need_amt: str, 實際還款金額
    contract_need_amt: str, 合同中應還款金額
    start_dt: str, Vintage表格中列的起始時間
    end_dt: str, Vintage表格中列的結束時間
    '''
    # 生成列表頭,即觀察時點,'pd.date_range' function set the 'freq' (frequency) to 'M' (month end frequency)
    obs_list = [str(i.date()) for i in (pd.date_range(start = start_dt, 
                                                      end= end_dt, 
                                                      freq = 'M')).tolist()]
        
    # 預留 Vintage金額和合同數的 DataFrame
    vintage = pd.DataFrame(columns = obs_list)    
    vintage_prin = pd.DataFrame(columns = obs_list)
    vintage_n = pd.DataFrame(columns = obs_list)    
    vintage_num = pd.DataFrame(columns = obs_list)
            
    for i in sorted(contract[release_ym].unique()):
        tmp = pd.DataFrame(columns = obs_list)
        tmp_num = pd.DataFrame(columns = obs_list)
    
        df_sjhk = sjhk.loc[sjhk[sjhk_key].isin(contract.loc[contract[release_ym] == i, contract_key]), :]
        
        #每一個觀察時點分別計算
        for j in tmp.columns.tolist():             
            df_sjhk_tmp = df_sjhk.loc[df_sjhk[repay_dt] < (pd.to_datetime(j) + timedelta(days = 1)), 
                                      [sjhk_key, repay_dt, act_repay_dt, sjhk_need_amt]]
            
            if len(df_sjhk_tmp) == 0:
                tmp[j] = [0]
                tmp_num[j] = [0]
            
            else:
                #當前觀察時點逾期天數
                df_sjhk_tmp.loc[pd.notnull(df_sjhk_tmp[act_repay_dt]) & 
                                (df_sjhk_tmp[act_repay_dt] < (pd.to_datetime(j) + timedelta(days = 1))), 'dpd'] = 0
                                   
                df_sjhk_tmp.loc[pd.isnull(df_sjhk_tmp[act_repay_dt]) | 
                                (df_sjhk_tmp[act_repay_dt] >= (pd.to_datetime(j) + timedelta(days = 1))), 'dpd'] = (pd.to_datetime(j) - df_sjhk_tmp.loc[pd.isnull(df_sjhk_tmp[act_repay_dt]) | (df_sjhk_tmp[act_repay_dt] >= (pd.to_datetime(j) + timedelta(days = 1))), repay_dt]).dt.days                          
                
                current = df_sjhk_tmp.groupby(sjhk_key)[['dpd']].max()
                current.reset_index(inplace = True)
                current_m = current.loc[current['dpd'] > dpd_m, :]
        
                #當前逾期金額 = 總金額 - 已還金額  
                tmp[j] = [(contract.loc[contract[contract_key].isin(current_m[sjhk_key]), contract_need_amt].sum() - 
                           df_sjhk_tmp.loc[(df_sjhk_tmp[sjhk_key].isin(current_m[sjhk_key])) & 
                                   (df_sjhk_tmp['dpd'] == 0), sjhk_need_amt].sum())]
                
                #當前逾期合同數
                tmp_num[j] = [len(current_m)]
        
        # Vintage金額比例的分子/分母(逾期本金 or 逾期本息/放款本金 or 放款本息)
        vintage = pd.concat([vintage, tmp])        
        prin_tmp = np.array([contract.loc[contract[release_ym] == i, contract_need_amt].sum()] * vintage_prin.shape[1]).reshape((1, vintage_prin.shape[1]))    
        prin_df = pd.DataFrame(prin_tmp, columns = obs_list)            
        vintage_prin = pd.concat([vintage_prin, prin_df])
        
        # Vintage合同數比例的分子/分母(逾期合同數/放款合同數)
        vintage_n = pd.concat([vintage_n, tmp_num])        
        num_tmp = np.array([len(contract.loc[contract[release_ym] == i, :])] * vintage_num.shape[1]).reshape((1, vintage_num.shape[1]))    
        num_df = pd.DataFrame(num_tmp, columns = obs_list)            
        vintage_num = pd.concat([vintage_num, num_df])
                       
    vintage.set_index(keys = pd.Series(sorted(contract['放款月份'].unique())).map(lambda x: str(x)), inplace = True)
    vintage_prin.set_index(keys = pd.Series(sorted(contract['放款月份'].unique())).map(lambda x: str(x)), inplace = True)
    vintage_pct = vintage/vintage_prin
    
    vintage_n.set_index(keys = pd.Series(sorted(contract['放款月份'].unique())).map(lambda x: str(x)), inplace = True)
    vintage_num.set_index(keys = pd.Series(sorted(contract['放款月份'].unique())).map(lambda x: str(x)), inplace = True)
    vintage_n_pct = vintage_n/vintage_num
    
    #輸出結果    
    writer = pd.ExcelWriter(('{}_{}.xlsx'.format(file_name, datetime.now().strftime('%Y%m%d'))))
    vintage_pct.to_excel(writer, sheet_name ='vintage_金額比例', index = True)
    vintage.to_excel(writer, sheet_name ='vintage_逾期金額', index = True)
    vintage_prin.to_excel(writer, sheet_name ='vintage_放款金額', index = True)
    vintage_n_pct.to_excel(writer, sheet_name ='vintage_數量比例', index = True)
    vintage_n.to_excel(writer, sheet_name ='vintage_逾期合同數', index = True)
    vintage_num.to_excel(writer, sheet_name ='vintage_放款合同數', index = True)
    writer.save()


if __name__ == '__main__':
    contract=pd.read_excel('xxx\\contract.xlsx')
    sjhk=pd.read_excel('xxx\\shqk.xlsx')
    vintageCreation(contract, sjhk)

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM