利用kettle中的JS來完成ETL數據校驗


最近參與了一個信託行業的BI項目。由於信託業務系統的設計問題,很多數據都是由用戶手工錄入的,還有一些需要分析的數據用戶根本沒有錄入。針對這樣的數據質量,我們需要在ETL抽取的過程中對數據流進行校驗。今天就來說一下如何利用開源ETL工具kettle來完成對數據的基礎性校驗。

1:非空校驗

// Non-null check: raise the flag when the incoming INTRUST_FLAG1 field has no value.
if (INTRUST_FLAG1 == null) {
    intrust_flag1_isnull = 1;
}

2:唯一校驗

// Uniqueness check: ITEM_ID counts as unique when exactly one row of titem carries it.
var uniquesql_item_id = "SELECT count(*)  FROM titem where item_id=" + ITEM_ID;

// The query result is indexed as [row][column]; [0][0] reads the single count(*) value.
// (The previous `[0[0]` was missing a bracket and did not parse.)
if (fireToDB(strConn, uniquesql_item_id)[0][0] == 1) {
    item_id_isunique = 1;
}

3:標準化校驗

// Standardization check: INTRUST_TYPE is valid when the dictionary table
// trust_tdictparam holds at least one row for type_id 1104 with that value.
var normalsql_intrust_type =
    "select  count(*) from  trust_tdictparam where type_id=1104 and type_value=" + INTRUST_TYPE;

if (fireToDB(strConn, normalsql_intrust_type)[0][0] > 0) {
    intrust_type_isnormal = 1;
}

完整的JS處理邏輯展示如下

// Overall result for the current row: switched to 1 as soon as any validation fails.
var check_status = 0;

// Name of the table the validation relates to (there may be more than one).
var check_table = "T_ITEM_TITEM";

// Source table name; declare one variable per table when data comes from several tables.
var source_table = "titem";

// Primary key of the source table; declare several when the key spans multiple tables.
var source_table_id = "item_id";

// Human-readable description of every violation that was detected.
var check_detail = "";

// Name used to identify the checked record.
var check_name = "";

// Kind(s) of violation that were detected.
var check_type = "";

// Moment at which the validation ran.
var check_date = new Date();

// Connection name passed as the first argument to fireToDB.
var strConn = "bjitic_og";

// ---------------------------------------------------------------------------
// Validation flags. Every flag starts at 0 and is set to 1 by the checks below.
// ---------------------------------------------------------------------------

// 1: uniqueness flags (1 = the value is unique in the source table)
var item_id_isunique = 0;
var item_code_isunique = 0;
var item_name_isunique = 0;

// 2: non-null flags (1 = the field is null)
var intrust_flag1_isnull = 0;
var intrust_type_isnull = 0;
var intrust_type_subitem_isnull = 0;
var intrust_type1_isnull = 0;
var intrust_type2_isnull = 0;
var item_num_isnull = 0;
var item_money_isnull = 0;
var exp_rate1_isnull = 0;
var exp_rate2_isnull = 0;
var intrust_flag3_isnull = 0;
var intrust_flag4_isnull = 0;
var entity_type_isnull = 0;
var deal_type_isnull = 0;
var managertype_isnull = 0;
var natrust_type_isnull = 0;
var custodian_bank_isnull = 0;
var with_bank_flag_isnull = 0;
var with_security_flag_isnull = 0;
var with_private_flag_isnull = 0;
var with_gov_flag_isnull = 0;
var ben_period_isnull = 0;

// 3: standardization flags (1 = the value exists in the dictionary table)
var intrust_type_isnormal = 0;
var intrust_type_subitem_isnormal = 0;
var intrust_type1_isnormal = 0;
var intrust_type2_isnormal = 0;
var entity_type_isnormal = 0;
var deal_type_isnormal = 0;
var natrust_type_isnormal = 0;
var custodian_bank_isnormal = 0;
var innovat_type_isnormal = 0;
var bank_id_isnormal = 0;
var gov_regional_isnormal = 0;

// 4: data-type flags (1 = the value is numeric)
var ben_period_isnumber = 0;
// ---------------------------------------------------------------------------
// Uniqueness checks: a value is unique when exactly one titem row carries it.
// ---------------------------------------------------------------------------

/**
 * Renders a value as a single-quoted SQL string literal.
 *
 * Embedded single quotes are doubled so that a hand-entered name such as
 * O'Brien can neither break the statement nor inject SQL. The value is
 * coerced with `"" + value`, the same coercion the plain concatenation used,
 * so null still renders as 'null'.
 * NOTE(review): databases that also treat backslash as an escape character
 * may need additional handling - confirm against the target database.
 *
 * @param {*} value - value to embed in the SQL text
 * @returns {string} the quoted literal, including the surrounding quotes
 */
function sqlQuote(value) {
    return "'" + ("" + value).replace(/'/g, "''") + "'";
}

// ITEM_ID and ITEM_CODE are appended unquoted, exactly as before.
// NOTE(review): presumably both columns are numeric - confirm against the titem schema.
var uniquesql_item_id = "SELECT count(1)  FROM titem where item_id=" + ITEM_ID;
var uniquesql_item_code = "SELECT count(1)  FROM titem where item_code=" + ITEM_CODE;
var uniquesql_item_name = "SELECT count(1)  FROM titem where item_name=" + sqlQuote(ITEM_NAME);

// The query result is indexed as [row][column]; [0][0] is the count(1) value.
if (fireToDB(strConn, uniquesql_item_id)[0][0] == 1) {
    item_id_isunique = 1;
}
if (fireToDB(strConn, uniquesql_item_code)[0][0] == 1) {
    item_code_isunique = 1;
}
if (fireToDB(strConn, uniquesql_item_name)[0][0] == 1) {
    item_name_isunique = 1;
}
// ---------------------------------------------------------------------------
// Non-null checks: each *_isnull flag becomes 1 when its field has no value.
// ---------------------------------------------------------------------------

// Trust flags and trust type classification
if (INTRUST_FLAG1 == null) { intrust_flag1_isnull = 1; }
if (INTRUST_TYPE == null) { intrust_type_isnull = 1; }
if (INTRUST_TYPE_SUBITEM == null) { intrust_type_subitem_isnull = 1; }
if (INTRUST_TYPE1 == null) { intrust_type1_isnull = 1; }
if (INTRUST_TYPE2 == null) { intrust_type2_isnull = 1; }

// Item number, amount and expected rates
if (ITEM_NUM == null) { item_num_isnull = 1; }
if (ITEM_MONEY == null) { item_money_isnull = 1; }
if (EXP_RATE1 == null) { exp_rate1_isnull = 1; }
if (EXP_RATE2 == null) { exp_rate2_isnull = 1; }

// Remaining trust flags and classification fields
if (INTRUST_FLAG3 == null) { intrust_flag3_isnull = 1; }
if (INTRUST_FLAG4 == null) { intrust_flag4_isnull = 1; }
if (ENTITY_TYPE == null) { entity_type_isnull = 1; }
if (DEAL_TYPE == null) { deal_type_isnull = 1; }
if (MANAGERTYPE == null) { managertype_isnull = 1; }
if (NATRUST_TYPE == null) { natrust_type_isnull = 1; }
if (CUSTODIAN_BANK == null) { custodian_bank_isnull = 1; }

// WITH_* flags and benefit period
if (WITH_BANK_FLAG == null) { with_bank_flag_isnull = 1; }
if (WITH_SECURITY_FLAG == null) { with_security_flag_isnull = 1; }
if (WITH_PRIVATE_FLAG == null) { with_private_flag_isnull = 1; }
if (WITH_GOV_FLAG == null) { with_gov_flag_isnull = 1; }
if (BEN_PERIOD == null) { ben_period_isnull = 1; }
// ---------------------------------------------------------------------------
// Standardization checks: a value is "normal" when the dictionary table
// trust_tdictparam holds at least one row for the given type_id and value.
// ---------------------------------------------------------------------------

/**
 * Builds the dictionary lookup statement for one field.
 *
 * @param {number} typeId - dictionary type_id to look in
 * @param {*} value - field value to look up
 * @param {boolean} [quoteAsText] - when true the value is emitted as a
 *     single-quoted SQL literal with embedded single quotes doubled, so a
 *     hand-entered value containing ' cannot break or alter the statement;
 *     otherwise the value is appended as-is
 * @returns {string} the count(1) query text
 */
function buildDictCountSql(typeId, value, quoteAsText) {
    var rendered = quoteAsText ? "'" + ("" + value).replace(/'/g, "''") + "'" : value;
    return "select  count(1) from  trust_tdictparam where type_id=" + typeId +
        " and type_value=" + rendered;
}

var normalsql_intrust_type = buildDictCountSql(1104, INTRUST_TYPE);
var normalsql_intrust_type_subitem = buildDictCountSql(1151, INTRUST_TYPE_SUBITEM);
var normalsql_intrust_type1 = buildDictCountSql(1138, INTRUST_TYPE1);
var normalsql_intrust_type2 = buildDictCountSql(1139, INTRUST_TYPE2);
var normalsql_entity_type = buildDictCountSql(1140, ENTITY_TYPE);
var normalsql_deal_type = buildDictCountSql(1141, DEAL_TYPE);
var normalsql_natrust_type = buildDictCountSql(1143, NATRUST_TYPE);
// CUSTODIAN_BANK is the only value compared as text, hence the quoting.
var normalsql_custodian_bank = buildDictCountSql(1103, CUSTODIAN_BANK, true);
// NOTE(review): type_id 1104 is also used for INTRUST_TYPE above - confirm it is
// the intended dictionary for INNOVAT_TYPE.
var normalsql_innovat_type = buildDictCountSql(1104, INNOVAT_TYPE);
var normalsql_gov_regional = buildDictCountSql(9999, GOV_PROV_REGIONAL);

// The query result is indexed as [row][column]; [0][0] is the count(1) value.
if (fireToDB(strConn, normalsql_intrust_type)[0][0] > 0) { intrust_type_isnormal = 1; }
if (fireToDB(strConn, normalsql_intrust_type_subitem)[0][0] > 0) { intrust_type_subitem_isnormal = 1; }
if (fireToDB(strConn, normalsql_intrust_type1)[0][0] > 0) { intrust_type1_isnormal = 1; }
if (fireToDB(strConn, normalsql_intrust_type2)[0][0] > 0) { intrust_type2_isnormal = 1; }
if (fireToDB(strConn, normalsql_entity_type)[0][0] > 0) { entity_type_isnormal = 1; }
if (fireToDB(strConn, normalsql_deal_type)[0][0] > 0) { deal_type_isnormal = 1; }
if (fireToDB(strConn, normalsql_natrust_type)[0][0] > 0) { natrust_type_isnormal = 1; }
if (fireToDB(strConn, normalsql_custodian_bank)[0][0] > 0) { custodian_bank_isnormal = 1; }
if (fireToDB(strConn, normalsql_innovat_type)[0][0] > 0) { innovat_type_isnormal = 1; }
if (fireToDB(strConn, normalsql_gov_regional)[0][0] > 0) { gov_regional_isnormal = 1; }
// NOTE(review): bank_id_isnormal is tested by the later checks in this script but no
// lookup here ever sets it, so it keeps its initial value and bank_id is always
// reported as non-standard. Add the missing dictionary lookup once its type_id and
// source field are known.
// ---------------------------------------------------------------------------
// Data-type check: flag BEN_PERIOD as numeric when isNum accepts it.
// ---------------------------------------------------------------------------
if (isNum(BEN_PERIOD)) { ben_period_isnumber = 1; }
// Evaluate every validated field; a single failing rule switches check_status to 1.
// The rules are grouped per category so each group can be read on its own.
if (item_id_isunique == 0 || item_code_isunique == 0 || item_name_isunique == 0) {
    // uniqueness rule broken
    check_status = 1;
} else if (
    intrust_flag1_isnull == 1 || intrust_type_isnull == 1 || intrust_type_subitem_isnull == 1 ||
    intrust_type1_isnull == 1 || intrust_type2_isnull == 1 || item_num_isnull == 1 ||
    item_money_isnull == 1 || exp_rate1_isnull == 1 || exp_rate2_isnull == 1 ||
    intrust_flag3_isnull == 1 || intrust_flag4_isnull == 1 || entity_type_isnull == 1 ||
    deal_type_isnull == 1 || managertype_isnull == 1 || natrust_type_isnull == 1 ||
    custodian_bank_isnull == 1 || with_bank_flag_isnull == 1 || with_security_flag_isnull == 1 ||
    with_private_flag_isnull == 1 || with_gov_flag_isnull == 1 || ben_period_isnull == 1
) {
    // non-null rule broken
    check_status = 1;
} else if (
    intrust_type_isnormal == 0 || intrust_type_subitem_isnormal == 0 || intrust_type1_isnormal == 0 ||
    intrust_type2_isnormal == 0 || entity_type_isnormal == 0 || deal_type_isnormal == 0 ||
    natrust_type_isnormal == 0 || custodian_bank_isnormal == 0 || innovat_type_isnormal == 0 ||
    bank_id_isnormal == 0 || gov_regional_isnormal == 0
) {
    // standardization rule broken
    check_status = 1;
} else if (ben_period_isnumber == 0) {
    // numeric-type rule broken
    check_status = 1;
}
// Names of the columns that failed the uniqueness check, joined with "、"
// (empty string when every column is unique).
var isunique_column = (function () {
    var duplicated = [];
    if (item_id_isunique == 0) { duplicated.push("item_id"); }
    if (item_code_isunique == 0) { duplicated.push("item_code"); }
    if (item_name_isunique == 0) { duplicated.push("item_name"); }
    return duplicated.join("、");
})();
// Names of the columns that failed the non-null check, joined with "、"
// (empty string when no column is null). Order follows the list below.
var isnull_column = (function () {
    var candidates = [
        [intrust_flag1_isnull, "intrust_flag1"],
        [intrust_type_isnull, "intrust_type"],
        [intrust_type_subitem_isnull, "intrust_type_subitem"],
        [intrust_type1_isnull, "intrust_type1"],
        [intrust_type2_isnull, "intrust_type2"],
        [item_num_isnull, "item_num"],
        [item_money_isnull, "item_money"],
        [exp_rate1_isnull, "exp_rate1"],
        [exp_rate2_isnull, "exp_rate2"],
        [intrust_flag3_isnull, "intrust_flag3"],
        [intrust_flag4_isnull, "intrust_flag4"],
        [entity_type_isnull, "entity_type"],
        [deal_type_isnull, "deal_type"],
        [managertype_isnull, "managertype"],
        [natrust_type_isnull, "natrust_type"],
        [custodian_bank_isnull, "custodian_bank"],
        [with_bank_flag_isnull, "with_bank_flag"],
        [with_security_flag_isnull, "with_security_flag"],
        [with_private_flag_isnull, "with_private_flag"],
        [with_gov_flag_isnull, "with_gov_flag"],
        [ben_period_isnull, "ben_period"]
    ];
    var missing = [];
    for (var i = 0; i < candidates.length; i++) {
        if (candidates[i][0] == 1) {
            missing.push(candidates[i][1]);
        }
    }
    return missing.join("、");
})();
/**
 * Lists the columns whose standardization check failed.
 *
 * @param {Object} flags - maps "<column>_isnormal" to its flag value; a value
 *     equal to 0 marks the column as failed
 * @returns {string} failed column names in declaration order joined with "、",
 *     or "" when none failed
 */
function listNonStandardColumns(flags) {
    // Column names are kept in one place so every name is spelled exactly once
    // (the hand-copied branches previously emitted "iinnovat_type" when
    // innovat_type was the first failing column).
    var columns = [
        "intrust_type", "intrust_type_subitem", "intrust_type1", "intrust_type2",
        "entity_type", "deal_type", "natrust_type", "custodian_bank",
        "innovat_type", "bank_id", "gov_regional"
    ];
    var failed = [];
    for (var i = 0; i < columns.length; i++) {
        if (flags[columns[i] + "_isnormal"] == 0) {
            failed.push(columns[i]);
        }
    }
    return failed.join("、");
}

// Names of the columns that are not standardized, joined with "、".
var isnormal_column = listNonStandardColumns({
    intrust_type_isnormal: intrust_type_isnormal,
    intrust_type_subitem_isnormal: intrust_type_subitem_isnormal,
    intrust_type1_isnormal: intrust_type1_isnormal,
    intrust_type2_isnormal: intrust_type2_isnormal,
    entity_type_isnormal: entity_type_isnormal,
    deal_type_isnormal: deal_type_isnormal,
    natrust_type_isnormal: natrust_type_isnormal,
    custodian_bank_isnormal: custodian_bank_isnormal,
    innovat_type_isnormal: innovat_type_isnormal,
    bank_id_isnormal: bank_id_isnormal,
    gov_regional_isnormal: gov_regional_isnormal
});
// Name of the column that failed the numeric check ("" when BEN_PERIOD is numeric).
var isnumber_column = (ben_period_isnumber == 0) ? "ben_period" : "";
// ---------------------------------------------------------------------------
// Assemble the final violation report (check_type / check_detail / check_name).
// ---------------------------------------------------------------------------

/**
 * Adds one violation to the report: starts the report when check_detail is
 * still empty, otherwise appends to check_type and check_detail after a comma.
 *
 * @param {string} ruleLabel - text recorded in check_type
 * @param {string} detailText - text recorded in check_detail
 */
function appendViolation(ruleLabel, detailText) {
    if (check_detail == "") {
        check_type = ruleLabel;
        check_detail = detailText;
    } else {
        check_type = check_type + "," + ruleLabel;
        check_detail = check_detail + "," + detailText;
    }
}

// Uniqueness violations always open the report and name the source table.
if (item_id_isunique == 0 || item_code_isunique == 0 || item_name_isunique == 0) {
    check_type = "違反唯一規則";
    check_detail = "表" + source_table + "中,字段" + isunique_column + "違反了唯一規則";
}

// Non-null violations.
if (
    intrust_flag1_isnull == 1 || intrust_type_isnull == 1 || intrust_type_subitem_isnull == 1 ||
    intrust_type1_isnull == 1 || intrust_type2_isnull == 1 || item_num_isnull == 1 ||
    item_money_isnull == 1 || exp_rate1_isnull == 1 || exp_rate2_isnull == 1 ||
    intrust_flag3_isnull == 1 || intrust_flag4_isnull == 1 || entity_type_isnull == 1 ||
    deal_type_isnull == 1 || managertype_isnull == 1 || natrust_type_isnull == 1 ||
    custodian_bank_isnull == 1 || with_bank_flag_isnull == 1 || with_security_flag_isnull == 1 ||
    with_private_flag_isnull == 1 || with_gov_flag_isnull == 1 || ben_period_isnull == 1
) {
    appendViolation("違反非空規則", "字段" + isnull_column + "違反了非空規則");
}

// Standardization violations.
if (
    intrust_type_isnormal == 0 || intrust_type_subitem_isnormal == 0 || intrust_type1_isnormal == 0 ||
    intrust_type2_isnormal == 0 || entity_type_isnormal == 0 || deal_type_isnormal == 0 ||
    natrust_type_isnormal == 0 || custodian_bank_isnormal == 0 || innovat_type_isnormal == 0 ||
    bank_id_isnormal == 0 || gov_regional_isnormal == 0
) {
    appendViolation("違反標准化規則", "字段" + isnormal_column + "違反了標准化規則");
}

// Numeric-type violations.
if (ben_period_isnumber == 0) {
    appendViolation("違反數據類型為數字規則", "字段" + isnumber_column + "違反了數據類型為數字規則");
}

// Close a non-empty report with the primary key of the offending row.
if (check_detail != "") {
    check_detail = check_detail + "," + source_table_id + "=" + ITEM_ID;
}

check_name = ITEM_NAME;
View Code

 

校驗日志表結果:

輸出關鍵性指標:表名、哪些字段違反了哪些規則,以及校驗時間。

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM