采用線性逼近法結合32段線性查找表的方式來實現1/z的計算。
首先將1/32~1/64的定點化數據存放到ROM中,ROM中存放的是將數值放大2^n倍並四捨五入後得到的整數。n值越大,精度越高,誤差越小。這裡取n=20;
ROM中存儲的數據是1/(32+i)*2^20的四舍五入的整數部分。
32-64間的數據可以通過查表來實現,其他的數據則采用的是線性逼近的方法。
線性逼近的步驟為:
1.確定最高非零比特位的位置
2.對z進行左移或者右移,得到zp
3.zp查找ROM,得到1/zp,以及1/(zp+1),
4.求得1/zp-1/(zp+1),記為誤差A
5.N=z-zp*2^(m-5)
6.B=A/2^(m-5)*N
7.將放大過的部分縮小回去,或將縮小過的部分放大回去,那麼1/z=(1/zp-B)*(1/2^(m-5))
代碼插入:
// top_inv
//
// Fixed-point reciprocal of `ampout`, the output of the complex_abs instance
// driven by dataa/datab. Per the accompanying notes, the ROM holds
// round(2^20 / (32 + i)); values in the table range are looked up directly and
// all other values are handled by shifting into the table range and linearly
// interpolating between two neighbouring ROM entries:
//
//   zp  = z shifted so that its highest set bit lands on bit 5
//   A   = 1/zp - 1/(zp+1)
//   N   = z - zp * 2^(m-5)
//   B   = A * N / 2^(m-5)
//   1/z = (1/zp - B) / 2^(m-5)
//
// Ports:
//   clk      clock
//   syn_rst  reset; used inside this module as an asynchronous, active-low reset
//   dataa    operand forwarded to complex_abs
//   datab    operand forwarded to complex_abs
//   inv      registered result; holds its previous value while the internal
//            result register `invr` is zero
//
// NOTE(review): `m` is assumed to be the 0-based index of the highest set bit
// of `ampout` as produced by not_0 -- confirm against that module.
module top_inv(
    input             clk,
    input             syn_rst,
    input      [20:0] dataa,
    input      [20:0] datab,
    //input    [20:0] ampout,
    output reg [19:0] inv
    // output reg done
);

    reg  [4:0]  address1;               // ROM address for 1/zp
    reg  [4:0]  address2;               // ROM address for 1/(zp+1)
    wire [4:0]  m;                      // from not_0
    // wire done;
    reg  [19:0] invr;                   // result before the output hold register
    reg  [20:0] ampout_r;               // zp
    reg  [20:0] ampout_r1;              // zp delayed by one cycle
    wire [20:0] ampout;                 // from complex_abs
    reg  [20:0] ampoutr1, ampoutr2, ampoutr3, ampoutr4;   // ampout delay line
    wire [19:0] inv_r1;                 // ROM data for address1
    wire [19:0] inv_r2;                 // ROM data for address2
    reg  [20:0] diff_r;                 // B
    reg  [19:0] diffr;                  // A delayed by one cycle
    reg  [19:0] diff;                   // A
    reg  [19:0] N;
    reg  [19:0] N1;                     // zp shifted back to the scale of z
    reg         en;                     // tells not_0 whether m is needed
    reg  [19:0] inv_r1t1, inv_r1t2, inv_r1t3;             // inv_r1 delay line
    reg  [4:0]  mt1, mt2, mt3, mt4, mt5;                  // m delay line
    reg         sel;                    // 1: interpolated path, 0: direct lookup
    reg         selr1, selr2;

    // Full-width product of A and N. Written as `diffr*N` directly inside a
    // 21-bit assignment, the multiplication is sized to 21 bits and the upper
    // product bits are discarded before the right shift is applied.
    wire [39:0] diff_prod = diffr * N;

    // ampout delay line.
    // The else branch needs begin/end: without it only the first assignment is
    // conditional and stages 2..4 keep shifting while reset is asserted.
    always @(posedge clk or negedge syn_rst) begin
        if (~syn_rst) begin
            ampoutr1 <= 21'd0;
            ampoutr2 <= 21'd0;
            ampoutr3 <= 21'd0;
            ampoutr4 <= 21'd0;
        end
        else begin
            ampoutr1 <= ampout;
            ampoutr2 <= ampoutr1;
            ampoutr3 <= ampoutr2;
            ampoutr4 <= ampoutr3;
        end
    end

    // inv_r1 delay line.
    always @(posedge clk or negedge syn_rst) begin
        if (~syn_rst) begin
            inv_r1t1 <= 0;
            inv_r1t2 <= 0;
            inv_r1t3 <= 0;
        end
        else begin
            inv_r1t1 <= inv_r1;
            inv_r1t2 <= inv_r1t1;
            inv_r1t3 <= inv_r1t2;
        end
    end

    // m delay line.
    always @(posedge clk or negedge syn_rst) begin
        if (~syn_rst) begin
            mt1 <= 0;
            mt2 <= 0;
            mt3 <= 0;
            mt4 <= 0;
            mt5 <= 0;
        end
        else begin
            mt1 <= m;
            mt2 <= mt1;
            mt3 <= mt2;
            mt4 <= mt3;
            mt5 <= mt4;
        end
    end

    // Address generation and computation of A (diff) and N.
    always @(posedge clk or negedge syn_rst) begin
        if (~syn_rst) begin
            diff      <= 0;
            diffr     <= 0;
            ampout_r  <= 'b0;
            ampout_r1 <= 0;
            address1  <= 'b0;
            address2  <= 'b0;
            en        <= 0;
            sel       <= 0;
            // These four were missing from the reset list and started as X.
            selr1     <= 0;
            selr2     <= 0;
            N         <= 0;
            N1        <= 0;
        end
        else begin
            // if(done)
            // begin
            // Direct lookup covers 32..63 only: 64-32 = 32 does not fit the
            // 5-bit address and would wrap to entry 0 (1/32), so 64 is left to
            // the interpolation path below.
            if ((ampout >= 32) && (ampout < 64)) begin
                ampout_r  <= 0;
                ampout_r1 <= 0;
                address1  <= ampoutr3 - 32;
                address2  <= 0;
                diff      <= 0;
                diffr     <= 0;
                N         <= 0;
                N1        <= 0;
                en        <= 0;          // no need to compute m
                sel       <= 0;
                selr1     <= 0;
                selr2     <= 0;
            end
            else begin
                en <= 1;                 // m must be computed
                if (m > 5) begin
                    // z is above the table range: scale down to get zp.
                    ampout_r  <= ampoutr1 >> (m - 5);
                    ampout_r1 <= ampout_r;                // zp
                    address1  <= ampout_r - 32;           // -> inv_r1
                    // NOTE(review): for zp == 63 this is 32 and wraps to 0 in
                    // 5 bits; confirm how the ROM is expected to supply 1/64.
                    address2  <= ampout_r - 31;           // -> inv_r2
                    diff      <= inv_r1 - inv_r2;
                    diffr     <= diff;
                    N1        <= ampout_r1 << (mt2 - 5);
                    N         <= ampoutr4 - N1;
                    selr1     <= 1;
                    selr2     <= selr1;
                    sel       <= selr2;
                end
                if (m < 5) begin
                    // z is below the table range: scale up to get zp.
                    ampout_r  <= ampoutr1 << (5 - m);
                    ampout_r1 <= ampout_r;
                    address1  <= ampout_r - 32;           // -> inv_r1
                    address2  <= ampout_r - 31;           // -> inv_r2
                    diff      <= inv_r1 - inv_r2;
                    diffr     <= diff;
                    N1        <= ampout_r1 >> (5 - mt2);
                    N         <= ampoutr4 - N1;
                    selr1     <= 1;
                    selr2     <= selr1;
                    sel       <= selr2;
                end
            end
            // end
        end
    end

    // Earlier combinational draft, kept for reference:
    // assign diff   = sel ? (inv_r1-inv_r2) : 'b0;
    // assign N      = sel ? (ampout-N1) : 0;
    // assign diff_r = (m>5) ? (diff*N>>(m-5)) : (diff*N<<(5-m));
    // assign inv    = sel ? (inv_r1-diff_r)>>(m-5) : inv_r1;

    // Correction term B and rescaled result.
    // Each shift direction is chosen from the same delayed copy of m that
    // supplies the shift amount, so the amount can never be computed as a
    // negative (wrapped) value when m changes while data is in flight.
    always @(posedge clk or negedge syn_rst) begin
        if (~syn_rst) begin
            invr   <= 0;
            // done <= 0;
            diff_r <= 0;
        end
        else begin
            if (sel) begin
                if (mt4 > 5)
                    diff_r <= diff_prod >> (mt4 - 5);
                else
                    diff_r <= diff_prod << (5 - mt4);

                if (mt5 > 5)
                    invr <= (inv_r1t3 - diff_r) >> (mt5 - 5);
                else
                    invr <= (inv_r1t3 - diff_r) << (5 - mt5);
                // done <= 1;
            end
            else begin
                diff_r <= 0;
                invr   <= inv_r1t3;
            end
        end
    end

    // Output register: only updated by a non-zero invr, otherwise holds.
    always @(posedge clk or negedge syn_rst) begin
        if (~syn_rst) begin
            inv <= 0;
        end
        else begin
            if (|invr)
                inv <= invr;
        end
    end

    // ROM core instantiation
    rom u_rom(
        .clk      (clk),
        .address1 (address1),
        .address2 (address2),
        .inv_r1   (inv_r1),
        .inv_r2   (inv_r2)
        //, .c(c)
    );

    // Highest-nonzero-bit finder
    not_0 u_not_0(
        .ampout  (ampout),
        .clk     (clk),
        .m       (m),
        .en      (en),
        .syn_rst (syn_rst)
    );

    // NOTE(review): the reset is inverted for this instance only; presumably
    // complex_abs uses an active-high reset -- confirm against that module.
    complex_abs u_comlex_abs(
        .clk     (clk),
        .syn_rst (~syn_rst),
        .dataa   (dataa),
        .datab   (datab),
        .ampout  (ampout)
    );

endmodule
那麼最終的仿真結果:如果是直接查表,結果輸出延時一個時鐘周期;如果是通過線性逼近的方法得到,則延時3-5個時鐘周期。這裡時鐘周期設定為20ns;
占用資源報告:
增加一個求平方根的模塊以後的仿真結果(數據輸入後,一共需要約10個時鐘周期才可以計算出一個平方根的倒數值)。有一個小疑問就是怎麼添加一個標誌信號,讓我們知道哪些時刻輸出的inv信號是有效的