2維FFT算法實現——基於GPU的基2快速二維傅里葉變換


上篇講述了一維FFT的GPU實現(FFT算法實現——基於GPU的基2快速傅里葉變換),后來我又由於需要做了一下二維FFT,大概思路如下。

首先看的肯定是公式:

如上面公式所描述的,2維FFT只需要拆分成行FFT,和列FFT就行了,其中我在下面的實現是假設原點在F(0,0),由於我的代碼需要原點在中心,所以在最后我將原點移動到了中心。

下面是原點F(0,0)的2維FFT的偽代碼:

    //C2DFFT
    //被執行2DFFT的是一個N*N的矩陣,在source_2d中按行順序儲存
    //水平方向FFT
    for (int i=0;i<N;i++)
    {
        fft1(&source_2d[i*N],&source_2d_1[i*N],N);
    }
    //轉置列成行
    for (int i=0;i<N*N;i++)
    {
        int x = i%N;
        int y = i/N;
        int index = x*N+y;
        source_2d[index] = source_2d_1[i];
    }
    //垂直FFT
    for(int i=0;i<N;i++)
    {
        fft1(&source_2d[i*N],&source_2d_1[i*N],N);
    }
    //轉置回來
    for (int i=0;i<N*N;i++)
    {
        int x = i%N;
        int y = i/N;
        int index = x*N+y;
        source_2d[index] = source_2d_1[i];
    }

GPU實現無非把這些東西轉換到GPU上。

我基於OpenGL的fragment shader來計算fft;數據都存放在紋理或者FBO里面。和1維fft不同的是,NXN的數據里面,只是對當前列或者當前排做一維FFT,所以bit反轉表只需要一個1*N的buffer就可以了。對應的蝴蝶圖數據也只需要1*N即可。所以我們有如下的分配:

static ofFbo _fbo_bitrev_table;
static ofFbo _origin_butterfly_2d;

_fbo_bitrev_table.allocate(N,1,GL_RGBA32F);
_origin_butterfly_2d.allocate(N,1,GL_RGBA32F);

首先要做的是把長度為N的bit反轉表求出來,這個只需要求一次,所以在最開始的時候就用CPU求出來:

    for(int i=0;i<N;i++)
    {
          _bitrev_index_2d.setColor(i,0,ofFloatColor(bit_rev(i,N-1),0,0,0));
    }

    _bitrev_index_2d.update();

    //翻轉后的索引
    _fbo_bitrev_table.begin();
    _bitrev_index_2d.draw(0,0,N,1);
    _fbo_bitrev_table.end();

然后初始化最初的蝴蝶圖,這個和1維FFT是一樣的,只是長度不同而已:

for(int i=0;i<N;i++)
    {
        //初始化二維蝴蝶圖
        if(i%2==0)
        {
            _data_2d.setColor(i,0,ofFloatColor(0.f,2.f,0,i+1));
        }
        else
        {
            _data_2d.setColor(i,0,ofFloatColor(1.f,2.f,0,i-1));
        }
        
    }

    _data_2d.update();

    /////////////////2D初始化/////////////////
    //初始化2D蝴蝶圖
    _weight_index_2d[0].begin();
    _data_2d.draw(0,0,N,1);
    _weight_index_2d[0].end();
    //備份2D初始蝴蝶圖,用於下一次新的計算
    _origin_butterfly_2d.begin();
    _data_2d.draw(0,0,N,1);
    _origin_butterfly_2d.end();

輔助函數:

    static unsigned int bit_rev(unsigned int v, unsigned int maxv)
    {
        unsigned int t = log(maxv + 1)/log(2);
        unsigned int ret = 0;
        unsigned int s = 0x80000000>>(31);
        for (unsigned int i = 0; i < t; ++i)
        {
            unsigned int r = v&(s << i);
            ret |= (r << (t-i-1)) >> (i);    
        }
        return ret;
    }

    static void bit_reverse_copy(RBVector2 src[], RBVector2 des[], int len)
    {
        for (int i = 0; i < len;i++)
        {
            des[bit_rev(i, len-1)] = src[i];
        }
    }

下面定義計算2維IFFT的函數:

void GPUFFT::ifft_2d(ofFbo& in,ofFbo& out,int size);

其中in是輸入,out是輸出,size就是N,由初始化的時候傳入了一次,在這里寫是為了方便調試的時候臨時改變尺寸。

Ifft本身的代碼和上面形式一樣,內容變成了各種shader計算:

void GPUFFT::ifft_2d(ofFbo& in,ofFbo& out,int size)
{
    //禁用Alpha混合,否則繪制到FBO會混合Alpha,造成數據丟失
    ofDisableAlphaBlending();

    //水平FFT
    _weight_index_2d[_cur_2d].begin();
    _origin_butterfly_2d.draw(0,0,N,1);
    _weight_index_2d[_cur_2d].end();

    
    _fbo_in_bitreved_2d.begin();
    _bit_reverse_shader_2d.begin();
    _bit_reverse_shader_2d.setUniform3f("iResolution",N,N,0);
    _bit_reverse_shader_2d.setUniform1i("N",N);
    _bit_reverse_shader_2d.setUniform1i("dir",1);
    _bit_reverse_shader_2d.setUniformTexture("tex_origin",in.getTextureReference(),1);
    _bit_reverse_shader_2d.setUniformTexture("tex_bitreverse_table",_fbo_bitrev_table.getTextureReference(),2);
    ofRect(0,0,N,N);
    _bit_reverse_shader_2d.end();
    _fbo_in_bitreved_2d.end();

    //翻轉后的數據
    _res_back_2d[_cur_2d].begin();
    _fbo_in_bitreved_2d.draw(0,0,N,N);
    _res_back_2d[_cur_2d].end();


    for(int i = 1;i<N;i*=2)
    {
        _res_back_2d[1-_cur_2d].begin();
        ofClear(0,0,0,0);
        _gpu_fft_shader_2d.begin();
        _gpu_fft_shader_2d.setUniform1i("size",N);
        _gpu_fft_shader_2d.setUniform1i("n_step",i);
        _gpu_fft_shader_2d.setUniform3f("iResolution",N,N,0);
        _gpu_fft_shader_2d.setUniform1i("dir",1);
        _gpu_fft_shader_2d.setUniformTexture("tex_index_weight",_weight_index_2d[_cur_2d].getTextureReference(),1);
        _gpu_fft_shader_2d.setUniformTexture("tex_res_back",_res_back_2d[_cur_2d].getTextureReference(),2);
        //_gpu_fft_shader_2d.setUniformTexture("test",imag_test.getTextureReference(),4);

        ofRect(0,0,N,N);

        _gpu_fft_shader_2d.end();

        _res_back_2d[1-_cur_2d].end();


        _weight_index_2d[1-_cur_2d].begin();
        ofClear(0,0,0,0);

        _weight_index_shader_2d.begin();
        _weight_index_shader_2d.setUniform1i("size",N);
        _weight_index_shader_2d.setUniform1i("n_step",i);
        _weight_index_shader_2d.setUniform3f("iResolution",N,1,0);
        _weight_index_shader_2d.setUniform1i("dir",1);
        _weight_index_shader_2d.setUniformTexture("tex_input",_weight_index_2d[_cur_2d].getTextureReference(),1);

        ofRect(0,0,N,1);

        _weight_index_shader_2d.end();

        _weight_index_2d[1-_cur_2d].end();



        _cur_2d = 1 - _cur_2d;
    }

    //for ifft
    _res_back_2d[1-_cur_2d].begin();
    _res_back_2d[_cur_2d].draw(0,0,N,N);
    _res_back_2d[1-_cur_2d].end();

    _res_back_2d[_cur_2d].begin();
    _ifft_div_shader_2d.begin();
    _ifft_div_shader_2d.setUniform1i("N",N);
    _ifft_div_shader_2d.setUniform3f("iResolution",N,N,0);
    _ifft_div_shader_2d.setUniformTexture("tex_rgb",_res_back_2d[1-_cur_2d].getTextureReference(),1);
    ofRect(0,0,N,N);
    _ifft_div_shader_2d.end();
    _res_back_2d[_cur_2d].end();

    //垂直FFT
    //垂直方向的所有都是計算都按照垂直方向來
    _weight_index_2d[_cur_2d].begin();
    _origin_butterfly_2d.draw(0,0,N,1);
    _weight_index_2d[_cur_2d].end();

    //這一步不會將垂直水平化
    _fbo_in_bitreved_2d.begin();
    _bit_reverse_shader_2d.begin();
    _bit_reverse_shader_2d.setUniform3f("iResolution",N,N,0);
    _bit_reverse_shader_2d.setUniform1i("N",N);
    _bit_reverse_shader_2d.setUniform1i("dir",2);
    _bit_reverse_shader_2d.setUniformTexture("tex_origin",_res_back_2d[_cur_2d].getTextureReference(),1);
    _bit_reverse_shader_2d.setUniformTexture("tex_bitreverse_table",_fbo_bitrev_table.getTextureReference(),2);
    ofRect(0,0,N,N);
    _bit_reverse_shader_2d.end();
    _fbo_in_bitreved_2d.end();



    //翻轉后的數據
    _res_back_2d[_cur_2d].begin();
    _fbo_in_bitreved_2d.draw(0,0,N,N);
    _res_back_2d[_cur_2d].end();


    for(int i = 1;i<N;i*=2)
    {
        _res_back_2d[1-_cur_2d].begin();
        ofClear(0,0,0,0);
        _gpu_fft_shader_2d.begin();
        _gpu_fft_shader_2d.setUniform1i("size",N);
        _gpu_fft_shader_2d.setUniform1i("n_step",i);
        _gpu_fft_shader_2d.setUniform3f("iResolution",N,N,0);
        _gpu_fft_shader_2d.setUniform1i("dir",2);
        _gpu_fft_shader_2d.setUniformTexture("tex_index_weight",_weight_index_2d[_cur_2d].getTextureReference(),1);
        _gpu_fft_shader_2d.setUniformTexture("tex_res_back",_res_back_2d[_cur_2d].getTextureReference(),2);
        //_gpu_fft_shader_2d.setUniformTexture("test",imag_test.getTextureReference(),4);

        ofRect(0,0,N,N);

        _gpu_fft_shader_2d.end();

        _res_back_2d[1-_cur_2d].end();



        _weight_index_2d[1-_cur_2d].begin();
        ofClear(0,0,0,0);

        _weight_index_shader_2d.begin();
        _weight_index_shader_2d.setUniform1i("size",N);
        _weight_index_shader_2d.setUniform1i("n_step",i);
        _weight_index_shader_2d.setUniform3f("iResolution",N,1,0);
        _weight_index_shader_2d.setUniform1i("dir",2);
        _weight_index_shader_2d.setUniformTexture("tex_input",_weight_index_2d[_cur_2d].getTextureReference(),1);

        ofRect(0,0,N,1);

        _weight_index_shader_2d.end();

        _weight_index_2d[1-_cur_2d].end();

        _cur_2d = 1 - _cur_2d;
    }

    //for ifft
    _res_back_2d[1-_cur_2d].begin();
    _res_back_2d[_cur_2d].draw(0,0,N,N);
    _res_back_2d[1-_cur_2d].end();

    _res_back_2d[_cur_2d].begin();
    _ifft_div_shader_2d.begin();
    _ifft_div_shader_2d.setUniform1i("N",N);
    _ifft_div_shader_2d.setUniform3f("iResolution",N,N,0);
    _ifft_div_shader_2d.setUniformTexture("tex_rgb",_res_back_2d[1-_cur_2d].getTextureReference(),1);
    ofRect(0,0,N,N);
    _ifft_div_shader_2d.end();
    _res_back_2d[_cur_2d].end();

    out.begin();
    _res_back_2d[_cur_2d].draw(0,0,N,N);
    out.end();

    //恢復Alpha混合
    //ofEnableAlphaBlending();
}

現在來看shader內容:

 _bit_reverse_shader_2d

這個shader用於將整個N*N的數據全部按照行或者按照列進行翻裝,使之滿足執行fft的條件:

#version 130
uniform sampler2D tex_origin;
//1xN查找表,用於查找索引對應的bitreverse數
uniform sampler2D tex_bitreverse_table;
//1 for x direction,2 for y direction
uniform int dir;
uniform int N;
uniform vec3 iResolution;

out vec4 outColor;

void main()                                      
{   
    vec2 tex_coord = gl_FragCoord.xy/iResolution.xy;
    
    vec2 table_index;
    table_index.y = 0.5;
    if(dir==1)
        table_index.x = tex_coord.x;
    else
        table_index.x = tex_coord.y;
    float bitreverse = texture(tex_bitreverse_table,table_index).r;
    
    vec2 origin_index;
    if(dir==1)
    {
        //x方向
        origin_index.y = tex_coord.y;
        origin_index.x = (bitreverse+0.5)/N;
    }
    else
    {
        //y方向
        origin_index.x = tex_coord.x;
        origin_index.y = (bitreverse+0.5)/N;
    }
    vec2 param = texture(tex_origin,origin_index).xy;
    
    outColor = vec4(param,0,1);
}

_gpu_fft_shader_2d

這是fft執行計算的部分,同樣分為按行和按列:

#version 130
//NX1
uniform sampler2D tex_index_weight;
//NXN
uniform sampler2D tex_res_back;
uniform sampler2D test;
uniform int size;
uniform int n_step;
//1 for x direction,2 for y direction
uniform int dir;

uniform vec3 iResolution;

out vec4 outColor;

void main()                                      
{   
    vec2 tex_coord = gl_FragCoord.xy/iResolution.xy;
    
    vec2 first_index;
    if(dir==1)
    {
        first_index.y = 0.5;
        first_index.x = tex_coord.x;
    }
    else
    {
        first_index.y = 0.5;
        first_index.x = tex_coord.y;
    }
    
    float cur_x = gl_FragCoord.x - 0.5;
    float cur_y = gl_FragCoord.y - 0.5;
    
    vec2 outv;
    
    vec4 temp = texture(tex_index_weight,first_index);
    //ifft
    vec2 weight = vec2(cos(temp.r/temp.g*2*3.141592653),-sin(temp.r/temp.g*2*3.141592653));
    //fft
    //vec2 weight = vec2(cos(temp.r/temp.g*2*3.141592653),sin(temp.r/temp.g*2*3.141592653));
    vec2 _param2_index;

    if(dir==1)
    {
        _param2_index.x = (temp.a + 0.5)/size;
        _param2_index.y = tex_coord.y;
    }
    else
    {
        _param2_index.x = tex_coord.x;
        _param2_index.y = (temp.a + 0.5)/size;
    }
    
    
    vec2 param1 = texture(tex_res_back,tex_coord).rg;
    vec2 param2 = texture(tex_res_back,_param2_index).rg;
    
    float tex_coord_n1;
    float tex_coord_n2;
    if(dir==1)
    {
        tex_coord_n1 = cur_x;
    }
    else
    {
        tex_coord_n1 = cur_y;
    }
    
    tex_coord_n2 = temp.a;
    
    if(tex_coord_n1<tex_coord_n2)
    {
        outv.r = param1.r + param2.r*weight.r-weight.g*param2.g;
        outv.g = param1.g +weight.r*param2.g + weight.g*param2.r;
    }
    else
    {
        outv.r = param2.r + param1.r*weight.r-weight.g*param1.g;
        outv.g = param2.g +weight.r*param1.g + weight.g*param1.r;
    }
    
    
    outColor = vec4(outv,0,1);
    
}

_weight_index_shader_2d

更新蝴蝶圖索引:

#version 130

uniform sampler2D tex_input;
uniform int size;
uniform int n_total;
//start with 2
uniform int n_step;
//1 for x direction,2 for y direction
uniform int dir;

uniform vec3 iResolution;
out vec4 outColor;

void main()                                      
{      
    vec2 tex_coord = gl_FragCoord.xy/iResolution.xy;
    vec4 fetch = texture(tex_input,tex_coord);
    float cur_x = gl_FragCoord.x - 0.5;
    float cur_y = gl_FragCoord.y - 0.5;
    
    vec4 outv;
    float tex_coord_n;
    if(dir==1)
    {
        //x dir
        tex_coord_n = cur_x;
    }
    else
    {
        //y dir
        tex_coord_n = cur_x;
    }
    
    //updata weight
    vec2 pre_w = fetch.rg;
    float i = pre_w.r;
    float n = pre_w.g;
    float new_i;
    float new_n;
    new_i = i;
    new_n = n*2;
    if(int(tex_coord_n)%(n_step*4) > n_step*2-1)
    {
        new_i += n_step*2;
    }
    outv.r = new_i;
    outv.g = new_n;
    //outv.rg = tex_coord;
    
    //updata index
    vec2 pre_index = fetch.ba;
    int x = int(pre_index.x);
    int y = int(pre_index.y);
    int ni = n_step*2;
    float new_tex_coord_n = tex_coord_n;
    if((int(tex_coord_n)/ni)%2==0)
    {
        new_tex_coord_n += ni;
    }
    else
    {
        new_tex_coord_n -= ni;
    }
    
    outv.b = 0;
    outv.a = new_tex_coord_n;
    outColor = outv;
    //outColor = vec4(tex_coord_n,tex_coord_n%n_step,tex_coord_n%n_step,tex_coord_n%n_step);
} 

最后的

_ifft_div_shader_2d

是為了計算ifft,將每個計算結果除以一個N:

#version 130
uniform sampler2D tex_rgb;
uniform int N;
uniform vec3 iResolution;

out vec4 outColor;

void main()                                      
{   
    vec2 tex_coord = gl_FragCoord.xy/iResolution.xy;
    
    vec2 outv;
        
    vec4 c = texture(tex_rgb,tex_coord);

    outv.r = c.r/N;
    outv.g = c.g/N;
    outColor = vec4(outv,0,1);
}

最后,out里面就是結果了。

對於將原點移動到中心多了以下shader:

        vec4 c;
        if(tex_coord.x>0.5&&tex_coord.y>0.5)
        {
            c = texture(tex_rgb,tex_coord-vec2(0.5,0.5));
            
        }
        if(tex_coord.x>0.5&&tex_coord.y<0.5)
        {
            c = texture(tex_rgb,tex_coord+vec2(-0.5,0.5));
        }
        if(tex_coord.x<0.5&&tex_coord.y>0.5)
        {
            c = texture(tex_rgb,tex_coord+vec2(0.5,-0.5));
        }
        if(tex_coord.x<0.5&&tex_coord.y<0.5)
        {
            c = texture(tex_rgb,tex_coord+vec2(0.5,0.5));
        }
        outColor = c;

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM