這篇相當於是對前三篇的總結,基本效果如下:
在《初試PyOpenGL一 (Python+OpenGL)》里講解了PyOpenGL環境搭建,以及網格、球體、第一與第三人稱攝像機的實現。在《初試PyOpenGL二 (Python+OpenGL)基本地形生成與高度檢測》里用高程圖生成地形,並以球體做第三人稱漫游。在《初試PyOpenGL三 (Python+OpenGL)GPGPU基本運算與乒乓技術》里實現了基本的GPGPU運算。
我認為比較完善的GPU粒子系統應該如下:粒子初始化可以放在CPU里,但是相關數據運算首先要放在GPU里,並且運算后的數據也應該留在顯存里,而不是內存里。用第三篇的方法實現GPU粒子系統並不滿足這個要求:因為它的數據存放在紋理中,要放入VBO里,必須先讀回內存,然后再存入顯存。這里雖然運算是放入GPU了,但是數據要經過顯存-內存-顯存的過程,產生不必要的消耗;並且,因為數據是存放在紋理的像素里,運算被限定在片斷着色器中。這兩個限制導致第三篇里的內容不適合用來實現GPU粒子系統,而更適合用來實現一些需要CPU與GPU結合處理的運算。
在這里,我們采用OpenGL里的Transform Feedback。和第三篇采用FBO結合浮點紋理不同,Transform Feedback簡單來說,就是傳入一個VBO,經過GPU運算后,把結果放入另一個VBO中。注意兩點:一是操作都是針對VBO,也就是針對顯存,故不需要經過CPU與內存;二是在Transform Feedback里,一個緩存不能同時作為輸入和輸出。
首先來看一個簡單的例子,介紹Transform Feedback的基本應用。先指出一點,GLSL3.0與GLSL4.0的Transform Feedback寫法有些區別,手上分別有支持3.0與4.0的顯卡,但是為了更好的兼容性,選擇3.0的寫法,相應代碼和着色器代碼如下:

# GLSL vertex shader for the minimal transform-feedback demo.
# It reads one float per vertex (`inValue`) and writes two floats:
# `outValue` (the input plus 3.0) and `out2` (the constant 1.0).
# These two outputs are the varyings captured by transform feedback.
# NOTE(review): line breaks/indentation inside the string were restored from
# a collapsed listing; only whitespace differs from the published snippet.
tf_v = """
#version 330
in float inValue;
out float outValue;
out float out2;
void main() {
    outValue = inValue+3.0;
    out2 = 1.0;
}"""

# Build the transform-feedback program used by the minimal demo.
# The program contains only a vertex shader (`tf_v`); its outputs are
# captured into a buffer instead of being rasterized.

# Local import so this snippet stays self-contained even if the enclosing
# module does not star-import OpenGL.GL.
from OpenGL.GL import GL_LINK_STATUS, glGetProgramInfoLog, glGetProgramiv

this.tfProgram = ShaderProgram(glCreateProgram())
tfvshader = shaders.compileShader(tf_v, GL_VERTEX_SHADER)
glAttachShader(this.tfProgram, tfvshader)

# glTransformFeedbackVaryings expects a C `char **`. The names are bytes
# literals so that c_char_p accepts them on Python 3 as well as Python 2.
LP_LP_c_char = POINTER(POINTER(c_char))
ptrs = (c_char_p * 2)(b'outValue', b'out2')
print("%s %s" % (ptrs, len(ptrs)))
c_array = cast(ptrs, LP_LP_c_char)

# The captured varyings must be declared BEFORE linking; with
# GL_INTERLEAVED_ATTRIBS they are written to one buffer in the order
# outValue, out2 for every vertex.
glTransformFeedbackVaryings(this.tfProgram, len(ptrs), c_array, GL_INTERLEAVED_ATTRIBS)
glLinkProgram(this.tfProgram)

# Fail loudly on a link error (e.g. a misspelled varying name) instead of
# continuing with an unusable program and a -1 attribute location.
if not glGetProgramiv(this.tfProgram, GL_LINK_STATUS):
    raise RuntimeError("transform feedback program failed to link: %s"
                       % (glGetProgramInfoLog(this.tfProgram),))

# Cache the input attribute location on the program object for later use.
this.tfProgram.invalue = glGetAttribLocation(this.tfProgram, "inValue")

class transformFeedback(common):
    """Minimal transform-feedback round trip used to check the setup.

    Five floats are uploaded to an input VBO, run through the given program
    with rasterization disabled, captured into a second buffer and read back
    twice (glGetBufferSubData and glMapBuffer) so the result can be printed.
    """

    def __init__(this, pro):
        """Run one transform-feedback pass and print the captured floats.

        Args:
            pro: the transform-feedback program object; it must expose
                ``invalue``, the attribute location of the shader input.
                (Presumably already linked with two interleaved float
                varyings per vertex -- verify against the program setup.)
        """
        float_size = ctypes.sizeof(ctypes.c_float)
        outputs_per_vertex = 2  # the shader emits two floats per input float
        source = ny.array([1.0, 2.0, 3.0, 4.0, 5.0], 'f')
        vertex_count = len(source)
        out_count = vertex_count * outputs_per_vertex
        out_bytes = out_count * float_size

        this.vbo = vbo.VBO(source)
        glUseProgram(pro)
        pi = pro.invalue

        # Output buffer: storage only, sized from the input instead of a
        # hard-coded byte count.
        this.tbo = glGenBuffers(1)
        glBindBuffer(GL_ARRAY_BUFFER, this.tbo)
        glBufferData(GL_ARRAY_BUFFER, out_bytes, None, GL_STATIC_DRAW)

        # Input buffer. In PyOpenGL the stride/pointer arguments of
        # glVertexAttribPointer must not both be 0, so pass the real stride
        # and the VBO object itself.
        this.vbo.bind()
        glEnableVertexAttribArray(pi)
        glVertexAttribPointer(pi, 1, GL_FLOAT, False, float_size, this.vbo)

        # Capture pass: nothing is rasterized, vertex outputs go to tbo.
        glEnable(GL_RASTERIZER_DISCARD)
        glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, this.tbo)
        glBeginTransformFeedback(GL_POINTS)
        glDrawArrays(GL_POINTS, 0, vertex_count)
        glEndTransformFeedback()
        glDisable(GL_RASTERIZER_DISCARD)
        glDisableVertexAttribArray(pi)
        glFlush()

        # Readback 1: copy the buffer into a ctypes array, view it via numpy.
        glBindBuffer(GL_ARRAY_BUFFER, this.tbo)
        readback = (ctypes.c_float * out_count)()
        point = ctypes.cast(readback, ctypes.POINTER(ctypes.c_float))
        glGetBufferSubData(GL_ARRAY_BUFFER, 0, out_bytes, point)
        array = ny.ctypeslib.as_array(point, (out_count,))
        print("tf %s" % (array,))

        # Readback 2: map the same buffer. Map and unmap use the SAME target,
        # and the view covers every captured float. The numpy view is only
        # valid while the buffer is mapped, so print before unmapping.
        bf = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_WRITE)
        pointv = ctypes.cast(bf, ctypes.POINTER(ctypes.c_float))
        arrayv = ny.ctypeslib.as_array(pointv, (out_count,))
        print("tfv %s" % (arrayv,))
        glUnmapBuffer(GL_ARRAY_BUFFER)
着色器里代碼很簡單,傳入一個float數據,返回兩個float數據。上面我們傳入一個數組[1.0, 2.0, 3.0, 4.0, 5.0],經過着色器里的簡單運算,分別返回這個數據加3后的值,與一個固定值1.0。然后在transformFeedback里,我們為了驗證正確與否,需要讀取VBO里的數據。在這里,pyopengl可以使用glGetBufferSubData或glMapBuffer來得到VBO里的數據。需要注意的是python與c之間指針和數據的轉換:引入ctypes,聲明ctypes類型的數組,然后轉換成對應的指針,填充這個數組后,再把指針轉化成numpy里的數組。得到的數據如下:
可以看到,傳出的數據是4,1,5,1,6,1,7,1,8,1,對比傳入的是1.0, 2.0, 3.0, 4.0, 5.0。驗證正確。
下面我們以上面的例子為基礎來實現我們的粒子系統,這里先給出相關Python代碼。

class particleSystem(object):
    """GPU particle system driven by transform feedback with two VBOs.

    Every particle is seven floats: position (x, y, z), velocity (x, y, z)
    and elapsed time. Two buffers are used in ping-pong fashion: each frame
    one is the shader input and the other receives the updated particles,
    and the roles swap on the next frame.
    """

    FLOATS_PER_PARTICLE = 7
    STRIDE = 4 * FLOATS_PER_PARTICLE  # bytes between consecutive particles
    VEL_OFFSET = 12                   # byte offset of vel inside a particle
    TIME_OFFSET = 24                  # byte offset of time inside a particle

    def __init__(this, len=1, live=40.0):
        """Create `len` particles and the two vertex buffers.

        Args:
            len: number of particles (name kept for backward compatibility).
            live: particle lifetime sent to the shader's `live` uniform and
                used as the upper bound of the random initial time.
        """
        this.length = len
        this.live = live
        total = this.FLOATS_PER_PARTICLE * len
        # Flat float32 arrays instead of Python lists: same indexing, far
        # cheaper for large particle counts.
        this.cparticles = ny.zeros(total, 'f')
        this.nparticles = ny.zeros(total, 'f')
        this.index = 0
        this.center = 0.0, 0.0
        this.currenttime = 0.0
        this.height = 2.0
        this.init1()
        this.createVAO()

    def init1(this):
        """Fill `cparticles` with the initial state of every particle.

        Layout per particle: pos(x, y, z), vel(x, y, z), time.
        x = 0, y = 3, z in [0, 5); vel.x in [0, 1), vel.y = vel.z = 0;
        time in [1, live). Vectorised with numpy rather than a Python loop.
        """
        count = this.length
        particles = ny.zeros((count, this.FLOATS_PER_PARTICLE), 'f')
        particles[:, 1] = 3.0
        particles[:, 2] = ny.random.uniform(0.0, 5.0, count)
        particles[:, 3] = ny.random.random(count)
        particles[:, 6] = ny.random.uniform(1.0, this.live, count)
        this.cparticles = particles.reshape(-1)

    def createVAO(this):
        """Wrap the two particle arrays in PyOpenGL VBO objects."""
        this.currvbo = vbo.VBO(ny.array(this.cparticles, 'f'))
        this.nextvbo = vbo.VBO(ny.array(this.nparticles, 'f'))

    def render(this, program):
        """Advance the simulation by one frame on the GPU and draw it.

        Args:
            program: the particle transform-feedback program; must expose
                the uniform locations `span` and `live`. If it also exposes
                the attribute locations `pos`, `vel` and `time` they are
                used for the vertex inputs.
        """
        # One timestamp per frame, so the time spent inside render() is part
        # of the next delta instead of being silently dropped.
        now = time.time()
        span = now - this.currenttime if this.currenttime != 0.0 else 0.0
        this.currenttime = now

        # Ping-pong: even frames read currvbo, odd frames read nextvbo.
        if this.index % 2 == 0:
            invbo, outvbo = this.currvbo, this.nextvbo
        else:
            invbo, outvbo = this.nextvbo, this.currvbo

        # GPU compute pass.
        print(span)
        glUseProgram(program)
        glUniform1f(program.span, span)
        glUniform1f(program.live, this.live)
        this.update(invbo, outvbo, program)
        glUseProgram(0)

        # Draw the freshly written particles as points; only the first three
        # floats (position) of every particle are used.
        # NOTE(review): GL_VERTEX_ARRAY client state is not enabled here;
        # presumably the caller enables it -- confirm.
        glColor(0.5, 0.8, 0.9)
        glPointSize(3.0)
        outvbo.bind()
        glVertexPointer(3, GL_FLOAT, this.STRIDE, outvbo)
        glDrawArrays(GL_POINTS, 0, this.length)
        outvbo.unbind()
        this.index = this.index + 1

    def update(this, fvbo, svbo, program=None):
        """Run one transform-feedback pass: fvbo -> shader -> svbo.

        Args:
            fvbo: VBO holding the current particle state (shader input).
            svbo: VBO that receives the updated particle state.
            program: optional program object carrying the attribute
                locations `pos`, `vel` and `time`. When omitted (or when an
                attribute is missing) the historical locations 0, 1 and 2
                are used, which only works if the linker happened to assign
                them in declaration order.
        """
        layout = (
            (getattr(program, 'pos', 0), 3, 0),
            (getattr(program, 'vel', 1), 3, this.VEL_OFFSET),
            (getattr(program, 'time', 2), 1, this.TIME_OFFSET),
        )
        # A location of -1 means the attribute is not active in the program.
        active = [entry for entry in layout if entry[0] >= 0]

        # Original author's note: both buffers should be bound before use
        # (presumably so the VBO wrapper creates the output buffer -- confirm).
        svbo.bind()
        fvbo.bind()
        for location, size, offset in active:
            glEnableVertexAttribArray(location)
            pointer = fvbo if offset == 0 else fvbo + offset
            glVertexAttribPointer(location, size, GL_FLOAT, False,
                                  this.STRIDE, pointer)

        # Capture vertex shader outputs into svbo; nothing is rasterized.
        glEnable(GL_RASTERIZER_DISCARD)
        glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, svbo)
        glBeginTransformFeedback(GL_POINTS)
        glDrawArrays(GL_POINTS, 0, this.length)
        glEndTransformFeedback()
        glDisable(GL_RASTERIZER_DISCARD)

        for location, _size, _offset in active:
            glDisableVertexAttribArray(location)
        fvbo.unbind()
結合前面的例子和上文中的乒乓技術來看,在這里我們為每個粒子定義七個數據:前三個用來表示它的位置,中間三個用來表示它的速度,最后一個用來表示它在顯存里的存活時間。update就是把數據從一個緩存經過GPU運算放入另一個緩存的過程。例如第一幀,我們傳入fvbo,然后數據輸出到svbo;在第二幀里,數據就從svbo經過GPU傳入到fvbo;第三、第四幀分別如第一、第二幀。這樣就能實現如第三篇中的乒乓技術。然后在顯示函數render里,我們就用當前輸出的緩存里的數據做簡單的輸出顯示。本文只是介紹用法,要實現如雪花、雨滴、瀑布等特效,需要對粒子初始化、着色器代碼做更改並添加紋理,但是基本處理還是如上。
下面是着色器代碼,實現粒子與球的碰撞,也有與地面的交互。代碼如下:

# GLSL vertex shader that advances one particle per vertex.
# Inputs:   pos, vel, time (the seven floats stored per particle).
# Uniforms: span (frame delta), live (lifetime), plane (height map sampled
#           through its red channel), planeSacle (xz extent used to derive
#           the texture coordinate; spelling kept because the host code looks
#           the uniform up by this exact name), sphere (sphere position, whose
#           y component is also used as the radius).
# Outputs:  outpos, outvel, outtime -- captured by transform feedback.
# The height lookup uses `texture`, because `texture2D` is deprecated in
# GLSL 3.30 and not available in the core profile that `#version 330`
# selects by default.
particle_v = """
#version 330
in vec3 pos;
in vec3 vel;
in float time;
uniform float span;
uniform vec2 planeSacle;
uniform sampler2D plane;
uniform vec3 sphere;
uniform float live;
out vec3 outpos;
out vec3 outvel;
out float outtime;
void main() {
    outpos = pos + vel*span;
    vec2 uv = vec2(pos.xz/planeSacle + vec2(0.5,0.5));
    uv.y = 1.0 - uv.y;
    float hight = texture(plane, uv).r;
    vec3 tvel = vel;
    //sphere collision
    float radius = sphere.y;
    vec3 sphereh = sphere + vec3(0.0,hight,0.0);
    if(distance(outpos,sphereh) <= radius)
    {
        tvel = reflect(vel,normalize(outpos-sphereh))/2.0;
    }
    tvel = tvel + vec3(0.0,-0.5,0.0)*span;

    //ground collision
    if(hight > outpos.y)
    {
        outpos.y = hight;
        tvel = vec3(max(vel.x-span*1.1,0.0),0.0,max(vel.z - span*1.1,0.0));
    }
    //update particle live
    outtime = time + span;
    if(outtime>=live)
    {
        outpos = vec3(0.0,3.0,hight*5.0);
        outtime = 0.0;
        tvel = vec3(hight,0.0,0.0);
    }
    outvel = tvel;
}"""
整個過程比較簡單,也只考慮一些基本的碰撞,比如球的速度也應該影響碰撞后粒子的方向,但是這里只考慮粒子碰撞球后反射的方向,與地面的碰撞后,不會反彈,會慢慢停止向前移動。
最后一些相關着色器的參數設置代碼。

# Build the particle transform-feedback program (vertex shader only) and
# cache its attribute/uniform locations on the program object.

# Local import so this snippet stays self-contained even if the enclosing
# module does not star-import OpenGL.GL.
from OpenGL.GL import GL_LINK_STATUS, glGetProgramInfoLog, glGetProgramiv

this.particleProgram = ShaderProgram(glCreateProgram())
particleshader = shaders.compileShader(particle_v, GL_VERTEX_SHADER)
glAttachShader(this.particleProgram, particleshader)

# glTransformFeedbackVaryings expects a C `char **`. The names are bytes
# literals so that c_char_p accepts them on Python 3 as well as Python 2.
LP_LP_c_char = POINTER(POINTER(c_char))
ptrs = (c_char_p * 3)(b'outpos', b'outvel', b'outtime')
c_array = cast(ptrs, LP_LP_c_char)

# Varyings must be declared BEFORE linking. Interleaved capture writes
# outpos(3) + outvel(3) + outtime(1) per vertex, i.e. the same seven-float
# layout the shader reads as pos/vel/time.
glTransformFeedbackVaryings(this.particleProgram, len(ptrs), c_array, GL_INTERLEAVED_ATTRIBS)
glLinkProgram(this.particleProgram)

# Fail loudly on a link error instead of continuing with -1 locations.
if not glGetProgramiv(this.particleProgram, GL_LINK_STATUS):
    raise RuntimeError("particle program failed to link: %s"
                       % (glGetProgramInfoLog(this.particleProgram),))

# Store every location under the same name as the shader variable.
for attrib_name in ("pos", "vel", "time"):
    setattr(this.particleProgram, attrib_name,
            glGetAttribLocation(this.particleProgram, attrib_name))
for uniform_name in ("span", "live", "plane", "planeSacle", "sphere"):
    setattr(this.particleProgram, uniform_name,
            glGetUniformLocation(this.particleProgram, uniform_name))
在本文中,試着用了5000萬個粒子,發現初始化很慢,花了十幾秒,但是幀數和5000個粒子時基本沒有差別,從這里可以看出GPU並行處理的強大之處。
完整代碼:PythonGPU粒子系統.zip 操作方式EDSF前后左右移動,WR分別向上與向下,鼠標右鍵加移動鼠標控制方向,V切換第一人稱與第三人稱。UP與DOWN切換前面操作的移動幅度。