Accelerating computation on Android with OpenGL compute shaders
阿新 • Published 2019-01-21
Using compute shaders on Android requires OpenGL ES 3.1, i.e. a device running Android 5.0 (API 21) or later. Possibly because of the OpenGL ES implementation, there are a few points to watch out for when using compute shaders on Android:
- When creating the texture you cannot use glTexImage2D; use glTexStorage2D instead and then hand the data to the texture with glTexSubImage2D (see the sketch after the shader snippet below).
- In the shader, the input and output image2D uniforms must be explicitly qualified with readonly or writeonly; otherwise the shader program fails to compile.
- Note that the level count passed when creating the texture has to be consistent with the data format (a single level of GL_RGBA32F in this example).
layout(binding = 0, rgba32f) readonly uniform image2D input_image;
layout(binding = 1, rgba32f) writeonly uniform image2D output_image;
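As a minimal sketch of the first and third points, texture creation and upload could look like this (width, height and data are placeholders standing in for the mWidth/mHeight/FloatBuffer used later):

// Sketch: an immutable width x height RGBA32F texture, filled from a FloatBuffer
// holding width * height * 4 floats.
int[] tex = new int[1];
GLES31.glGenTextures(1, tex, 0);
GLES31.glBindTexture(GLES31.GL_TEXTURE_2D, tex[0]);
// One mip level; the internal format must match the data that will be uploaded and read back.
GLES31.glTexStorage2D(GLES31.GL_TEXTURE_2D, 1, GLES31.GL_RGBA32F, width, height);
// Upload the data into level 0 of the immutable storage.
GLES31.glTexSubImage2D(GLES31.GL_TEXTURE_2D, 0, 0, 0, width, height, GLES31.GL_RGBA, GLES31.GL_FLOAT, data);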
The most convenient way to use OpenGL on Android is to let GLSurfaceView create the EGL environment; if you are not familiar with it, there are plenty of tutorials online, so the details are not repeated here. As before, this example generates simulated data, runs it through a compute shader that performs some additions, and then reads the result back.
First, set up the EGL environment in the Activity:
public class ComputeActivity extends Activity {
    private GLSurfaceView glsv;

    @Override
    protected void onCreate(@Nullable Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_compute);
        glsv = findViewById(R.id.glsv);
        // Request an OpenGL ES 3.x context; compute shaders need ES 3.1 at runtime.
        glsv.setEGLContextClientVersion(3);
        glsv.setRenderer(new ComputeRender(this));
        // Only render (and therefore compute) on demand.
        glsv.setRenderMode(GLSurfaceView.RENDERMODE_WHEN_DIRTY);
    }
}
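With RENDERMODE_WHEN_DIRTY the renderer only draws when asked to, so each compute pass has to be triggered explicitly; a minimal sketch (where the trigger, e.g. a button handler, is hypothetical):

// Hypothetical trigger: each call schedules one onDrawFrame() on the GL thread,
// which runs the compute passes shown further below.
glsv.requestRender();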
Generate the simulated data in ComputeRender:
// Fill a FloatBuffer with 0, 1, 2, ... as simulated input.
// mSize is presumably mWidth * mHeight * 4 (one float per RGBA channel).
private FloatBuffer createInputBuffer() {
    FloatBuffer floatBuffer = FloatBuffer.allocate(mSize);
    for (int i = 0; i < mSize; i++) {
        floatBuffer.put(i);
    }
    floatBuffer.position(0);
    return floatBuffer;
}
Create the framebuffer and the textures:
public void createEnvi() {
    GLES31.glGenFramebuffers(1, fFrame, 0);
    GLES31.glBindFramebuffer(GLES31.GL_FRAMEBUFFER, fFrame[0]);
    // Three immutable RGBA32F textures: input, intermediate and final output.
    GLES31.glGenTextures(3, fTexture, 0);
    for (int i = 0; i < 3; i++) {
        GLES31.glBindTexture(GLES31.GL_TEXTURE_2D, fTexture[i]);
        GLES31.glTexStorage2D(GLES31.GL_TEXTURE_2D, 1, GLES31.GL_RGBA32F, mWidth, mHeight);
        GLES31.glTexParameteri(GLES31.GL_TEXTURE_2D, GLES31.GL_TEXTURE_MIN_FILTER, GLES31.GL_LINEAR);
        GLES31.glTexParameteri(GLES31.GL_TEXTURE_2D, GLES31.GL_TEXTURE_MAG_FILTER, GLES31.GL_LINEAR);
        GLES31.glTexParameteri(GLES31.GL_TEXTURE_2D, GLES31.GL_TEXTURE_WRAP_S, GLES31.GL_CLAMP_TO_EDGE);
        GLES31.glTexParameteri(GLES31.GL_TEXTURE_2D, GLES31.GL_TEXTURE_WRAP_T, GLES31.GL_CLAMP_TO_EDGE);
        GLES31.glBindTexture(GLES31.GL_TEXTURE_2D, 0);
    }
    // Attach each texture to a color attachment so its contents can later be read back with glReadPixels.
    GLES31.glFramebufferTexture2D(GLES31.GL_FRAMEBUFFER, GLES31.GL_COLOR_ATTACHMENT0,
            GLES31.GL_TEXTURE_2D, fTexture[0], 0);
    GLES31.glFramebufferTexture2D(GLES31.GL_FRAMEBUFFER, GLES31.GL_COLOR_ATTACHMENT1,
            GLES31.GL_TEXTURE_2D, fTexture[1], 0);
    GLES31.glFramebufferTexture2D(GLES31.GL_FRAMEBUFFER, GLES31.GL_COLOR_ATTACHMENT2,
            GLES31.GL_TEXTURE_2D, fTexture[2], 0);
}
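Attaching and reading back RGBA32F textures is not guaranteed on every GPU (it generally depends on float color buffer support), so it can be worth verifying the framebuffer after createEnvi(); a minimal sketch:

// Sketch: check that the FBO with the RGBA32F attachments is actually usable on this device.
int status = GLES31.glCheckFramebufferStatus(GLES31.GL_FRAMEBUFFER);
if (status != GLES31.GL_FRAMEBUFFER_COMPLETE) {
    Log.e(TAG, "float framebuffer incomplete: 0x" + Integer.toHexString(status));
}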
Upload the data into the texture:
private void transferToTexture(Buffer data, int texID) {
    GLES31.glBindTexture(GLES31.GL_TEXTURE_2D, texID);
    GLES31.glTexSubImage2D(GLES31.GL_TEXTURE_2D, 0, 0, 0, mWidth, mHeight, GLES31.GL_RGBA, GLES31.GL_FLOAT, data);
}
Create and link the shader program:
#version 310 es
layout (local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
uniform float v[1000];
layout(binding = 0, rgba32f) readonly uniform image2D input_image;
layout(binding = 1, rgba32f) writeonly uniform image2D output_image;
shared vec4 scanline[32][32];

void main(void)
{
    // One invocation per texel; the global invocation ID doubles as the texel coordinate.
    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
    scanline[pos.x][pos.y] = imageLoad(input_image, pos);
    // Make the shared-memory writes visible to the whole work group before reading them back.
    barrier();
    vec4 data = scanline[pos.x][pos.y];
    data.r = data.r + v[999];
    data.g = data.g;
    data.b = data.b;
    data.a = data.a;
    imageStore(output_image, pos.xy, data);
}
private void initGLSL() {
    mComputeProg = GLES31.glCreateProgram();
    // Load the compute shader source from assets, compile it, attach it and link the program.
    String source = ShaderUtils.loadFromAssetsFile("compute.cs", mContext.getResources());
    ShaderUtils.vglAttachShaderSource(mComputeProg, GLES31.GL_COMPUTE_SHADER, source);
    GLES31.glLinkProgram(mComputeProg);
}
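ShaderUtils is the author's own helper class and vglAttachShaderSource is not shown; a minimal sketch of what such a helper might look like, assuming it compiles the source and attaches it to the program (names and error handling are guesses):

// Hypothetical helper: compile a shader of the given type and attach it to the program.
public static void vglAttachShaderSource(int program, int type, String source) {
    int shader = GLES31.glCreateShader(type);
    GLES31.glShaderSource(shader, source);
    GLES31.glCompileShader(shader);
    int[] compiled = new int[1];
    GLES31.glGetShaderiv(shader, GLES31.GL_COMPILE_STATUS, compiled, 0);
    if (compiled[0] == 0) {
        Log.e(TAG, "shader compile failed: " + GLES31.glGetShaderInfoLog(shader));
    }
    GLES31.glAttachShader(program, shader);
}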
Run the computation:
private void performCompute(int inputTexture, int outputTexture) {
    GLES31.glUseProgram(mComputeProg);
    GLES31.glUniform1fv(GLES31.glGetUniformLocation(mComputeProg, "v"), mValueSize, mValueBuffer);
    // Bind the textures to image units 0 and 1, matching the layout(binding = ...) in the shader.
    GLES31.glBindImageTexture(0, inputTexture, 0, false, 0, GLES31.GL_READ_ONLY, GLES31.GL_RGBA32F);
    GLES31.glBindImageTexture(1, outputTexture, 0, false, 0, GLES31.GL_WRITE_ONLY, GLES31.GL_RGBA32F);
    // A single 32x32 work group, i.e. this covers a 32x32 image.
    GLES31.glDispatchCompute(1, 1, 1);
    // Make the image writes visible to subsequent reads (the next pass and glReadPixels).
    GLES31.glMemoryBarrier(GLES31.GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
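glDispatchCompute(1, 1, 1) launches a single 32x32 work group, which only covers a 32x32 image. For larger textures the group counts would normally be derived from the image size; a sketch under that assumption:

// Sketch: launch enough 32x32 work groups to cover an mWidth x mHeight image.
// Out-of-range invocations should then be guarded in the shader by comparing
// gl_GlobalInvocationID against the image size before calling imageStore.
int groupsX = (mWidth + 31) / 32;
int groupsY = (mHeight + 31) / 32;
GLES31.glDispatchCompute(groupsX, groupsY, 1);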
Read the data back:
@Override
public void onDrawFrame(GL10 gl) {
    createEnvi();
    transferToTexture(mInputBuffer, fTexture[0]);
    FloatBuffer a0 = FloatBuffer.allocate(mSize);
    FloatBuffer a1 = FloatBuffer.allocate(mSize);
    FloatBuffer a2 = FloatBuffer.allocate(mSize);
    long begin = System.currentTimeMillis();
    // Two chained passes: texture 0 -> 1, then 1 -> 2.
    performCompute(fTexture[0], fTexture[1]);
    performCompute(fTexture[1], fTexture[2]);
    Log.w(TAG, "total compute spent:" + (System.currentTimeMillis() - begin));
    // Read each color attachment of the FBO created in createEnvi() back into a FloatBuffer.
    GLES31.glReadBuffer(GLES31.GL_COLOR_ATTACHMENT0);
    GLES31.glReadPixels(0, 0, mWidth, mHeight, GLES31.GL_RGBA, GLES31.GL_FLOAT, a0);
    GLES31.glReadBuffer(GLES31.GL_COLOR_ATTACHMENT1);
    GLES31.glReadPixels(0, 0, mWidth, mHeight, GLES31.GL_RGBA, GLES31.GL_FLOAT, a1);
    GLES31.glReadBuffer(GLES31.GL_COLOR_ATTACHMENT2);
    GLES31.glReadPixels(0, 0, mWidth, mHeight, GLES31.GL_RGBA, GLES31.GL_FLOAT, a2);
    float[] o1 = a0.array();
    float[] o2 = a1.array();
    float[] o3 = a2.array();
}
Finally, check whether o1, o2 and o3 contain the expected values. In my tests, running the compute shader 200 times took only about 5~7 ms in total, so using compute shaders to accelerate deep learning on mobile devices is entirely feasible.
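Since each pass adds v[999] to the red channel only, a quick correctness check right after the read-back could look like this (a sketch reusing mInputBuffer, mValueBuffer and o3 from the code above, and assuming mValueBuffer backs the full v[1000] uniform):

// Sketch: after two passes, the red channel of o3 should equal the input value plus 2 * v[999];
// the other channels are unchanged.
float delta = mValueBuffer.get(999);
boolean ok = true;
for (int i = 0; i < mSize; i += 4) {   // i is the red channel of one texel
    if (Math.abs(o3[i] - (mInputBuffer.get(i) + 2 * delta)) > 1e-3f) {
        ok = false;
        break;
    }
}
Log.w(TAG, "compute result " + (ok ? "correct" : "incorrect"));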