当前位置: 首页 > news >正文

[OpenGL]使用 Compute Shader 实现矩阵乘法

一、简介

本文介绍了如何使用 OpenGL 中的 compute shader 进行矩阵相乘的并行运算。代码目标是,输入两个大小为 10*10 的矩阵 A 和 B,计算 A*B 的结果并存储到矩阵 C 中。

二、代码

0. 代码逻辑

1. 初始化 glfw, glad, 窗口
2. 初始化 compute shader
3. 准备输入数据
4. 运行 compute shader
5. 读取结果并打印
6. 释放资源

1. main.cpp

#include <glad/glad.h>
#include <GLFW/glfw3.h>

#include "ComputeShader.hpp"

#include <cstdint>
#include <cstdio>
#include <iostream>

// Callback invoked when the window's framebuffer size changes.
void framebuffer_size_callback(GLFWwindow *window, int width, int height);
void window_close_callback(GLFWwindow *window);
// Handles user input.
void processInput(GLFWwindow *window);
// Default window width and height, in pixels.
unsigned int SCR_WIDTH = 800;
unsigned int SCR_HEIGHT = 600;/************************************/int main()
{/****** 1. 初始化 glfw, glad, 窗口 *******/// glfw 初始化 + 配置 glfw 参数glfwInit();glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);// glfw 生成窗口GLFWwindow *window = glfwCreateWindow(SCR_WIDTH, SCR_HEIGHT, "LearnOpenGL", NULL, NULL);if (window == NULL){// 检查是否成功生成窗口,如果没有成功打印出错信息并且退出std::cout << "Failed to create GLFW window" << std::endl;glfwTerminate();return -1;}// 设置窗口window的上下文glfwMakeContextCurrent(window);// 配置window变化时的回调函数glfwSetFramebufferSizeCallback(window, framebuffer_size_callback);// 设置窗口关闭回调glfwSetWindowCloseCallback(window, window_close_callback);// 使用 glad 加载 OpenGL 中的各种函数if (!gladLoadGLLoader((GLADloadproc)glfwGetProcAddress)){std::cout << "Failed to initialize GLAD" << std::endl;return -1;}/************************************//****** 2. 初始化 compute shader ******/ComputeShader computeShader("../resources/Compute.comp");/************************************//****** 3. 准备输入数据 ******/// 输入矩阵 Afloat A[100];for (int i = 0; i < 10; i++){for (int j = 0; j < 10; j++){A[i * 10 + j] = 1.0f * i;}}// 输入矩阵 Bfloat B[100];for (int i = 0; i < 10; i++){for (int j = 0; j < 10; j++){B[i * 10 + j] = 1.0f * i;}}// 输出矩阵 Cfloat C[100];GLuint SSBO_A, SSBO_B, SSBO_C;glGenBuffers(1, &SSBO_A);glGenBuffers(1, &SSBO_B);glGenBuffers(1, &SSBO_C);glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO_A);glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(A), A, GL_STATIC_READ);glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, SSBO_A);glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO_B);glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(B), B, GL_STATIC_READ);glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, SSBO_B);glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO_C);glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(C), C, GL_DYNAMIC_DRAW);glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, SSBO_C);/************************************//****** 4. 
运行 compute shader ******/// 运行 compute shader, 分为 10*10*1 个 workgroup, 每个 workgroup 计算 C 矩阵中的一个元素值computeShader.use();glDispatchCompute((unsigned int)10, (unsigned int)10, 1);glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);/************************************//****** 5. 读取结果并打印 ******/glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO_C);glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(C), C);for (int row = 0; row < 10; ++row){for (int col = 0; col < 10; ++col){printf("%0.3f ", C[row * 10 + col]);}printf("\n");}/************************************//****** 6.释放资源 ******/// glfw 释放 glfw使用的所有资源glfwTerminate();/************************************/return 0;
}// 用于处理用户输入的函数
void processInput(GLFWwindow *window)
{
    // Flag the window for closing when the Esc key is pressed.
    if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS)
    {
        glfwSetWindowShouldClose(window, GLFW_TRUE);
    }
}

// Framebuffer-resize callback for use with OpenGL and GLFW.
// Records the new framebuffer size in SCR_WIDTH / SCR_HEIGHT and resizes the
// OpenGL viewport to match, so rendered content is not stretched or squashed
// after the window changes size.
void framebuffer_size_callback(GLFWwindow *window, int width, int height)
{
    glViewport(0, 0, width, height);
    SCR_HEIGHT = static_cast<unsigned int>(height);
    SCR_WIDTH = static_cast<unsigned int>(width);
}
// Window-close callback: logs that the window is closing.
// Extra cleanup (releasing resources, writing logs, ...) can be added here.
void window_close_callback(GLFWwindow *window)
{
    std::cout << "Window is closing..." << std::endl;
}

2. ComputeShader 类

#ifndef COMPUTESHADER_H
#define COMPUTESHADER_H

#include <glad/glad.h>
#include <glm/glm.hpp>

#include <string>
#include <fstream>
#include <sstream>
#include <iostream>

/**
 * Loads a compute shader from a file, compiles it and links it into an
 * OpenGL program object whose name is exposed as `ID`.
 *
 * Compile, link and file-read failures are reported on std::cout; they are
 * not thrown to the caller. The class has no destructor, so the program
 * object is not deleted when a ComputeShader goes out of scope.
 */
class ComputeShader
{
public:
    // OpenGL program object name; stays 0 for a default-constructed instance.
    unsigned int ID = 0;

    // Creates an empty wrapper that refers to no program (ID == 0).
    ComputeShader() = default;

    /**
     * Reads the shader source at `computePath`, compiles it as a
     * GL_COMPUTE_SHADER and links it into a new program stored in `ID`.
     *
     * @param computePath Path of the compute shader source file.
     */
    ComputeShader(const char *computePath)
    {
        // 1. Retrieve the compute shader source code from the file.
        std::string computeCode;
        std::ifstream cShaderFile;
        // Make the ifstream throw on failure so read errors reach the catch below.
        cShaderFile.exceptions(std::ifstream::failbit | std::ifstream::badbit);
        try
        {
            cShaderFile.open(computePath);
            std::stringstream cShaderStream;
            // Read the whole file buffer into the stream.
            cShaderStream << cShaderFile.rdbuf();
            cShaderFile.close();
            computeCode = cShaderStream.str();
        }
        catch (const std::ifstream::failure &e)
        {
            // The source stays empty; the compile step below then reports its own error.
            std::cout << "ERROR::SHADER::FILE_NOT_SUCCESSFULLY_READ: " << e.what() << std::endl;
        }
        const char *cShaderCode = computeCode.c_str();

        // 2. Compile the compute shader.
        const unsigned int compute = glCreateShader(GL_COMPUTE_SHADER);
        glShaderSource(compute, 1, &cShaderCode, nullptr);
        glCompileShader(compute);
        checkCompileErrors(compute, "COMPUTE");

        // 3. Link it into a program.
        ID = glCreateProgram();
        glAttachShader(ID, compute);
        glLinkProgram(ID);
        checkCompileErrors(ID, "PROGRAM");

        // The shader object is linked into the program now and no longer needed.
        glDeleteShader(compute);
    }

    // Makes this program the current one.
    void use() const
    {
        glUseProgram(ID);
    }

    // Sets the int uniform called `name` in this program to `value`.
    void setInt(const std::string &name, int value) const
    {
        glUniform1i(glGetUniformLocation(ID, name.c_str()), value);
    }

private:
    /**
     * Prints the info log if compiling or linking failed.
     *
     * @param shader Shader object name, or program object name when `type` is "PROGRAM".
     * @param type   "PROGRAM" to check the link status; any other value checks
     *               the compile status and is echoed in the error message.
     */
    void checkCompileErrors(GLuint shader, const std::string &type)
    {
        GLint success = 0;
        GLchar infoLog[1024];
        if (type != "PROGRAM")
        {
            glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
            if (!success)
            {
                glGetShaderInfoLog(shader, 1024, nullptr, infoLog);
                std::cout << "ERROR::SHADER_COMPILATION_ERROR of type: " << type << "\n"
                          << infoLog << "\n -- --------------------------------------------------- -- " << std::endl;
            }
        }
        else
        {
            glGetProgramiv(shader, GL_LINK_STATUS, &success);
            if (!success)
            {
                glGetProgramInfoLog(shader, 1024, nullptr, infoLog);
                std::cout << "ERROR::PROGRAM_LINKING_ERROR of type: " << type << "\n"
                          << infoLog << "\n -- --------------------------------------------------- -- " << std::endl;
            }
        }
    }
};
#endif

3. compute shader (Compute.comp)

#version 430

// Input matrices A and B and output matrix C, each stored as a flat float
// array and indexed as row * N + col.
layout(std430, binding = 0) buffer inputMatrixA { float A[]; };
layout(std430, binding = 1) buffer inputMatrixB { float B[]; };
layout(std430, binding = 2) buffer OutputData { float C[]; };

// One invocation per work group; each invocation computes one element of C.
layout(local_size_x = 1, local_size_y = 1) in;

// Matrix dimension (the matrices are N x N).
const uint N = 10u;

void main() {
    // Global position of this invocation selects the element of C to compute.
    uint row = gl_GlobalInvocationID.x;
    uint col = gl_GlobalInvocationID.y;

    // Guard against out-of-range invocations.
    if (row >= N || col >= N) {
        return;
    }

    // C[row][col] = sum over k of A[row][k] * B[k][col]
    float result = 0.0;
    for (uint k = 0u; k < N; k++) {
        float valueA = A[row * N + k];
        float valueB = B[k * N + col];
        result += valueA * valueB;
    }
    C[row * N + col] = result;
}

4. 运行结果

0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 
45.000 45.000 45.000 45.000 45.000 45.000 45.000 45.000 45.000 45.000 
90.000 90.000 90.000 90.000 90.000 90.000 90.000 90.000 90.000 90.000 
135.000 135.000 135.000 135.000 135.000 135.000 135.000 135.000 135.000 135.000 
180.000 180.000 180.000 180.000 180.000 180.000 180.000 180.000 180.000 180.000 
225.000 225.000 225.000 225.000 225.000 225.000 225.000 225.000 225.000 225.000 
270.000 270.000 270.000 270.000 270.000 270.000 270.000 270.000 270.000 270.000 
315.000 315.000 315.000 315.000 315.000 315.000 315.000 315.000 315.000 315.000 
360.000 360.000 360.000 360.000 360.000 360.000 360.000 360.000 360.000 360.000 
405.000 405.000 405.000 405.000 405.000 405.000 405.000 405.000 405.000 405.000

三、参考

[1]LearnOpenGL-Guest Articles-2022-Compute Shaders


http://www.mrgr.cn/news/81764.html

相关文章:

  • Kafka高性能设计
  • CBSD管理QEMU仿真虚拟机
  • 如何在 Vue 中处理 API 请求?
  • 第六部分:NumPy在科学计算中的应用
  • 【Unity3D】ECS入门学习(六)状态组件 ISystemStateComponentData
  • 【入门】买杯子
  • 路由器刷机TP-Link tp-link-WDR5660 路由器升级宽带速度
  • SQL进阶技巧:如何分析双重职务问题?
  • C语言期末复习题(PTA)
  • 基于深度学习(HyperLPR3框架)的中文车牌识别系统-前言
  • 蓝桥杯——冒险者公会
  • 蓝桥杯——神奇的数组
  • 解决k8s部署dashboard时一直处于Pending状态的问题
  • Spark生态圈
  • MySQL 性能瓶颈,为什么 MySQL 表的数据量不能太大?
  • Java重要面试名词整理(十):Kafka
  • 第10章 初等数论
  • 【弱监督视频异常检测】2024-TCSVT-基于片段间特征相似度的多尺度时间 MLP 弱监督视频异常检测
  • Python异常处理在“简易记事本”项目中的应用
  • C# 窗体应用程序嵌套web网页,基于谷歌浏览器内核(含源码)
  • 逻辑控制语句
  • Gitlab17.7+Jenkins2.4.91实现Fastapi/Django项目持续发布版本详细操作(亲测可用)
  • 《第十四部分》WDG看门狗
  • List详解
  • 【Linux命令】`ps -a` , `ps -ef` 和 `ps aux` 的区别
  • 【虚拟机网络拓扑记录】