Skip to content

Commit 6b9199f

Browse files
committed
Replace manual addressing with cuda texture
1 parent ec969db commit 6b9199f

File tree

4 files changed, with 55 additions and 38 deletions.

ray-tracing-cuda/model.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,10 @@ struct Model {
8282
"/textures/" + BaseName(path.C_Str());
8383
LOG(INFO) << "loading texture at: \"" << image_path << "\"";
8484
auto data = stbi_load(image_path.c_str(), &textures[texture_id].width,
85-
&textures[texture_id].height, &channels, 3);
85+
&textures[texture_id].height, &channels, 4);
8686
textures[texture_id].data =
8787
std::string(data, data + (textures[texture_id].width *
88-
textures[texture_id].height * 3));
88+
textures[texture_id].height * 4));
8989
stbi_image_free(data);
9090
meshes[id].texture_id = texture_id;
9191
}

ray-tracing-cuda/textures/image_texture.cu

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,37 @@
22

33
using glm::clamp;
44

5-
__host__ __device__ ImageTexture::ImageTexture(int height, int width,
6-
int components, uint8_t *data)
7-
: height_(height), width_(width), components_(components), data_(data) {}
5+
// Stores the given CUDA texture object handle; Value() samples through it.
// The handle is only copied here, nothing is created or validated.
__host__ __device__ ImageTexture::ImageTexture(
    cudaTextureObject_t image_texture)
    : image_texture_(image_texture) {
}
88

99
__device__ glm::vec3 ImageTexture::Value(double u, double v,
1010
const glm::vec3 &p) const {
11-
u = clamp(u, 0.0, 1.0);
12-
v = 1.0 - clamp(v, 0.0, 1.0); // Flip V to image coordinates
11+
v = 1.0 - v; // Flip V to image coordinates
1312

14-
auto i = static_cast<int>(u * width_);
15-
auto j = static_cast<int>(v * height_);
16-
17-
// Clamp integer mapping, since actual coordinates should be less than 1.0
18-
if (i >= width_) i = width_ - 1;
19-
if (j >= height_) j = height_ - 1;
20-
21-
uint8_t *pixel = data_ + j * (components_ * width_) + i * components_;
13+
float4 ret = tex2D<float4>(image_texture_, u, v);
14+
return glm::vec3(ret.x, ret.y, ret.z);
15+
}
2216

23-
return glm::vec3(pixel[0], pixel[1], pixel[2]) / 255.0f;
17+
// Creates a CUDA texture object over a pitched 2D device buffer whose
// texels are four 8-bit unsigned channels.
//
//   dev_buffer      device pointer to the first row of the image
//   height, width   image extent, forwarded to the pitch2D resource
//   pitch_in_bytes  row stride of dev_buffer in bytes
//
// Texels are read as normalized floats and addressed with normalized
// coordinates. On failure this returns 0 instead of an uninitialized
// handle; the CUDA error is not cleared here, so the caller can still
// inspect it with cudaGetLastError().
cudaTextureObject_t ImageTexture::CreateCudaTextureObj(
    uint8_t *dev_buffer, int height, int width, uint64_t pitch_in_bytes) {
  cudaResourceDesc res_desc;
  memset(&res_desc, 0, sizeof(res_desc));
  res_desc.resType = cudaResourceTypePitch2D;
  res_desc.res.pitch2D.devPtr = dev_buffer;
  // Four unsigned 8-bit channels per texel.
  res_desc.res.pitch2D.desc.f = cudaChannelFormatKindUnsigned;
  res_desc.res.pitch2D.desc.x = 8;
  res_desc.res.pitch2D.desc.y = 8;
  res_desc.res.pitch2D.desc.z = 8;
  res_desc.res.pitch2D.desc.w = 8;
  res_desc.res.pitch2D.height = height;
  res_desc.res.pitch2D.width = width;
  res_desc.res.pitch2D.pitchInBytes = pitch_in_bytes;

  cudaTextureDesc tex_desc;
  memset(&tex_desc, 0, sizeof(tex_desc));
  // Spelled out for readability; these are the same settings the zero-fill
  // above already selects (wrap addressing, point filtering).
  tex_desc.addressMode[0] = cudaAddressModeWrap;
  tex_desc.addressMode[1] = cudaAddressModeWrap;
  tex_desc.filterMode = cudaFilterModePoint;
  tex_desc.readMode = cudaReadModeNormalizedFloat;
  tex_desc.normalizedCoords = 1;

  // Start from a known value so a failed creation never hands back garbage.
  cudaTextureObject_t tex = 0;
  if (cudaCreateTextureObject(&tex, &res_desc, &tex_desc, nullptr) !=
      cudaSuccess) {
    return 0;
  }
  return tex;
}
Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
#pragma once
22

3+
#include <cuda_runtime.h>
4+
35
#include <glm/glm.hpp>
46

57
#include "textures/texture.cuh"
68

79
class ImageTexture : public Texture {
810
private:
9-
uint8_t *data_;
10-
int height_, width_, components_;
11+
cudaTextureObject_t image_texture_;
1112

1213
public:
13-
__host__ __device__ ImageTexture(int height, int width, int components,
14-
uint8_t *data);
14+
__host__ __device__ ImageTexture(cudaTextureObject_t image_texture);
1515
__device__ glm::vec3 Value(double u, double v, const glm::vec3 &p) const;
16+
17+
static cudaTextureObject_t CreateCudaTextureObj(uint8_t *dev_buffer,
18+
int height, int width,
19+
uint64_t pitch_in_bytes);
1620
};

scenes/birthday.cu

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -31,21 +31,16 @@ curandState *d_states;
3131
Camera *d_camera;
3232
HitableList *d_world;
3333
glm::vec3 *d_image;
34-
35-
struct DeviceImage {
36-
uint8_t *image;
37-
int height, width, components;
38-
};
39-
40-
DeviceImage d_earthmap;
34+
uint8_t *d_tex_image;
35+
cudaTextureObject_t tex;
4136

4237
using glm::pi;
4338
using glm::rotateX;
4439
using glm::rotateY;
4540
using glm::vec3;
4641

4742
__global__ void InitWorld(HitableList *world, Camera *camera,
48-
DeviceImage earthmap) {
43+
cudaTextureObject_t tex) {
4944
new (world) HitableList();
5045
new (camera) Camera(vec3(278, 278, -800), vec3(278, 278, 0), vec3(0, 1, 0),
5146
pi<double>() * 2 / 9, double(WIDTH) / HEIGHT);
@@ -56,8 +51,7 @@ __global__ void InitWorld(HitableList *world, Camera *camera,
5651

5752
auto light_material_ptr =
5853
new DiffuseLight(new ConstantTexture(vec3(1, 1, 1)));
59-
auto earthmap_texture_ptr = new ImageTexture(
60-
earthmap.height, earthmap.width, earthmap.components, earthmap.image);
54+
auto earthmap_texture_ptr = new ImageTexture(tex);
6155
auto earthmap_material_ptr = new Lambertian(earthmap_texture_ptr);
6256
auto sky = new Sky();
6357

@@ -79,22 +73,27 @@ __global__ void InitWorld(HitableList *world, Camera *camera,
7973
world->Append(new Sphere(vec3(278, 278, 0), 100, earthmap_material_ptr));
8074
}
8175

82-
void InitImageTextures(DeviceImage *earthmap) {
83-
auto data = stbi_load("resources/earthmap.jpg", &earthmap->width,
84-
&earthmap->height, &earthmap->components, 0);
85-
int size = earthmap->height * earthmap->width * earthmap->components;
86-
cudaMalloc(&earthmap->image, size);
87-
cudaMemcpy(earthmap->image, data, size, cudaMemcpyHostToDevice);
76+
// Loads resources/earthmap.jpg as 4-channel 8-bit pixels, uploads it into a
// pitched device allocation stored in the global d_tex_image, and returns a
// CUDA texture object created over that allocation.
//
// Aborts through CHECK if the image cannot be loaded or any CUDA call
// fails. The host copy of the image is released once the upload has been
// attempted; the device allocation is kept alive in d_tex_image.
cudaTextureObject_t InitImageTextures() {
  // Force 4 channels so the data matches the 4x8-bit texel format used by
  // ImageTexture::CreateCudaTextureObj.
  constexpr int kChannels = 4;
  int width = 0, height = 0, channels = 0;
  auto data =
      stbi_load("resources/earthmap.jpg", &width, &height, &channels, kChannels);
  CHECK(data != nullptr) << "failed to load resources/earthmap.jpg";

  const size_t row_bytes = static_cast<size_t>(kChannels) * width;
  // cudaMallocPitch reports the pitch through a size_t*.
  size_t pitch_in_bytes = 0;
  auto err = cudaMallocPitch(&d_tex_image, &pitch_in_bytes, row_bytes, height);
  CHECK(err == cudaSuccess) << cudaGetErrorString(err);

  // Host rows are tightly packed (row_bytes apart); device rows are
  // pitch_in_bytes apart.
  err = cudaMemcpy2D(d_tex_image, pitch_in_bytes, data, row_bytes, row_bytes,
                     height, cudaMemcpyHostToDevice);
  // The copy above is blocking for this host source, so the host buffer can
  // be released before the result is checked.
  stbi_image_free(data);
  CHECK(err == cudaSuccess) << cudaGetErrorString(err);

  auto texture = ImageTexture::CreateCudaTextureObj(d_tex_image, height, width,
                                                    pitch_in_bytes);
  err = cudaGetLastError();
  CHECK(err == cudaSuccess) << cudaGetErrorString(err);
  return texture;
}
9190

9291
int main() {
9392
Main(
9493
&d_states, &d_camera, &d_world, &d_image,
9594
[](HitableList *world, Camera *camera) {
96-
InitImageTextures(&d_earthmap);
97-
InitWorld<<<1, 1>>>(world, camera, d_earthmap);
95+
auto tex = InitImageTextures();
96+
InitWorld<<<1, 1>>>(world, camera, tex);
9897
},
9998
HEIGHT, WIDTH, 200);
10099
return 0;

0 commit comments

Comments
 (0)