From 7742e68bd17f036dbae6d32e3201545d30765b28 Mon Sep 17 00:00:00 2001 From: Ondrej Jamriska Date: Sat, 11 Aug 2018 03:58:19 +0200 Subject: [PATCH] make the random search kernel execution more granular --- src/patchmatch_gpu.h | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/patchmatch_gpu.h b/src/patchmatch_gpu.h index 3ea8847..a1adaf1 100644 --- a/src/patchmatch_gpu.h +++ b/src/patchmatch_gpu.h @@ -287,6 +287,7 @@ __device__ void tryRandomOffsetInRadius(const int r, tryPatch(sizeA,sizeB,Omega,patchWidth,patchError,lambda,x,y,nx,ny,nbest,ebest); } +/* template __global__ void krnlRandomSearchPass(const V2i sizeA, const V2i sizeB, @@ -321,6 +322,40 @@ __global__ void krnlRandomSearchPass(const V2i sizeA, } } } +*/ + +template +__global__ void krnlRandomSearchPass(const V2i sizeA, + const V2i sizeB, + MemArray2 Omega, + const int patchWidth, + FUNC patchError, + const float lambda, + const int radius, + TexArray2<2,int> NNF, + TexArray2<1,float> E, + TexArray2<1,unsigned char> mask, + pcgState* rngStates) +{ + const int x = blockDim.x*blockIdx.x + threadIdx.x; + const int y = blockDim.y*blockIdx.y + threadIdx.y; + + if (x void patchmatchGPU(const V2i sizeA, @@ -359,7 +394,10 @@ void patchmatchGPU(const V2i sizeA, checkCudaError(cudaDeviceSynchronize()); - krnlRandomSearchPass<<>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,NNF,E,mask,rngStates); + for(int r=1;r>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,r,NNF,E,mask,rngStates); + } checkCudaError(cudaDeviceSynchronize()); }