mirror of
https://github.com/jamriska/ebsynth.git
synced 2025-12-16 08:27:45 +01:00
Make the random search kernel execution more granular (one kernel launch per search radius)
This commit is contained in:
@@ -287,6 +287,7 @@ __device__ void tryRandomOffsetInRadius(const int r,
|
|||||||
tryPatch(sizeA,sizeB,Omega,patchWidth,patchError,lambda,x,y,nx,ny,nbest,ebest);
|
tryPatch(sizeA,sizeB,Omega,patchWidth,patchError,lambda,x,y,nx,ny,nbest,ebest);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
template<typename FUNC>
|
template<typename FUNC>
|
||||||
__global__ void krnlRandomSearchPass(const V2i sizeA,
|
__global__ void krnlRandomSearchPass(const V2i sizeA,
|
||||||
const V2i sizeB,
|
const V2i sizeB,
|
||||||
@@ -321,6 +322,40 @@ __global__ void krnlRandomSearchPass(const V2i sizeA,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Random-search step for a single search radius.
//
// Each thread derives one (x,y) coordinate from its 2D block/thread indices.
// Threads whose coordinate lies outside sizeA, or whose mask value at (x,y)
// is not 255, do nothing. Every other thread:
//   1. loads its current offset from NNF and its current error from E,
//   2. calls tryRandomOffsetInRadius with `radius`, passing the loaded offset
//      both as the fixed origin and as the running best,
//   3. writes the resulting error and offset back to E and NNF at (x,y).
//
// The per-thread RNG state is rngStates[x + y*NNF.width].
template<typename FUNC>
__global__ void krnlRandomSearchPass(const V2i sizeA,
                                     const V2i sizeB,
                                     MemArray2<int> Omega,
                                     const int patchWidth,
                                     FUNC patchError,
                                     const float lambda,
                                     const int radius,
                                     TexArray2<2,int> NNF,
                                     TexArray2<1,float> E,
                                     TexArray2<1,unsigned char> mask,
                                     pcgState* rngStates)
{
  const int x = threadIdx.x + blockIdx.x*blockDim.x;
  const int y = threadIdx.y + blockIdx.y*blockDim.y;

  // Guard: the launch grid may overshoot the extent given by sizeA.
  if (x>=sizeA(0) || y>=sizeA(1)) { return; }

  // Guard: only entries whose mask value is exactly 255 are processed.
  if (mask(x,y)[0]!=255) { return; }

  // Snapshot of the offset stored on entry; kept separate from the running
  // best so the origin passed to the search does not change during the call.
  const V2i origin = NNF(x,y);
  V2i   bestOffset = origin;
  float bestError  = E(x,y)(0);

  // RNG state slot belonging to this (x,y).
  pcgState* const rng = rngStates + (x + y*NNF.width);

  tryRandomOffsetInRadius(radius,sizeA,sizeB,Omega,patchWidth,patchError,lambda,x,y,origin,bestOffset,bestError,rng);

  E.write(x,y,V1f(bestError));
  NNF.write(x,y,bestOffset);
}
|
||||||
|
|
||||||
template<typename FUNC>
|
template<typename FUNC>
|
||||||
void patchmatchGPU(const V2i sizeA,
|
void patchmatchGPU(const V2i sizeA,
|
||||||
@@ -359,7 +394,10 @@ void patchmatchGPU(const V2i sizeA,
|
|||||||
|
|
||||||
checkCudaError(cudaDeviceSynchronize());
|
checkCudaError(cudaDeviceSynchronize());
|
||||||
|
|
||||||
krnlRandomSearchPass<<<numBlocks,threadsPerBlock>>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,NNF,E,mask,rngStates);
|
for(int r=1;r<max(sizeB(0),sizeB(1))/2;r=r*2)
|
||||||
|
{
|
||||||
|
krnlRandomSearchPass<<<numBlocks,threadsPerBlock>>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,r,NNF,E,mask,rngStates);
|
||||||
|
}
|
||||||
|
|
||||||
checkCudaError(cudaDeviceSynchronize());
|
checkCudaError(cudaDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user