mirror of
https://github.com/jamriska/ebsynth.git
synced 2025-12-15 16:07:46 +01:00
add the CPU backend
This commit is contained in:
@@ -28,6 +28,7 @@ ebsynth -style <style.png> -guide <source.png> <target.png> -output <output.png>
|
||||
-pyramidlevels <number>
|
||||
-searchvoteiters <number>
|
||||
-patchmatchiters <number>
|
||||
-backend [cpu|cuda]
|
||||
```
|
||||
|
||||
## Download
|
||||
@@ -129,10 +130,6 @@ equalized to match the luminance of the source painting.
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
|
||||
## Requirements
|
||||
|
||||
`ebsynth` needs a CUDA-capable GPU in order to run. Besides CUDA, there are no other external dependencies. A CPU-only version that doesn't require CUDA will be released later.
|
||||
|
||||
## License
|
||||
|
||||
The code is released into the public domain. You can do anything you want with it.
|
||||
|
||||
2
build-linux-cpu+cuda.sh
Executable file
2
build-linux-cpu+cuda.sh
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/sh
|
||||
nvcc -arch compute_30 src/ebsynth.cpp src/ebsynth_cpu.cpp src/ebsynth_cuda.cu -I"include" -DNDEBUG -D__CORRECT_ISO_CPP11_MATH_H_PROTO -O6 -std=c++11 -w -Xcompiler -fopenmp -o bin/ebsynth
|
||||
2
build-linux-cpu_only.sh
Executable file
2
build-linux-cpu_only.sh
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/sh
|
||||
g++ src/ebsynth.cpp src/ebsynth_cpu.cpp src/ebsynth_nocuda.cpp -DNDEBUG -O6 -fopenmp -I"include" -std=c++11 -o bin/ebsynth
|
||||
@@ -1,2 +0,0 @@
|
||||
#!/bin/sh
|
||||
nvcc -arch compute_30 src/ebsynth.cu -o bin/ebsynth -I "include" -std=c++11 -Xcompiler "-DNDEBUG -O6 -D__CORRECT_ISO_CPP11_MATH_H_PROTO"
|
||||
14
build-win32-cpu+cuda.bat
Normal file
14
build-win32-cpu+cuda.bat
Normal file
@@ -0,0 +1,14 @@
|
||||
@echo off
|
||||
setlocal ENABLEDELAYEDEXPANSION
|
||||
|
||||
for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" x86 && goto compile
|
||||
|
||||
:compile
|
||||
nvcc -m32 -arch compute_30 src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_cuda.cu -DNDEBUG -O6 -I "include" -o "bin\ebsynth.exe" -Xcompiler "/openmp /fp:fast" -Xlinker "/IMPLIB:dummy.lib" -w || goto error
|
||||
nvcc -m32 -arch compute_30 src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_cuda.cu -DNDEBUG -O6 -I "include" -o "bin\ebsynth.dll" -Xcompiler "/openmp /fp:fast" -Xlinker "/IMPLIB:lib\ebsynth.lib" -shared -DEBSYNTH_API=__declspec(dllexport) -w || goto error
|
||||
del dummy.lib;dummy.exp 2> NUL
|
||||
goto :EOF
|
||||
|
||||
:error
|
||||
echo FAILED
|
||||
@%COMSPEC% /C exit 1 >nul
|
||||
14
build-win32-cpu_only.bat
Normal file
14
build-win32-cpu_only.bat
Normal file
@@ -0,0 +1,14 @@
|
||||
@echo off
|
||||
setlocal ENABLEDELAYEDEXPANSION
|
||||
|
||||
for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" x86 && goto compile
|
||||
|
||||
:compile
|
||||
cl src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_nocuda.cpp /DNDEBUG /O2 /openmp /EHsc /nologo /I"include" /Fe"bin\ebsynth.exe" || goto error
|
||||
cl src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_nocuda.cpp /DNDEBUG /O2 /openmp /EHsc /nologo /I"include" /Fe"bin\ebsynth.dll" /DEBSYNTH_API="__declspec(dllexport)" /link /IMPLIB:"lib\ebsynth.lib" || goto error
|
||||
del ebsynth.obj;ebsynth_cpu.obj;ebsynth_nocuda.obj 2> NUL
|
||||
goto :EOF
|
||||
|
||||
:error
|
||||
echo FAILED
|
||||
@%COMSPEC% /C exit 1 >nul
|
||||
@@ -1,12 +0,0 @@
|
||||
@echo off
|
||||
setlocal ENABLEDELAYEDEXPANSION
|
||||
|
||||
for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" x86 && goto compile
|
||||
|
||||
:compile
|
||||
nvcc -arch compute_30 src\ebsynth.cu -m32 -O6 -w -I "include" -o "bin\ebsynth.exe" -Xcompiler "/DNDEBUG /Ox /Oy /Gy /Oi /fp:fast" -Xlinker "/IMPLIB:\"lib\ebsynth.lib\"" || goto error
|
||||
goto :EOF
|
||||
|
||||
:error
|
||||
echo FAILED
|
||||
@%COMSPEC% /C exit 1 >nul
|
||||
14
build-win64-cpu+cuda.bat
Normal file
14
build-win64-cpu+cuda.bat
Normal file
@@ -0,0 +1,14 @@
|
||||
@echo off
|
||||
setlocal ENABLEDELAYEDEXPANSION
|
||||
|
||||
for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" amd64 && goto compile
|
||||
|
||||
:compile
|
||||
nvcc -arch compute_30 src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_cuda.cu -DNDEBUG -O6 -I "include" -o "bin\ebsynth.exe" -Xcompiler "/openmp /fp:fast" -Xlinker "/IMPLIB:dummy.lib" -w || goto error
|
||||
nvcc -arch compute_30 src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_cuda.cu -DNDEBUG -O6 -I "include" -o "bin\ebsynth.dll" -Xcompiler "/openmp /fp:fast" -Xlinker "/IMPLIB:lib\ebsynth.lib" -shared -DEBSYNTH_API=__declspec(dllexport) -w || goto error
|
||||
del dummy.lib;dummy.exp 2> NUL
|
||||
goto :EOF
|
||||
|
||||
:error
|
||||
echo FAILED
|
||||
@%COMSPEC% /C exit 1 >nul
|
||||
14
build-win64-cpu_only.bat
Normal file
14
build-win64-cpu_only.bat
Normal file
@@ -0,0 +1,14 @@
|
||||
@echo off
|
||||
setlocal ENABLEDELAYEDEXPANSION
|
||||
|
||||
for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" amd64 && goto compile
|
||||
|
||||
:compile
|
||||
cl src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_nocuda.cpp /DNDEBUG /O2 /openmp /EHsc /nologo /I"include" /Fe"bin\ebsynth.exe" || goto error
|
||||
cl src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_nocuda.cpp /DNDEBUG /O2 /openmp /EHsc /nologo /I"include" /Fe"bin\ebsynth.dll" /DEBSYNTH_API="__declspec(dllexport)" /link /IMPLIB:"lib\ebsynth.lib" || goto error
|
||||
del ebsynth.obj;ebsynth_cpu.obj;ebsynth_nocuda.obj 2> NUL
|
||||
goto :EOF
|
||||
|
||||
:error
|
||||
echo FAILED
|
||||
@%COMSPEC% /C exit 1 >nul
|
||||
@@ -1,12 +0,0 @@
|
||||
@echo off
|
||||
setlocal ENABLEDELAYEDEXPANSION
|
||||
|
||||
for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" amd64 && goto compile
|
||||
|
||||
:compile
|
||||
nvcc -arch compute_30 src\ebsynth.cu -m64 -O6 -w -I "include" -o "bin\ebsynth.exe" -Xcompiler "/DNDEBUG /Ox /Oy /Gy /Oi /fp:fast" -Xlinker "/IMPLIB:\"lib\ebsynth.lib\"" || goto error
|
||||
goto :EOF
|
||||
|
||||
:error
|
||||
echo FAILED
|
||||
@%COMSPEC% /C exit 1 >nul
|
||||
551
src/ebsynth.cpp
Normal file
551
src/ebsynth.cpp
Normal file
@@ -0,0 +1,551 @@
|
||||
// This software is in the public domain. Where that dedication is not
|
||||
// recognized, you are granted a perpetual, irrevocable license to copy
|
||||
// and modify this file as you see fit.
|
||||
|
||||
#include "ebsynth.h"
|
||||
#include "ebsynth_cpu.h"
|
||||
#include "ebsynth_cuda.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
|
||||
EBSYNTH_API
void ebsynthRun(int    ebsynthBackend,
                int    numStyleChannels,
                int    numGuideChannels,
                int    sourceWidth,
                int    sourceHeight,
                void*  sourceStyleData,
                void*  sourceGuideData,
                int    targetWidth,
                int    targetHeight,
                void*  targetGuideData,
                void*  targetModulationData,
                float* styleWeights,
                float* guideWeights,
                float  uniformityWeight,
                int    patchSize,
                int    voteMode,
                int    numPyramidLevels,
                int*   numSearchVoteItersPerLevel,
                int*   numPatchMatchItersPerLevel,
                int*   stopThresholdPerLevel,
                void*  outputNnfData,
                void*  outputImageData)
{
  // Public entry point: picks the backend implementation selected by
  // `ebsynthBackend` and forwards every other argument to it unchanged.
  // An unrecognized backend id results in a silent no-op.

  // Both backend entry points (ebsynthRunCpu / ebsynthRunCuda) share this signature.
  typedef void (*BackendRunFunc)(int,int,int,int,void*,void*,int,int,void*,void*,float*,float*,float,int,int,int,int*,int*,int*,void*,void*);

  BackendRunFunc run = 0;

  if (ebsynthBackend==EBSYNTH_BACKEND_CPU)
  {
    run = ebsynthRunCpu;
  }
  else if (ebsynthBackend==EBSYNTH_BACKEND_CUDA)
  {
    run = ebsynthRunCuda;
  }
  else if (ebsynthBackend==EBSYNTH_BACKEND_AUTO)
  {
    // AUTO prefers CUDA and falls back to the CPU implementation.
    if (ebsynthBackendAvailableCuda()) { run = ebsynthRunCuda; }
    else                               { run = ebsynthRunCpu;  }
  }

  if (run==0) { return; }

  run(numStyleChannels,
      numGuideChannels,
      sourceWidth,
      sourceHeight,
      sourceStyleData,
      sourceGuideData,
      targetWidth,
      targetHeight,
      targetGuideData,
      targetModulationData,
      styleWeights,
      guideWeights,
      uniformityWeight,
      patchSize,
      voteMode,
      numPyramidLevels,
      numSearchVoteItersPerLevel,
      numPatchMatchItersPerLevel,
      stopThresholdPerLevel,
      outputNnfData,
      outputImageData);
}
|
||||
|
||||
EBSYNTH_API
int ebsynthBackendAvailable(int ebsynthBackend)
{
  // Reports whether the requested backend can be used. Returns non-zero when
  // it is available and 0 otherwise (including for unknown backend ids).
  if (ebsynthBackend==EBSYNTH_BACKEND_CPU)  { return ebsynthBackendAvailableCpu();  }
  if (ebsynthBackend==EBSYNTH_BACKEND_CUDA) { return ebsynthBackendAvailableCuda(); }

  // AUTO is satisfied by any backend that is available.
  if (ebsynthBackend==EBSYNTH_BACKEND_AUTO)
  {
    return ebsynthBackendAvailableCpu() || ebsynthBackendAvailableCuda();
  }

  return 0;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
#include "jzq.h"
|
||||
|
||||
// Generic helper for parsing one command line option.
//
//   args       - all command line arguments
//   inout_argi - index of the argument under inspection; advanced past the
//                option name when the name matches
//   name       - option name to look for (e.g. "-style")
//   out_fail   - set to true when the index is out of range or when `handler`
//                reports failure; set to false otherwise
//   handler    - callable returning bool, invoked (after the index has been
//                advanced) to consume the option's value(s)
//
// Returns true when args[*inout_argi] matched `name`, false otherwise.
template<typename FUNC>
bool tryToParseArg(const std::vector<std::string>& args,int* inout_argi,const char* name,bool* out_fail,FUNC handler)
{
  const int cursor = *inout_argi;

  // Nothing to inspect: report failure without touching the index.
  if (cursor<0 || static_cast<std::size_t>(cursor)>=args.size())
  {
    *out_fail = true;
    return false;
  }

  // Different option: not a failure, just not ours.
  if (!(args[cursor]==name))
  {
    *out_fail = false;
    return false;
  }

  // Step over the option name so that the handler sees its first value.
  *inout_argi = cursor+1;
  *out_fail = !handler();
  return true;
}
|
||||
|
||||
bool tryToParseIntArg(const std::vector<std::string>& args,int* inout_argi,const char* name,int* out_value,bool* out_fail)
|
||||
{
|
||||
return tryToParseArg(args,inout_argi,name,out_fail,[&]
|
||||
{
|
||||
int& argi = *inout_argi;
|
||||
if (argi<args.size())
|
||||
{
|
||||
const std::string& arg = args[argi];
|
||||
try
|
||||
{
|
||||
std::size_t pos = 0;
|
||||
*out_value = std::stoi(arg,&pos);
|
||||
if (pos!=arg.size()) { printf("error: bad %s argument '%s'\n",name,arg.c_str()); return false; }
|
||||
return true;
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
printf("error: bad %s argument '%s'\n",name,arg.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
printf("error: missing argument for the %s option\n",name);
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
bool tryToParseFloatArg(const std::vector<std::string>& args,int* inout_argi,const char* name,float* out_value,bool* out_fail)
|
||||
{
|
||||
return tryToParseArg(args,inout_argi,name,out_fail,[&]
|
||||
{
|
||||
int& argi = *inout_argi;
|
||||
if (argi<args.size())
|
||||
{
|
||||
const std::string& arg = args[argi];
|
||||
try
|
||||
{
|
||||
std::size_t pos = 0;
|
||||
*out_value = std::stof(arg,&pos);
|
||||
if (pos!=arg.size()) { printf("error: bad %s argument '%s'\n",name,arg.c_str()); return false; }
|
||||
return true;
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
printf("error: bad %s argument '%s'\n",name,args[argi].c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
printf("error: missing argument for the %s option\n",name);
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
bool tryToParseStringArg(const std::vector<std::string>& args,int* inout_argi,const char* name,std::string* out_value,bool* out_fail)
|
||||
{
|
||||
return tryToParseArg(args,inout_argi,name,out_fail,[&]
|
||||
{
|
||||
int& argi = *inout_argi;
|
||||
if (argi<args.size())
|
||||
{
|
||||
*out_value = args[argi];
|
||||
return true;
|
||||
}
|
||||
printf("error: missing argument for the %s option\n",name);
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
bool tryToParseStringPairArg(const std::vector<std::string>& args,int* inout_argi,const char* name,std::pair<std::string,std::string>* out_value,bool* out_fail)
|
||||
{
|
||||
return tryToParseArg(args,inout_argi,name,out_fail,[&]
|
||||
{
|
||||
int& argi = *inout_argi;
|
||||
if ((argi+1)<args.size())
|
||||
{
|
||||
*out_value = std::make_pair(args[argi],args[argi+1]);
|
||||
argi++;
|
||||
return true;
|
||||
}
|
||||
printf("error: missing argument for the %s option\n",name);
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#include "stb_image.h"
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#include "stb_image_write.h"
|
||||
|
||||
unsigned char* tryLoad(const std::string& fileName,int* width,int* height)
|
||||
{
|
||||
unsigned char* data = stbi_load(fileName.c_str(),width,height,NULL,4);
|
||||
if (data==NULL)
|
||||
{
|
||||
printf("error: failed to load '%s'\n",fileName.c_str());
|
||||
printf("%s\n",stbi_failure_reason());
|
||||
exit(1);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
// Determines how many channels are actually needed to represent an image
// stored as 4 bytes per pixel (R,G,B,A):
//   1 = gray, 2 = gray + alpha, 3 = RGB, 4 = RGB + alpha.
// The image counts as gray when R==G==B for every pixel and as having alpha
// when at least one pixel has A below 255.
int evalNumChannels(const unsigned char* data,const int numPixels)
{
  bool sawColor = false;
  bool sawTransparency = false;

  const unsigned char* px = data;
  for(int i=0;i<numPixels;i++,px+=4)
  {
    if (px[0]!=px[1] || px[1]!=px[2]) { sawColor = true; }
    if (px[3]!=255)                   { sawTransparency = true; }
  }

  int numChannels = sawColor ? 3 : 1;
  if (sawTransparency) { numChannels += 1; }

  return numChannels;
}
|
||||
|
||||
// Returns the image size at the given pyramid level: the base size scaled by
// 2^(-level) and converted back to integer coordinates.
V2i pyramidLevelSize(const V2i& sizeBase,const int level)
{
  const float scale = std::pow(2.0f,-float(level));
  return V2i(V2f(sizeBase)*scale);
}
|
||||
|
||||
std::string backendToString(const int ebsynthBackend)
|
||||
{
|
||||
if (ebsynthBackend==EBSYNTH_BACKEND_CPU) { return "cpu"; }
|
||||
else if (ebsynthBackend==EBSYNTH_BACKEND_CUDA) { return "cuda"; }
|
||||
else if (ebsynthBackend==EBSYNTH_BACKEND_AUTO) { return "auto"; }
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
int main(int argc,char** argv)
|
||||
{
|
||||
if (argc<2)
|
||||
{
|
||||
printf("usage: %s [options]\n",argv[0]);
|
||||
printf("\n");
|
||||
printf("options:\n");
|
||||
printf(" -style <style.png>\n");
|
||||
printf(" -guide <source.png> <target.png>\n");
|
||||
printf(" -output <output.png>\n");
|
||||
printf(" -weight <value>\n");
|
||||
printf(" -uniformity <value>\n");
|
||||
printf(" -patchsize <size>\n");
|
||||
printf(" -pyramidlevels <number>\n");
|
||||
printf(" -searchvoteiters <number>\n");
|
||||
printf(" -patchmatchiters <number>\n");
|
||||
printf(" -stopthreshold <value>\n");
|
||||
printf(" -backend [cpu|cuda]\n");
|
||||
printf("\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::string styleFileName;
|
||||
float styleWeight = NAN;
|
||||
std::string outputFileName = "output.png";
|
||||
|
||||
struct Guide
|
||||
{
|
||||
std::string sourceFileName;
|
||||
std::string targetFileName;
|
||||
float weight;
|
||||
|
||||
int sourceWidth;
|
||||
int sourceHeight;
|
||||
unsigned char* sourceData;
|
||||
|
||||
int targetWidth;
|
||||
int targetHeight;
|
||||
unsigned char* targetData;
|
||||
|
||||
int numChannels;
|
||||
};
|
||||
|
||||
std::vector<Guide> guides;
|
||||
|
||||
float uniformityWeight = 3500;
|
||||
int patchSize = 5;
|
||||
int numPyramidLevels = -1;
|
||||
int numSearchVoteIters = 6;
|
||||
int numPatchMatchIters = 4;
|
||||
int stopThreshold = 5;
|
||||
int backend = ebsynthBackendAvailable(EBSYNTH_BACKEND_CUDA) ? EBSYNTH_BACKEND_CUDA : EBSYNTH_BACKEND_CPU;
|
||||
|
||||
{
|
||||
std::vector<std::string> args(argc);
|
||||
for(int i=0;i<argc;i++) { args[i] = argv[i]; }
|
||||
|
||||
bool fail = false;
|
||||
int argi = 1;
|
||||
|
||||
float* precedingStyleOrGuideWeight = 0;
|
||||
while(argi<argc && !fail)
|
||||
{
|
||||
float weight;
|
||||
std::pair<std::string,std::string> guidePair;
|
||||
std::string backendName;
|
||||
|
||||
if (tryToParseStringArg(args,&argi,"-style",&styleFileName,&fail))
|
||||
{
|
||||
styleWeight = NAN;
|
||||
precedingStyleOrGuideWeight = &styleWeight;
|
||||
argi++;
|
||||
}
|
||||
else if (tryToParseStringPairArg(args,&argi,"-guide",&guidePair,&fail))
|
||||
{
|
||||
Guide guide;
|
||||
guide.sourceFileName = guidePair.first;
|
||||
guide.targetFileName = guidePair.second;
|
||||
guide.weight = NAN;
|
||||
guides.push_back(guide);
|
||||
precedingStyleOrGuideWeight = &guides[guides.size()-1].weight;
|
||||
argi++;
|
||||
}
|
||||
else if (tryToParseStringArg(args,&argi,"-output",&outputFileName,&fail))
|
||||
{
|
||||
argi++;
|
||||
}
|
||||
else if (tryToParseFloatArg(args,&argi,"-weight",&weight,&fail))
|
||||
{
|
||||
if (precedingStyleOrGuideWeight!=0) { *precedingStyleOrGuideWeight = weight; }
|
||||
else { printf("error: at least one -style or -guide option must precede the -weight option!\n"); return 1; }
|
||||
argi++;
|
||||
}
|
||||
else if (tryToParseFloatArg(args,&argi,"-uniformity",&uniformityWeight,&fail)) { argi++; }
|
||||
else if (tryToParseIntArg(args,&argi,"-patchsize",&patchSize,&fail))
|
||||
{
|
||||
if (patchSize<3) { printf("error: patchsize is too small!\n"); return 1; }
|
||||
if (patchSize%2==0) { printf("error: patchsize must be an odd number!\n"); return 1; }
|
||||
argi++;
|
||||
}
|
||||
else if (tryToParseIntArg(args,&argi,"-pyramidlevels",&numPyramidLevels,&fail))
|
||||
{
|
||||
if (numPyramidLevels<1) { printf("error: bad argument for -pyramidlevels!\n"); return 1; }
|
||||
argi++;
|
||||
}
|
||||
else if (tryToParseIntArg(args,&argi,"-searchvoteiters",&numSearchVoteIters,&fail))
|
||||
{
|
||||
if (numSearchVoteIters<0) { printf("error: bad argument for -searchvoteiters!\n"); return 1; }
|
||||
argi++;
|
||||
}
|
||||
else if (tryToParseIntArg(args,&argi,"-patchmatchiters",&numPatchMatchIters,&fail))
|
||||
{
|
||||
if (numPatchMatchIters<0) { printf("error: bad argument for -patchmatchiters!\n"); return 1; }
|
||||
argi++;
|
||||
}
|
||||
else if (tryToParseIntArg(args,&argi,"-stopthreshold",&stopThreshold,&fail))
|
||||
{
|
||||
if (stopThreshold<0) { printf("error: bad argument for -stopthreshold!\n"); return 1; }
|
||||
argi++;
|
||||
}
|
||||
else if (tryToParseStringArg(args,&argi,"-backend",&backendName,&fail))
|
||||
{
|
||||
if (backendName=="cpu" ) { backend = EBSYNTH_BACKEND_CPU; }
|
||||
else if (backendName=="cuda") { backend = EBSYNTH_BACKEND_CUDA; }
|
||||
else { printf("error: unrecognized backend '%s'\n",backendName.c_str()); return 1; }
|
||||
|
||||
if (!ebsynthBackendAvailable(backend)) { printf("error: the %s backend is not available!\n",backendToString(backend).c_str()); return 1; }
|
||||
|
||||
argi++;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("error: unrecognized option '%s'\n",args[argi].c_str());
|
||||
fail = true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (fail) { return 1; }
|
||||
}
|
||||
|
||||
const int numGuides = guides.size();
|
||||
|
||||
int sourceWidth = 0;
|
||||
int sourceHeight = 0;
|
||||
unsigned char* sourceStyleData = tryLoad(styleFileName,&sourceWidth,&sourceHeight);
|
||||
const int numStyleChannelsTotal = evalNumChannels(sourceStyleData,sourceWidth*sourceHeight);
|
||||
|
||||
std::vector<unsigned char> sourceStyle(sourceWidth*sourceHeight*numStyleChannelsTotal);
|
||||
for(int xy=0;xy<sourceWidth*sourceHeight;xy++)
|
||||
{
|
||||
if (numStyleChannelsTotal>0) { sourceStyle[xy*numStyleChannelsTotal+0] = sourceStyleData[xy*4+0]; }
|
||||
if (numStyleChannelsTotal==2) { sourceStyle[xy*numStyleChannelsTotal+1] = sourceStyleData[xy*4+3]; }
|
||||
else if (numStyleChannelsTotal>1) { sourceStyle[xy*numStyleChannelsTotal+1] = sourceStyleData[xy*4+1]; }
|
||||
if (numStyleChannelsTotal>2) { sourceStyle[xy*numStyleChannelsTotal+2] = sourceStyleData[xy*4+2]; }
|
||||
if (numStyleChannelsTotal>3) { sourceStyle[xy*numStyleChannelsTotal+3] = sourceStyleData[xy*4+3]; }
|
||||
}
|
||||
|
||||
int targetWidth = 0;
|
||||
int targetHeight = 0;
|
||||
int numGuideChannelsTotal = 0;
|
||||
|
||||
for(int i=0;i<numGuides;i++)
|
||||
{
|
||||
Guide& guide = guides[i];
|
||||
|
||||
guide.sourceData = tryLoad(guide.sourceFileName,&guide.sourceWidth,&guide.sourceHeight);
|
||||
guide.targetData = tryLoad(guide.targetFileName,&guide.targetWidth,&guide.targetHeight);
|
||||
|
||||
if (guide.sourceWidth!=sourceWidth || guide.sourceHeight!=sourceHeight) { printf("error: source guide '%s' doesn't match the resolution of '%s'\n",guide.sourceFileName.c_str(),styleFileName.c_str()); return 1; }
|
||||
if (i>0 && (guide.targetWidth!=targetWidth || guide.targetHeight!=targetHeight)) { printf("error: target guide '%s' doesn't match the resolution of '%s'\n",guide.targetFileName.c_str(),guides[0].targetFileName.c_str()); return 1; }
|
||||
else if (i==0) { targetWidth = guide.targetWidth; targetHeight = guide.targetHeight; }
|
||||
|
||||
guide.numChannels = std::max(evalNumChannels(guide.sourceData,sourceWidth*sourceHeight),
|
||||
evalNumChannels(guide.targetData,targetWidth*targetHeight));
|
||||
|
||||
numGuideChannelsTotal += guide.numChannels;
|
||||
}
|
||||
|
||||
if (numStyleChannelsTotal>EBSYNTH_MAX_STYLE_CHANNELS) { printf("error: too many style channels (%d), maximum number is %d\n",numStyleChannelsTotal,EBSYNTH_MAX_STYLE_CHANNELS); return 1; }
|
||||
if (numGuideChannelsTotal>EBSYNTH_MAX_GUIDE_CHANNELS) { printf("error: too many guide channels (%d), maximum number is %d\n",numGuideChannelsTotal,EBSYNTH_MAX_GUIDE_CHANNELS); return 1; }
|
||||
|
||||
std::vector<unsigned char> sourceGuides(sourceWidth*sourceHeight*numGuideChannelsTotal);
|
||||
for(int xy=0;xy<sourceWidth*sourceHeight;xy++)
|
||||
{
|
||||
int c = 0;
|
||||
for(int i=0;i<numGuides;i++)
|
||||
{
|
||||
const int numChannels = guides[i].numChannels;
|
||||
|
||||
if (numChannels>0) { sourceGuides[xy*numGuideChannelsTotal+c+0] = guides[i].sourceData[xy*4+0]; }
|
||||
if (numChannels==2) { sourceGuides[xy*numGuideChannelsTotal+c+1] = guides[i].sourceData[xy*4+3]; }
|
||||
else if (numChannels>1) { sourceGuides[xy*numGuideChannelsTotal+c+1] = guides[i].sourceData[xy*4+1]; }
|
||||
if (numChannels>2) { sourceGuides[xy*numGuideChannelsTotal+c+2] = guides[i].sourceData[xy*4+2]; }
|
||||
if (numChannels>3) { sourceGuides[xy*numGuideChannelsTotal+c+3] = guides[i].sourceData[xy*4+3]; }
|
||||
|
||||
c += numChannels;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<unsigned char> targetGuides(targetWidth*targetHeight*numGuideChannelsTotal);
|
||||
for(int xy=0;xy<targetWidth*targetHeight;xy++)
|
||||
{
|
||||
int c = 0;
|
||||
for(int i=0;i<numGuides;i++)
|
||||
{
|
||||
const int numChannels = guides[i].numChannels;
|
||||
|
||||
if (numChannels>0) { targetGuides[xy*numGuideChannelsTotal+c+0] = guides[i].targetData[xy*4+0]; }
|
||||
if (numChannels==2) { targetGuides[xy*numGuideChannelsTotal+c+1] = guides[i].targetData[xy*4+3]; }
|
||||
else if (numChannels>1) { targetGuides[xy*numGuideChannelsTotal+c+1] = guides[i].targetData[xy*4+1]; }
|
||||
if (numChannels>2) { targetGuides[xy*numGuideChannelsTotal+c+2] = guides[i].targetData[xy*4+2]; }
|
||||
if (numChannels>3) { targetGuides[xy*numGuideChannelsTotal+c+3] = guides[i].targetData[xy*4+3]; }
|
||||
|
||||
c += numChannels;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<float> styleWeights(numStyleChannelsTotal);
|
||||
if (isnan(styleWeight)) { styleWeight = 1.0f; }
|
||||
for(int i=0;i<numStyleChannelsTotal;i++) { styleWeights[i] = styleWeight / float(numStyleChannelsTotal); }
|
||||
|
||||
for(int i=0;i<numGuides;i++) { if (isnan(guides[i].weight)) { guides[i].weight = 1.0f/float(numGuides); } }
|
||||
|
||||
std::vector<float> guideWeights(numGuideChannelsTotal);
|
||||
{
|
||||
int c = 0;
|
||||
for(int i=0;i<numGuides;i++)
|
||||
{
|
||||
const int numChannels = guides[i].numChannels;
|
||||
|
||||
for(int j=0;j<numChannels;j++)
|
||||
{
|
||||
guideWeights[c+j] = guides[i].weight / float(numChannels);
|
||||
}
|
||||
|
||||
c += numChannels;
|
||||
}
|
||||
}
|
||||
|
||||
int maxPyramidLevels = 0;
|
||||
for(int level=32;level>=0;level--)
|
||||
{
|
||||
if (min(pyramidLevelSize(std::min(V2i(sourceWidth,sourceHeight),V2i(targetWidth,targetHeight)),level)) >= (2*patchSize+1))
|
||||
{
|
||||
maxPyramidLevels = level+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (numPyramidLevels==-1) { numPyramidLevels = maxPyramidLevels; }
|
||||
numPyramidLevels = std::min(numPyramidLevels,maxPyramidLevels);
|
||||
|
||||
std::vector<int> numSearchVoteItersPerLevel(numPyramidLevels);
|
||||
std::vector<int> numPatchMatchItersPerLevel(numPyramidLevels);
|
||||
std::vector<int> stopThresholdPerLevel(numPyramidLevels);
|
||||
for(int i=0;i<numPyramidLevels;i++)
|
||||
{
|
||||
numSearchVoteItersPerLevel[i] = numSearchVoteIters;
|
||||
numPatchMatchItersPerLevel[i] = numPatchMatchIters;
|
||||
stopThresholdPerLevel[i] = stopThreshold;
|
||||
}
|
||||
|
||||
std::vector<unsigned char> output(targetWidth*targetHeight*numStyleChannelsTotal);
|
||||
|
||||
printf("uniformity: %.0f\n",uniformityWeight);
|
||||
printf("patchsize: %d\n",patchSize);
|
||||
printf("pyramidlevels: %d\n",numPyramidLevels);
|
||||
printf("searchvoteiters: %d\n",numSearchVoteIters);
|
||||
printf("patchmatchiters: %d\n",numPatchMatchIters);
|
||||
printf("stopthreshold: %d\n",stopThreshold);
|
||||
printf("backend: %s\n",backendToString(backend).c_str());
|
||||
|
||||
ebsynthRun(backend,
|
||||
numStyleChannelsTotal,
|
||||
numGuideChannelsTotal,
|
||||
sourceWidth,
|
||||
sourceHeight,
|
||||
sourceStyle.data(),
|
||||
sourceGuides.data(),
|
||||
targetWidth,
|
||||
targetHeight,
|
||||
targetGuides.data(),
|
||||
NULL,
|
||||
styleWeights.data(),
|
||||
guideWeights.data(),
|
||||
uniformityWeight,
|
||||
patchSize,
|
||||
EBSYNTH_VOTEMODE_PLAIN,
|
||||
numPyramidLevels,
|
||||
numSearchVoteItersPerLevel.data(),
|
||||
numPatchMatchItersPerLevel.data(),
|
||||
stopThresholdPerLevel.data(),
|
||||
NULL,
|
||||
output.data());
|
||||
|
||||
stbi_write_png(outputFileName.c_str(),targetWidth,targetHeight,numStyleChannelsTotal,output.data(),numStyleChannelsTotal*targetWidth);
|
||||
|
||||
printf("result was written to %s\n",outputFileName.c_str());
|
||||
|
||||
stbi_image_free(sourceStyleData);
|
||||
|
||||
for(int i=0;i<numGuides;i++)
|
||||
{
|
||||
stbi_image_free(guides[i].sourceData);
|
||||
stbi_image_free(guides[i].targetData);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
1037
src/ebsynth_cpu.cpp
Normal file
1037
src/ebsynth_cpu.cpp
Normal file
File diff suppressed because it is too large
Load Diff
32
src/ebsynth_cpu.h
Normal file
32
src/ebsynth_cpu.h
Normal file
@@ -0,0 +1,32 @@
|
||||
// This software is in the public domain. Where that dedication is not
|
||||
// recognized, you are granted a perpetual, irrevocable license to copy
|
||||
// and modify this file as you see fit.
|
||||
|
||||
#ifndef EBSYNTH_CPU_H_
|
||||
#define EBSYNTH_CPU_H_
|
||||
|
||||
void ebsynthRunCpu(int numStyleChannels,
|
||||
int numGuideChannels,
|
||||
int sourceWidth,
|
||||
int sourceHeight,
|
||||
void* sourceStyleData,
|
||||
void* sourceGuideData,
|
||||
int targetWidth,
|
||||
int targetHeight,
|
||||
void* targetGuideData,
|
||||
void* targetModulationData,
|
||||
float* styleWeights,
|
||||
float* guideWeights,
|
||||
float uniformityWeight,
|
||||
int patchSize,
|
||||
int voteMode,
|
||||
int numPyramidLevels,
|
||||
int* numSearchVoteItersPerLevel,
|
||||
int* numPatchMatchItersPerLevel,
|
||||
int* stopThresholdPerLevel,
|
||||
void* outputNnfData,
|
||||
void* outputImageData);
|
||||
|
||||
int ebsynthBackendAvailableCpu();
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
32
src/ebsynth_cuda.h
Normal file
32
src/ebsynth_cuda.h
Normal file
@@ -0,0 +1,32 @@
|
||||
// This software is in the public domain. Where that dedication is not
|
||||
// recognized, you are granted a perpetual, irrevocable license to copy
|
||||
// and modify this file as you see fit.
|
||||
|
||||
#ifndef EBSYNTH_CUDA_H_
|
||||
#define EBSYNTH_CUDA_H_
|
||||
|
||||
void ebsynthRunCuda(int numStyleChannels,
|
||||
int numGuideChannels,
|
||||
int sourceWidth,
|
||||
int sourceHeight,
|
||||
void* sourceStyleData,
|
||||
void* sourceGuideData,
|
||||
int targetWidth,
|
||||
int targetHeight,
|
||||
void* targetGuideData,
|
||||
void* targetModulationData,
|
||||
float* styleWeights,
|
||||
float* guideWeights,
|
||||
float uniformityWeight,
|
||||
int patchSize,
|
||||
int voteMode,
|
||||
int numPyramidLevels,
|
||||
int* numSearchVoteItersPerLevel,
|
||||
int* numPatchMatchItersPerLevel,
|
||||
int* stopThresholdPerLevel,
|
||||
void* outputNnfData,
|
||||
void* outputImageData);
|
||||
|
||||
int ebsynthBackendAvailableCuda();
|
||||
|
||||
#endif
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef CUDACHECK_H_
|
||||
#define CUDACHECK_H_
|
||||
#ifndef EBSYNTH_CUDA_CHECK_H_
|
||||
#define EBSYNTH_CUDA_CHECK_H_
|
||||
|
||||
template<typename T>
|
||||
bool checkCudaError_(T result,char const* const func,const char* const file,int const line)
|
||||
@@ -2,11 +2,11 @@
|
||||
// recognized, you are granted a perpetual, irrevocable license to copy
|
||||
// and modify this file as you see fit.
|
||||
|
||||
#ifndef MEMARRAY2_H_
|
||||
#define MEMARRAY2_H_
|
||||
#ifndef EBSYNTH_CUDA_MEMARRAY2_H_
|
||||
#define EBSYNTH_CUDA_MEMARRAY2_H_
|
||||
|
||||
#include "jzq.h"
|
||||
//#include "cudacheck.h"
|
||||
#include "ebsynth_cuda_check.h"
|
||||
|
||||
template<typename T>
|
||||
struct MemArray2
|
||||
@@ -2,11 +2,11 @@
|
||||
// recognized, you are granted a perpetual, irrevocable license to copy
|
||||
// and modify this file as you see fit.
|
||||
|
||||
#ifndef TEXARRAY2_H_
|
||||
#define TEXARRAY2_H_
|
||||
#ifndef EBSYNTH_CUDA_TEXARRAY2_H_
|
||||
#define EBSYNTH_CUDA_TEXARRAY2_H_
|
||||
|
||||
#include "jzq.h"
|
||||
#include "cudacheck.h"
|
||||
#include "ebsynth_cuda_check.h"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
33
src/ebsynth_nocuda.cpp
Normal file
33
src/ebsynth_nocuda.cpp
Normal file
@@ -0,0 +1,33 @@
|
||||
// This software is in the public domain. Where that dedication is not
|
||||
// recognized, you are granted a perpetual, irrevocable license to copy
|
||||
// and modify this file as you see fit.
|
||||
|
||||
// Stand-in for the CUDA backend in builds that are compiled without CUDA.
// It has the same signature as the real entry point so that the dispatcher
// links, but it ignores all of its arguments and does nothing.
void ebsynthRunCuda(int    numStyleChannels,
                    int    numGuideChannels,
                    int    sourceWidth,
                    int    sourceHeight,
                    void*  sourceStyleData,
                    void*  sourceGuideData,
                    int    targetWidth,
                    int    targetHeight,
                    void*  targetGuideData,
                    void*  targetModulationData,
                    float* styleWeights,
                    float* guideWeights,
                    float  uniformityWeight,
                    int    patchSize,
                    int    voteMode,
                    int    numPyramidLevels,
                    int*   numSearchVoteItersPerLevel,
                    int*   numPatchMatchItersPerLevel,
                    int*   stopThresholdPerLevel,
                    void*  outputNnfData,
                    void*  outputImageData)
{
  // Intentionally empty.
}
|
||||
|
||||
// Availability query for builds without CUDA: always reports "not available".
int ebsynthBackendAvailableCuda()
{
  const int notAvailable = 0;
  return notAvailable;
}
|
||||
@@ -1,410 +0,0 @@
|
||||
// This software is in the public domain. Where that dedication is not
|
||||
// recognized, you are granted a perpetual, irrevocable license to copy
|
||||
// and modify this file as you see fit.
|
||||
|
||||
#ifndef PATCHMATCH_GPU_H_
|
||||
#define PATCHMATCH_GPU_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cfloat>
|
||||
|
||||
#include "texarray2.h"
|
||||
#include "memarray2.h"
|
||||
|
||||
// State of one pseudo-random number stream used by the pcg* functions.
struct pcgState
{
  uint64_t state;     // current generator state, updated by pcgAdvance()
  uint64_t increment; // additive constant of the update step, derived from the stream id in pcgInit()
};
|
||||
|
||||
// Steps the generator once: state = state * multiplier + increment
// (64-bit unsigned arithmetic, wrapping on overflow).
__device__ void pcgAdvance(pcgState* rng)
{
  const uint64_t multiplier = 6364136223846793005ULL;
  rng->state = multiplier*rng->state + rng->increment;
}
|
||||
|
||||
// Turns a 64-bit generator state into a 32-bit output value: the state is
// xor-ed with itself shifted right by 22 bits and the result is shifted right
// by an amount chosen by the top three bits of the state (22..29).
__device__ uint32_t pcgOutput(uint64_t state)
{
  const uint64_t mixed = (state >> 22u) ^ state;
  const uint64_t shift = (state >> 61u) + 22u;
  return (uint32_t)(mixed >> shift);
}
|
||||
|
||||
// Returns the next 32-bit random number of the stream. The value is derived
// from the state as it was before this call; the state is then advanced.
__device__ uint32_t pcgRand(pcgState* rng)
{
  const uint32_t value = pcgOutput(rng->state);
  pcgAdvance(rng);
  return value;
}
|
||||
|
||||
// Seeds a generator. `stream` selects the increment (forced to be odd), and
// `seed` is mixed into the state between two advance steps.
__device__ void pcgInit(pcgState* rng,uint64_t seed,uint64_t stream)
{
  rng->increment = (stream << 1u) | 1u;
  rng->state = 0U;

  pcgAdvance(rng);
  rng->state += seed;
  pcgAdvance(rng);
}
|
||||
|
||||
typedef Vec<1,float> V1f;
|
||||
typedef Array2<Vec<1,float>> A2V1f;
|
||||
|
||||
// Kernel: initializes one pcgState per pixel of a width x height image.
// Expects a 2D launch in which (x,y) is the pixel handled by a thread;
// threads that fall outside the image do nothing. Every pixel uses the fixed
// seed 1337 and its linear index (x + y*width) as the stream id.
__global__ void krnlInitRngStates(const int width,
                                  const int height,
                                  pcgState* rngStates)
{
  const int x = blockDim.x*blockIdx.x + threadIdx.x;
  const int y = blockDim.y*blockIdx.y + threadIdx.y;

  // Grid tail: the launch may cover more threads than there are pixels.
  if (x>=width || y>=height) { return; }

  const int idx = y*width + x;
  pcgInit(&rngStates[idx],1337,idx);
}
|
||||
|
||||
// Allocates width*height pcgState entries with cudaMalloc and launches
// krnlInitRngStates to initialize them.
//   width, height - dimensions of the generator grid
// Returns the device pointer to the states, or a null pointer when the
// allocation fails. The buffer is not freed here; releasing it is left to
// the caller.
// The kernel launch is asynchronous; this function does not synchronize.
pcgState* initGpuRng(const int width,
                     const int height)
{
  pcgState* gpuRngStates = 0;

  // Widen before multiplying so the element count is not computed in int.
  const size_t numStates = size_t(width)*size_t(height);

  const cudaError_t allocStatus = cudaMalloc(&gpuRngStates,numStates*sizeof(pcgState));
  checkCudaError(allocStatus);
  if (allocStatus!=cudaSuccess) { return 0; }

  // (n+t)/t can launch one block more than strictly needed in a dimension;
  // the bounds check inside the kernel makes the surplus threads no-ops.
  const dim3 threadsPerBlock(16,16);
  const dim3 numBlocks((width+threadsPerBlock.x)/threadsPerBlock.x,
                       (height+threadsPerBlock.y)/threadsPerBlock.y);

  krnlInitRngStates<<<numBlocks,threadsPerBlock>>>(width,height,gpuRngStates);

  // Launch failures are only visible through cudaGetLastError().
  checkCudaError(cudaGetLastError());

  return gpuRngStates;
}
|
||||
|
||||
// Patch error functor: weighted sum of squared differences between a square
// patch of image A centered at (ax,ay) and a patch of image B centered at
// (bx,by).
//   N - number of channels per pixel
//   T - channel type
//   M - forwarded to TexArray2 as its third template argument
template<int N,typename T,int M>
struct PatchSSD
{
  const TexArray2<N,T,M> A;
  const TexArray2<N,T,M> B;
  const Vec<N,float> weights;  // weights[i] scales the squared difference of channel i

  PatchSSD(const TexArray2<N,T,M>& A,
           const TexArray2<N,T,M>& B,
           const Vec<N,float>& weights)
  : A(A),B(B),weights(weights) {}

  // Returns the weighted SSD over the (2*(patchWidth/2)+1)^2 window.
  // After each completed patch row the running sum is compared against
  // ebest; once it exceeds ebest the partial sum is returned early, so the
  // result is only exact when it does not exceed ebest.
  __device__ float operator()(int patchWidth,
                              const int ax,
                              const int ay,
                              const int bx,
                              const int by,
                              const float ebest)
  {
    const int radius = patchWidth/2;
    float total = 0;

    for(int dy=-radius;dy<=+radius;dy++)
    {
      for(int dx=-radius;dx<=+radius;dx++)
      {
        const Vec<N,T> pixelA = A(ax + dx, ay + dy);
        const Vec<N,T> pixelB = B(bx + dx, by + dy);

        for(int c=0;c<N;c++)
        {
          const float delta = float(pixelA[c])-float(pixelB[c]);
          total += weights[c]*delta*delta;
        }
      }

      // Early out: this candidate can no longer beat the current best.
      if (total>ebest) { break; }
    }

    return total;
  }
};
|
||||
|
||||
// Kernel: evaluates the patch error of every entry of the nearest-neighbor
// field and stores it in E.
// Expects a 2D launch covering NNF.width x NNF.height; threads outside that
// range do nothing. For pixel (x,y) with NNF(x,y) = (n0,n1) it writes
// patchError(patchWidth,x,y,n0,n1,FLT_MAX) to E(x,y). Passing FLT_MAX as the
// best-so-far bound means the error functor's early-out threshold is never
// exceeded by a finite sum.
template<typename FUNC>
__global__ void krnlEvalErrorPass(const int patchWidth,
                                  FUNC patchError,
                                  const TexArray2<2,int> NNF,
                                  TexArray2<1,float> E)
{
  const int col = blockDim.x*blockIdx.x + threadIdx.x;
  const int row = blockDim.y*blockIdx.y + threadIdx.y;

  if (col>=NNF.width || row>=NNF.height) { return; }

  const V2i match = NNF(col,row);
  E.write(col,row,V1f(patchError(patchWidth,col,row,match[0],match[1],FLT_MAX)));
}
|
||||
|
||||
// Adds incdec to every cell of Omega inside the square patch footprint
// centered at (bx,by), i.e. offsets -patchWidth/2 .. +patchWidth/2 in x and y.
// Each cell is updated with atomicAdd (presumably because footprints handled
// by different threads can overlap).
// No bounds checking is done here: the whole footprint must lie inside Omega.
void __device__ updateOmega(MemArray2<int>& Omega,const int patchWidth,const int bx,const int by,const int incdec)
{
  const int radius = patchWidth/2;

  for(int dy=-radius;dy<=+radius;dy++)
  {
    const int row = by+dy;

    for(int dx=-radius;dx<=+radius;dx++)
    {
      const int col = bx+dx;
      atomicAdd(&Omega.data[col+row*Omega.width],incdec);
    }
  }
}
|
||||
|
||||
// Returns the sum of the Omega cells inside the square patch footprint
// centered at (bx,by), i.e. offsets -patchWidth/2 .. +patchWidth/2 in x and y.
// The cells are read with plain (non-atomic) loads while other threads may be
// updating them through updateOmega. The original author left an open
// question here whether an atomic read should be used instead.
// No bounds checking is done here: the whole footprint must lie inside Omega.
int __device__ patchOmega(const int patchWidth,const int bx,const int by,const MemArray2<int>& Omega)
{
  const int radius = patchWidth/2;

  int total = 0;

  for(int dy=-radius;dy<=+radius;dy++)
  {
    const int row = by+dy;

    for(int dx=-radius;dx<=+radius;dx++)
    {
      const int col = bx+dx;
      total += Omega.data[col+row*Omega.width]; // XXX: atomic read instead ??
    }
  }

  return total;
}
|
||||
|
||||
// Tests candidate match (bx,by) for pixel (ax,ay) against the current best
// match and adopts it when its penalized cost is lower.
//
// The cost of a match is  error + lambda * occupancy,  where occupancy is the
// mean Omega value under the match's patch footprint divided by omegaBest.
// omegaBest = (area of A / area of B) * patchWidth^2.
//
//   sizeA, sizeB - dimensions of the two images as (width,height)
//   Omega        - per-pixel counters, updated when the match changes
//   patchError   - error functor; receives the current penalized cost as its
//                  early-out bound
//   nbest, ebest - in/out: current best match and its (unpenalized) error
//
// When the candidate wins, Omega is incremented under the new footprint and
// decremented under the old one, and nbest/ebest are replaced.
template<typename FUNC>
__device__ void tryPatch(const V2i& sizeA,
                         const V2i& sizeB,
                         MemArray2<int>& Omega,
                         const int patchWidth,
                         FUNC patchError,
                         const float lambda,
                         const int ax,
                         const int ay,
                         const int bx,
                         const int by,
                         V2i& nbest,
                         float& ebest)
{
  const float omegaBest = (float(sizeA(0)*sizeA(1)) /
                           float(sizeB(0)*sizeB(1))) * float(patchWidth*patchWidth);

  // Occupancy of the current best match, then of the candidate.
  const float curOcc = (float(patchOmega(patchWidth,nbest(0),nbest(1),Omega))/float(patchWidth*patchWidth))/omegaBest;
  const float newOcc = (float(patchOmega(patchWidth,      bx,      by,Omega))/float(patchWidth*patchWidth))/omegaBest;

  const float curErr = ebest;
  const float newErr = patchError(patchWidth,ax,ay,bx,by,curErr+lambda*curOcc);

  const bool candidateWins = (newErr+lambda*newOcc) < (curErr+lambda*curOcc);
  if (!candidateWins) { return; }

  // Move this pixel's footprint from the old match to the new one.
  updateOmega(Omega,patchWidth,      bx,      by,+1);
  updateOmega(Omega,patchWidth,nbest(0),nbest(1),-1);

  nbest = V2i(bx,by);
  ebest = newErr;
}
|
||||
|
||||
// Propagation step for pixel (x,y) using the neighbor at offset (ox,oy).
// The neighbor's match NNF(x+ox,y+oy) is shifted back by (ox,oy) to form a
// candidate for (x,y). The candidate is handed to tryPatch only when its
// patch lies fully inside B, i.e. its center is at least patchWidth/2 away
// from every border of B.
// NOTE(review): NNF is read at (x+ox,y+oy) without a range check here;
// presumably TexArray2 handles out-of-range coordinates - confirm.
template<typename FUNC>
__device__ void tryNeighborsOffset(const int x,
                                   const int y,
                                   const int ox,
                                   const int oy,
                                   V2i& nbest,
                                   float& ebest,
                                   const V2i& sizeA,
                                   const V2i& sizeB,
                                   MemArray2<int>& Omega,
                                   const int patchWidth,
                                   FUNC patchError,
                                   const float lambda,
                                   const TexArray2<2,int>& NNF)
{
  const int hpw = patchWidth/2;

  const V2i neighborMatch = NNF(x+ox,y+oy);
  const int candX = neighborMatch(0)-ox;
  const int candY = neighborMatch(1)-oy;

  // Reject candidates whose patch would stick out of B.
  if (candX<hpw || candX>=sizeB(0)-hpw) { return; }
  if (candY<hpw || candY>=sizeB(1)-hpw) { return; }

  tryPatch(sizeA,sizeB,Omega,patchWidth,patchError,lambda,x,y,candX,candY,nbest,ebest);
}
|
||||
|
||||
// Kernel: one propagation pass of the patch-match search.
// Expects a 2D launch covering sizeA; threads outside that range do nothing.
// For each pixel (x,y) of A the current match and error are read from NNF
// and E. If mask(x,y) is 255, candidates derived from the neighbors at
// distance r to the left, right, top and bottom (in that order) are tried.
// The resulting error is written back to E and the resulting match to NNF2.
// NNF itself is only read, so the pass reads one field and writes the other.
// Masked-out pixels are copied to NNF2 unchanged.
template<typename FUNC>
__global__ void krnlPropagationPass(const V2i sizeA,
                                    const V2i sizeB,
                                    MemArray2<int> Omega,
                                    const int patchWidth,
                                    FUNC patchError,
                                    const float lambda,
                                    const int r,
                                    const TexArray2<2,int> NNF,
                                    TexArray2<2,int> NNF2,
                                    TexArray2<1,float> E,
                                    TexArray2<1,unsigned char> mask)
{
  const int x = blockDim.x*blockIdx.x + threadIdx.x;
  const int y = blockDim.y*blockIdx.y + threadIdx.y;

  if (x>=sizeA(0) || y>=sizeA(1)) { return; }

  V2i   nbest = NNF(x,y);
  float ebest = E(x,y)(0);

  const bool searchEnabled = (mask(x,y)[0]==255);

  if (searchEnabled)
  {
    // left, right, up, down
    tryNeighborsOffset(x,y,-r, 0,nbest,ebest,sizeA,sizeB,Omega,patchWidth,patchError,lambda,NNF);
    tryNeighborsOffset(x,y,+r, 0,nbest,ebest,sizeA,sizeB,Omega,patchWidth,patchError,lambda,NNF);
    tryNeighborsOffset(x,y, 0,-r,nbest,ebest,sizeA,sizeB,Omega,patchWidth,patchError,lambda,NNF);
    tryNeighborsOffset(x,y, 0,+r,nbest,ebest,sizeA,sizeB,Omega,patchWidth,patchError,lambda,NNF);
  }

  E.write(x,y,V1f(ebest));
  NNF2.write(x,y,nbest);
}
|
||||
|
||||
// Random-search step: draws one candidate uniformly from the square window of
// radius r around norg, clipped so that the candidate's patch stays inside B,
// and hands it to tryPatch for pixel (x,y).
//   norg     - center of the search window
//   rngState - generator used for the draw; two values are consumed, the
//              x coordinate first and the y coordinate second
// NOTE(review): assumes the clipped window is non-empty in both dimensions;
// otherwise the modulo below would be taken by a non-positive span.
template<typename FUNC>
__device__ void tryRandomOffsetInRadius(const int r,
                                        const V2i& sizeA,
                                        const V2i& sizeB,
                                        MemArray2<int>& Omega,
                                        const int patchWidth,
                                        FUNC patchError,
                                        const float lambda,
                                        const int x,
                                        const int y,
                                        const V2i& norg,
                                        V2i& nbest,
                                        float& ebest,
                                        pcgState* rngState)
{
  const int hpw = patchWidth/2;

  // Search window, clipped to centers whose patch fits inside B.
  const int loX = max(norg(0)-r,hpw);
  const int hiX = min(norg(0)+r,sizeB(0)-1-hpw);
  const int loY = max(norg(1)-r,hpw);
  const int hiY = min(norg(1)+r,sizeB(1)-1-hpw);

  const int candX = loX+(pcgRand(rngState)%(hiX-loX+1));
  const int candY = loY+(pcgRand(rngState)%(hiY-loY+1));

  tryPatch(sizeA,sizeB,Omega,patchWidth,patchError,lambda,x,y,candX,candY,nbest,ebest);
}
|
||||
|
||||
/*
|
||||
template<typename FUNC>
|
||||
__global__ void krnlRandomSearchPass(const V2i sizeA,
|
||||
const V2i sizeB,
|
||||
MemArray2<int> Omega,
|
||||
const int patchWidth,
|
||||
FUNC patchError,
|
||||
const float lambda,
|
||||
TexArray2<2,int> NNF,
|
||||
TexArray2<1,float> E,
|
||||
TexArray2<1,unsigned char> mask,
|
||||
pcgState* rngStates)
|
||||
{
|
||||
const int x = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y*blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x<sizeA(0) && y<sizeA(1))
|
||||
{
|
||||
if (mask(x,y)[0]==255)
|
||||
{
|
||||
V2i nbest = NNF(x,y);
|
||||
float ebest = E(x,y)(0);
|
||||
|
||||
const V2i norg = nbest;
|
||||
|
||||
for(int r=1;r<max(sizeB(0),sizeB(1))/2;r=r*2)
|
||||
{
|
||||
tryRandomOffsetInRadius(r,sizeA,sizeB,Omega,patchWidth,patchError,lambda,x,y,norg,nbest,ebest,&rngStates[x+y*NNF.width]);
|
||||
}
|
||||
|
||||
E.write(x,y,V1f(ebest));
|
||||
NNF.write(x,y,nbest);
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Kernel: one random-search pass of the patch-match search at a fixed radius.
// Expects a 2D launch covering sizeA; threads outside that range do nothing,
// and neither do pixels whose mask value is not 255.
// For every remaining pixel (x,y) one random candidate within `radius` of
// the current match is tried, and the resulting match and error are written
// back in place to NNF and E. The pixel's generator is
// rngStates[x + y*NNF.width].
template<typename FUNC>
__global__ void krnlRandomSearchPass(const V2i sizeA,
                                     const V2i sizeB,
                                     MemArray2<int> Omega,
                                     const int patchWidth,
                                     FUNC patchError,
                                     const float lambda,
                                     const int radius,
                                     TexArray2<2,int> NNF,
                                     TexArray2<1,float> E,
                                     TexArray2<1,unsigned char> mask,
                                     pcgState* rngStates)
{
  const int x = blockDim.x*blockIdx.x + threadIdx.x;
  const int y = blockDim.y*blockIdx.y + threadIdx.y;

  if (x>=sizeA(0) || y>=sizeA(1)) { return; }
  if (mask(x,y)[0]!=255) { return; }

  V2i   nbest = NNF(x,y);
  float ebest = E(x,y)(0);

  // The window is centered on the match held at entry to this pass.
  const V2i norg = nbest;

  pcgState* const rng = &rngStates[x+y*NNF.width];

  tryRandomOffsetInRadius(radius,sizeA,sizeB,Omega,patchWidth,patchError,lambda,x,y,norg,nbest,ebest,rng);

  E.write(x,y,V1f(ebest));
  NNF.write(x,y,nbest);
}
|
||||
|
||||
// Host driver of the GPU patch-match search.
//
//   sizeA, sizeB        - dimensions of the two images as (width,height)
//   Omega               - per-pixel counters, updated by the kernels
//   patchWidth          - side length of the square patch
//   patchError          - error functor passed by value to the kernels
//   lambda              - weight of the occupancy term (see tryPatch)
//   numIters            - number of propagation + random-search iterations
//   numThreadsPerBlock  - thread blocks are numThreadsPerBlock x numThreadsPerBlock
//   NNF, NNF2           - nearest-neighbor field and a scratch field; they are
//                         swapped after every propagation pass, so NNF always
//                         holds the most recent field
//   E                   - per-pixel error of the current field
//   mask                - only pixels with mask value 255 are searched
//   rngStates           - one generator per pixel, indexed x + y*NNF.width
//
// Sequence: evaluate E for the initial field; then per iteration run
// propagation passes with radii 4, 2 and 1 followed by random-search passes
// with radii 1,2,4,... below max(sizeB)/2; finally re-evaluate E.
//
// Every launch is followed by a cudaGetLastError() check so that launch
// failures are reported in addition to the errors surfaced by
// cudaDeviceSynchronize().
template<typename FUNC>
void patchmatchGPU(const V2i sizeA,
                   const V2i sizeB,
                   MemArray2<int>& Omega,
                   const int patchWidth,
                   FUNC patchError,
                   const float lambda,
                   const int numIters,
                   const int numThreadsPerBlock,
                   TexArray2<2,int>& NNF,
                   TexArray2<2,int>& NNF2,
                   TexArray2<1,float>& E,
                   TexArray2<1,unsigned char>& mask,
                   pcgState* rngStates)
{
  // (n+t)/t can launch one block more than strictly needed in a dimension;
  // the bounds checks inside the kernels make the surplus threads no-ops.
  const dim3 threadsPerBlock = dim3(numThreadsPerBlock,numThreadsPerBlock);
  const dim3 numBlocks = dim3((NNF.width+threadsPerBlock.x)/threadsPerBlock.x,
                              (NNF.height+threadsPerBlock.y)/threadsPerBlock.y);

  krnlEvalErrorPass<<<numBlocks,threadsPerBlock>>>(patchWidth,patchError,NNF,E);
  checkCudaError(cudaGetLastError());
  checkCudaError(cudaDeviceSynchronize());

  const int propagationRadii[3] = { 4, 2, 1 };

  for(int i=0;i<numIters;i++)
  {
    for(int p=0;p<3;p++)
    {
      // Reads NNF, writes NNF2; the swap makes NNF the freshly written field.
      krnlPropagationPass<<<numBlocks,threadsPerBlock>>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,propagationRadii[p],NNF,NNF2,E,mask);
      checkCudaError(cudaGetLastError());
      std::swap(NNF,NNF2);

      checkCudaError(cudaDeviceSynchronize());
    }

    // The random-search passes are queued back to back with no host-side
    // synchronization in between.
    for(int r=1;r<max(sizeB(0),sizeB(1))/2;r=r*2)
    {
      krnlRandomSearchPass<<<numBlocks,threadsPerBlock>>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,r,NNF,E,mask,rngStates);
    }

    // A failed launch leaves the last error set until it is read, so one
    // check after the loop covers all launches above.
    checkCudaError(cudaGetLastError());
    checkCudaError(cudaDeviceSynchronize());
  }

  krnlEvalErrorPass<<<numBlocks,threadsPerBlock>>>(patchWidth,patchError,NNF,E);
  checkCudaError(cudaGetLastError());
  checkCudaError(cudaDeviceSynchronize());
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user