diff --git a/README.md b/README.md index b2be42b..ba2c028 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ ebsynth -style -guide -output -pyramidlevels -searchvoteiters -patchmatchiters +-backend [cpu|cuda] ``` ## Download @@ -129,10 +130,6 @@ equalized to match the luminance of the source painting. -------------------------------------------------------------------------- -## Requirements - -`ebsynth` needs a CUDA-capable gpu in order to run. Besides CUDA, there are no other external dependencies. A cpu-only version that doesn't require CUDA will be released later. - ## License The code is released into the public domain. You can do anything you want with it. diff --git a/build-linux-cpu+cuda.sh b/build-linux-cpu+cuda.sh new file mode 100755 index 0000000..8e9d2c3 --- /dev/null +++ b/build-linux-cpu+cuda.sh @@ -0,0 +1,2 @@ +#!/bin/sh +nvcc -arch compute_30 src/ebsynth.cpp src/ebsynth_cpu.cpp src/ebsynth_cuda.cu -I"include" -DNDEBUG -D__CORRECT_ISO_CPP11_MATH_H_PROTO -O6 -std=c++11 -w -Xcompiler -fopenmp -o bin/ebsynth diff --git a/build-linux-cpu_only.sh b/build-linux-cpu_only.sh new file mode 100755 index 0000000..01d419b --- /dev/null +++ b/build-linux-cpu_only.sh @@ -0,0 +1,2 @@ +#!/bin/sh +g++ src/ebsynth.cpp src/ebsynth_cpu.cpp src/ebsynth_nocuda.cpp -DNDEBUG -O6 -fopenmp -I"include" -std=c++11 -o bin/ebsynth diff --git a/build-linux.sh b/build-linux.sh deleted file mode 100755 index 9c6185e..0000000 --- a/build-linux.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -nvcc -arch compute_30 src/ebsynth.cu -o bin/ebsynth -I "include" -std=c++11 -Xcompiler "-DNDEBUG -O6 -D__CORRECT_ISO_CPP11_MATH_H_PROTO" diff --git a/build-win32-cpu+cuda.bat b/build-win32-cpu+cuda.bat new file mode 100644 index 0000000..19a3b6a --- /dev/null +++ b/build-win32-cpu+cuda.bat @@ -0,0 +1,14 @@ +@echo off +setlocal ENABLEDELAYEDEXPANSION + +for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" x86 && goto compile + +:compile +nvcc -m32 -arch compute_30 src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_cuda.cu -DNDEBUG -O6 -I "include" -o "bin\ebsynth.exe" -Xcompiler "/openmp /fp:fast" -Xlinker "/IMPLIB:dummy.lib" -w || goto error +nvcc -m32 -arch compute_30 src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_cuda.cu -DNDEBUG -O6 -I "include" -o "bin\ebsynth.dll" -Xcompiler "/openmp /fp:fast" -Xlinker "/IMPLIB:lib\ebsynth.lib" -shared -DEBSYNTH_API=__declspec(dllexport) -w || goto error +del dummy.lib;dummy.exp 2> NUL +goto :EOF + +:error +echo FAILED +@%COMSPEC% /C exit 1 >nul diff --git a/build-win32-cpu_only.bat b/build-win32-cpu_only.bat new file mode 100644 index 0000000..1c7cd93 --- /dev/null +++ b/build-win32-cpu_only.bat @@ -0,0 +1,14 @@ +@echo off +setlocal ENABLEDELAYEDEXPANSION + +for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" x86 && goto compile + +:compile +cl src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_nocuda.cpp /DNDEBUG /O2 /openmp /EHsc /nologo /I"include" /Fe"bin\ebsynth.exe" || goto error +cl src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_nocuda.cpp /DNDEBUG /O2 /openmp /EHsc /nologo /I"include" /Fe"bin\ebsynth.dll" /DEBSYNTH_API="__declspec(dllexport)" /link /IMPLIB:"lib\ebsynth.lib" || goto error +del ebsynth.obj;ebsynth_cpu.obj;ebsynth_nocuda.obj 2> NUL +goto :EOF + +:error +echo FAILED +@%COMSPEC% /C exit 1 >nul diff --git a/build-win32.bat b/build-win32.bat deleted file mode 100644 index b2f0db8..0000000 --- a/build-win32.bat +++ /dev/null @@ -1,12 +0,0 @@ -@echo off -setlocal ENABLEDELAYEDEXPANSION - -for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" x86 && goto compile - -:compile -nvcc -arch compute_30 src\ebsynth.cu -m32 -O6 -w -I "include" -o "bin\ebsynth.exe" -Xcompiler "/DNDEBUG /Ox /Oy /Gy /Oi /fp:fast" -Xlinker "/IMPLIB:\"lib\ebsynth.lib\"" || goto error -goto :EOF - -:error -echo FAILED -@%COMSPEC% /C exit 1 >nul diff --git a/build-win64-cpu+cuda.bat b/build-win64-cpu+cuda.bat new file mode 100644 index 0000000..7a25549 --- /dev/null +++ b/build-win64-cpu+cuda.bat @@ -0,0 +1,14 @@ +@echo off +setlocal ENABLEDELAYEDEXPANSION + +for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" amd64 && goto compile + +:compile +nvcc -arch compute_30 src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_cuda.cu -DNDEBUG -O6 -I "include" -o "bin\ebsynth.exe" -Xcompiler "/openmp /fp:fast" -Xlinker "/IMPLIB:dummy.lib" -w || goto error +nvcc -arch compute_30 src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_cuda.cu -DNDEBUG -O6 -I "include" -o "bin\ebsynth.dll" -Xcompiler "/openmp /fp:fast" -Xlinker "/IMPLIB:lib\ebsynth.lib" -shared -DEBSYNTH_API=__declspec(dllexport) -w || goto error +del dummy.lib;dummy.exp 2> NUL +goto :EOF + +:error +echo FAILED +@%COMSPEC% /C exit 1 >nul diff --git a/build-win64-cpu_only.bat b/build-win64-cpu_only.bat new file mode 100644 index 0000000..abf2df7 --- /dev/null +++ b/build-win64-cpu_only.bat @@ -0,0 +1,14 @@ +@echo off +setlocal ENABLEDELAYEDEXPANSION + +for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" amd64 && goto compile + +:compile +cl src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_nocuda.cpp /DNDEBUG /O2 /openmp /EHsc /nologo /I"include" /Fe"bin\ebsynth.exe" || goto error +cl src\ebsynth.cpp src\ebsynth_cpu.cpp src\ebsynth_nocuda.cpp /DNDEBUG /O2 /openmp /EHsc /nologo /I"include" /Fe"bin\ebsynth.dll" /DEBSYNTH_API="__declspec(dllexport)" /link /IMPLIB:"lib\ebsynth.lib" || goto error +del ebsynth.obj;ebsynth_cpu.obj;ebsynth_nocuda.obj 2> NUL +goto :EOF + +:error +echo FAILED +@%COMSPEC% /C exit 1 >nul diff --git a/build-win64.bat b/build-win64.bat deleted file mode 100644 index 1ab8fe8..0000000 --- a/build-win64.bat +++ /dev/null @@ -1,12 +0,0 @@ -@echo off -setlocal ENABLEDELAYEDEXPANSION - -for %%V in (15,14,12,11) do if exist "!VS%%V0COMNTOOLS!" call "!VS%%V0COMNTOOLS!..\..\VC\vcvarsall.bat" amd64 && goto compile - -:compile -nvcc -arch compute_30 src\ebsynth.cu -m64 -O6 -w -I "include" -o "bin\ebsynth.exe" -Xcompiler "/DNDEBUG /Ox /Oy /Gy /Oi /fp:fast" -Xlinker "/IMPLIB:\"lib\ebsynth.lib\"" || goto error -goto :EOF - -:error -echo FAILED -@%COMSPEC% /C exit 1 >nul diff --git a/src/ebsynth.cpp b/src/ebsynth.cpp new file mode 100644 index 0000000..d6dc2af --- /dev/null +++ b/src/ebsynth.cpp @@ -0,0 +1,551 @@ +// This software is in the public domain. Where that dedication is not +// recognized, you are granted a perpetual, irrevocable license to copy +// and modify this file as you see fit. + +#include "ebsynth.h" +#include "ebsynth_cpu.h" +#include "ebsynth_cuda.h" + +#include +#include + +EBSYNTH_API +void ebsynthRun(int ebsynthBackend, + int numStyleChannels, + int numGuideChannels, + int sourceWidth, + int sourceHeight, + void* sourceStyleData, + void* sourceGuideData, + int targetWidth, + int targetHeight, + void* targetGuideData, + void* targetModulationData, + float* styleWeights, + float* guideWeights, + float uniformityWeight, + int patchSize, + int voteMode, + int numPyramidLevels, + int* numSearchVoteItersPerLevel, + int* numPatchMatchItersPerLevel, + int* stopThresholdPerLevel, + void* outputNnfData, + void* outputImageData) +{ + void (*backendDispatch)(int,int,int,int,void*,void*,int,int,void*,void*,float*,float*,float,int,int,int,int*,int*,int*,void*,void*) = 0; + + if (ebsynthBackend==EBSYNTH_BACKEND_CPU ) { backendDispatch = ebsynthRunCpu; } + else if (ebsynthBackend==EBSYNTH_BACKEND_CUDA) { backendDispatch = ebsynthRunCuda; } + else if (ebsynthBackend==EBSYNTH_BACKEND_AUTO) { backendDispatch = ebsynthBackendAvailableCuda() ? ebsynthRunCuda : ebsynthRunCpu; } + + if (backendDispatch!=0) + { + backendDispatch(numStyleChannels, + numGuideChannels, + sourceWidth, + sourceHeight, + sourceStyleData, + sourceGuideData, + targetWidth, + targetHeight, + targetGuideData, + targetModulationData, + styleWeights, + guideWeights, + uniformityWeight, + patchSize, + voteMode, + numPyramidLevels, + numSearchVoteItersPerLevel, + numPatchMatchItersPerLevel, + stopThresholdPerLevel, + outputNnfData, + outputImageData); + } +} + +EBSYNTH_API +int ebsynthBackendAvailable(int ebsynthBackend) +{ + if (ebsynthBackend==EBSYNTH_BACKEND_CPU ) { return ebsynthBackendAvailableCpu(); } + else if (ebsynthBackend==EBSYNTH_BACKEND_CUDA) { return ebsynthBackendAvailableCuda(); } + else if (ebsynthBackend==EBSYNTH_BACKEND_AUTO) { return ebsynthBackendAvailableCpu() || ebsynthBackendAvailableCuda(); } + + return 0; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include + +#include +#include +#include + +#include "jzq.h" + +template +bool tryToParseArg(const std::vector& args,int* inout_argi,const char* name,bool* out_fail,FUNC handler) +{ + int& argi = *inout_argi; + bool& fail = *out_fail; + + if (argi<0 || argi>=args.size()) { fail = true; return false; } + + if (args[argi]==name) + { + argi++; + fail = !handler(); + return true; + } + + fail = false; return false; +} + +bool tryToParseIntArg(const std::vector& args,int* inout_argi,const char* name,int* out_value,bool* out_fail) +{ + return tryToParseArg(args,inout_argi,name,out_fail,[&] + { + int& argi = *inout_argi; + if (argi& args,int* inout_argi,const char* name,float* out_value,bool* out_fail) +{ + return tryToParseArg(args,inout_argi,name,out_fail,[&] + { + int& argi = *inout_argi; + if (argi& args,int* inout_argi,const char* name,std::string* out_value,bool* out_fail) +{ + return tryToParseArg(args,inout_argi,name,out_fail,[&] + { + int& argi = *inout_argi; + if (argi& args,int* inout_argi,const char* name,std::pair* out_value,bool* out_fail) +{ + return tryToParseArg(args,inout_argi,name,out_fail,[&] + { + int& argi = *inout_argi; + if ((argi+1)\n"); + printf(" -guide \n"); + printf(" -output \n"); + printf(" -weight \n"); + printf(" -uniformity \n"); + printf(" -patchsize \n"); + printf(" -pyramidlevels \n"); + printf(" -searchvoteiters \n"); + printf(" -patchmatchiters \n"); + printf(" -stopthreshold \n"); + printf(" -backend [cpu|cuda]\n"); + printf("\n"); + return 1; + } + + std::string styleFileName; + float styleWeight = NAN; + std::string outputFileName = "output.png"; + + struct Guide + { + std::string sourceFileName; + std::string targetFileName; + float weight; + + int sourceWidth; + int sourceHeight; + unsigned char* sourceData; + + int targetWidth; + int targetHeight; + unsigned char* targetData; + + int numChannels; + }; + + std::vector guides; + + float uniformityWeight = 3500; + int patchSize = 5; + int numPyramidLevels = -1; + int numSearchVoteIters = 6; + int numPatchMatchIters = 4; + int stopThreshold = 5; + int backend = ebsynthBackendAvailable(EBSYNTH_BACKEND_CUDA) ? EBSYNTH_BACKEND_CUDA : EBSYNTH_BACKEND_CPU; + + { + std::vector args(argc); + for(int i=0;i guidePair; + std::string backendName; + + if (tryToParseStringArg(args,&argi,"-style",&styleFileName,&fail)) + { + styleWeight = NAN; + precedingStyleOrGuideWeight = &styleWeight; + argi++; + } + else if (tryToParseStringPairArg(args,&argi,"-guide",&guidePair,&fail)) + { + Guide guide; + guide.sourceFileName = guidePair.first; + guide.targetFileName = guidePair.second; + guide.weight = NAN; + guides.push_back(guide); + precedingStyleOrGuideWeight = &guides[guides.size()-1].weight; + argi++; + } + else if (tryToParseStringArg(args,&argi,"-output",&outputFileName,&fail)) + { + argi++; + } + else if (tryToParseFloatArg(args,&argi,"-weight",&weight,&fail)) + { + if (precedingStyleOrGuideWeight!=0) { *precedingStyleOrGuideWeight = weight; } + else { printf("error: at least one -style or -guide option must precede the -weight option!\n"); return 1; } + argi++; + } + else if (tryToParseFloatArg(args,&argi,"-uniformity",&uniformityWeight,&fail)) { argi++; } + else if (tryToParseIntArg(args,&argi,"-patchsize",&patchSize,&fail)) + { + if (patchSize<3) { printf("error: patchsize is too small!\n"); return 1; } + if (patchSize%2==0) { printf("error: patchsize must be an odd number!\n"); return 1; } + argi++; + } + else if (tryToParseIntArg(args,&argi,"-pyramidlevels",&numPyramidLevels,&fail)) + { + if (numPyramidLevels<1) { printf("error: bad argument for -pyramidlevels!\n"); return 1; } + argi++; + } + else if (tryToParseIntArg(args,&argi,"-searchvoteiters",&numSearchVoteIters,&fail)) + { + if (numSearchVoteIters<0) { printf("error: bad argument for -searchvoteiters!\n"); return 1; } + argi++; + } + else if (tryToParseIntArg(args,&argi,"-patchmatchiters",&numPatchMatchIters,&fail)) + { + if (numPatchMatchIters<0) { printf("error: bad argument for -patchmatchiters!\n"); return 1; } + argi++; + } + else if (tryToParseIntArg(args,&argi,"-stopthreshold",&stopThreshold,&fail)) + { + if (stopThreshold<0) { printf("error: bad argument for -stopthreshold!\n"); return 1; } + argi++; + } + else if (tryToParseStringArg(args,&argi,"-backend",&backendName,&fail)) + { + if (backendName=="cpu" ) { backend = EBSYNTH_BACKEND_CPU; } + else if (backendName=="cuda") { backend = EBSYNTH_BACKEND_CUDA; } + else { printf("error: unrecognized backend '%s'\n",backendName.c_str()); return 1; } + + if (!ebsynthBackendAvailable(backend)) { printf("error: the %s backend is not available!\n",backendToString(backend).c_str()); return 1; } + + argi++; + } + else + { + printf("error: unrecognized option '%s'\n",args[argi].c_str()); + fail = true; + } + + } + + if (fail) { return 1; } + } + + const int numGuides = guides.size(); + + int sourceWidth = 0; + int sourceHeight = 0; + unsigned char* sourceStyleData = tryLoad(styleFileName,&sourceWidth,&sourceHeight); + const int numStyleChannelsTotal = evalNumChannels(sourceStyleData,sourceWidth*sourceHeight); + + std::vector sourceStyle(sourceWidth*sourceHeight*numStyleChannelsTotal); + for(int xy=0;xy0) { sourceStyle[xy*numStyleChannelsTotal+0] = sourceStyleData[xy*4+0]; } + if (numStyleChannelsTotal==2) { sourceStyle[xy*numStyleChannelsTotal+1] = sourceStyleData[xy*4+3]; } + else if (numStyleChannelsTotal>1) { sourceStyle[xy*numStyleChannelsTotal+1] = sourceStyleData[xy*4+1]; } + if (numStyleChannelsTotal>2) { sourceStyle[xy*numStyleChannelsTotal+2] = sourceStyleData[xy*4+2]; } + if (numStyleChannelsTotal>3) { sourceStyle[xy*numStyleChannelsTotal+3] = sourceStyleData[xy*4+3]; } + } + + int targetWidth = 0; + int targetHeight = 0; + int numGuideChannelsTotal = 0; + + for(int i=0;i0 && (guide.targetWidth!=targetWidth || guide.targetHeight!=targetHeight)) { printf("error: target guide '%s' doesn't match the resolution of '%s'\n",guide.targetFileName.c_str(),guides[0].targetFileName.c_str()); return 1; } + else if (i==0) { targetWidth = guide.targetWidth; targetHeight = guide.targetHeight; } + + guide.numChannels = std::max(evalNumChannels(guide.sourceData,sourceWidth*sourceHeight), + evalNumChannels(guide.targetData,targetWidth*targetHeight)); + + numGuideChannelsTotal += guide.numChannels; + } + + if (numStyleChannelsTotal>EBSYNTH_MAX_STYLE_CHANNELS) { printf("error: too many style channels (%d), maximum number is %d\n",numStyleChannelsTotal,EBSYNTH_MAX_STYLE_CHANNELS); return 1; } + if (numGuideChannelsTotal>EBSYNTH_MAX_GUIDE_CHANNELS) { printf("error: too many guide channels (%d), maximum number is %d\n",numGuideChannelsTotal,EBSYNTH_MAX_GUIDE_CHANNELS); return 1; } + + std::vector sourceGuides(sourceWidth*sourceHeight*numGuideChannelsTotal); + for(int xy=0;xy0) { sourceGuides[xy*numGuideChannelsTotal+c+0] = guides[i].sourceData[xy*4+0]; } + if (numChannels==2) { sourceGuides[xy*numGuideChannelsTotal+c+1] = guides[i].sourceData[xy*4+3]; } + else if (numChannels>1) { sourceGuides[xy*numGuideChannelsTotal+c+1] = guides[i].sourceData[xy*4+1]; } + if (numChannels>2) { sourceGuides[xy*numGuideChannelsTotal+c+2] = guides[i].sourceData[xy*4+2]; } + if (numChannels>3) { sourceGuides[xy*numGuideChannelsTotal+c+3] = guides[i].sourceData[xy*4+3]; } + + c += numChannels; + } + } + + std::vector targetGuides(targetWidth*targetHeight*numGuideChannelsTotal); + for(int xy=0;xy0) { targetGuides[xy*numGuideChannelsTotal+c+0] = guides[i].targetData[xy*4+0]; } + if (numChannels==2) { targetGuides[xy*numGuideChannelsTotal+c+1] = guides[i].targetData[xy*4+3]; } + else if (numChannels>1) { targetGuides[xy*numGuideChannelsTotal+c+1] = guides[i].targetData[xy*4+1]; } + if (numChannels>2) { targetGuides[xy*numGuideChannelsTotal+c+2] = guides[i].targetData[xy*4+2]; } + if (numChannels>3) { targetGuides[xy*numGuideChannelsTotal+c+3] = guides[i].targetData[xy*4+3]; } + + c += numChannels; + } + } + + std::vector styleWeights(numStyleChannelsTotal); + if (isnan(styleWeight)) { styleWeight = 1.0f; } + for(int i=0;i guideWeights(numGuideChannelsTotal); + { + int c = 0; + for(int i=0;i=0;level--) + { + if (min(pyramidLevelSize(std::min(V2i(sourceWidth,sourceHeight),V2i(targetWidth,targetHeight)),level)) >= (2*patchSize+1)) + { + maxPyramidLevels = level+1; + break; + } + } + + if (numPyramidLevels==-1) { numPyramidLevels = maxPyramidLevels; } + numPyramidLevels = std::min(numPyramidLevels,maxPyramidLevels); + + std::vector numSearchVoteItersPerLevel(numPyramidLevels); + std::vector numPatchMatchItersPerLevel(numPyramidLevels); + std::vector stopThresholdPerLevel(numPyramidLevels); + for(int i=0;i output(targetWidth*targetHeight*numStyleChannelsTotal); + + printf("uniformity: %.0f\n",uniformityWeight); + printf("patchsize: %d\n",patchSize); + printf("pyramidlevels: %d\n",numPyramidLevels); + printf("searchvoteiters: %d\n",numSearchVoteIters); + printf("patchmatchiters: %d\n",numPatchMatchIters); + printf("stopthreshold: %d\n",stopThreshold); + printf("backend: %s\n",backendToString(backend).c_str()); + + ebsynthRun(backend, + numStyleChannelsTotal, + numGuideChannelsTotal, + sourceWidth, + sourceHeight, + sourceStyle.data(), + sourceGuides.data(), + targetWidth, + targetHeight, + targetGuides.data(), + NULL, + styleWeights.data(), + guideWeights.data(), + uniformityWeight, + patchSize, + EBSYNTH_VOTEMODE_PLAIN, + numPyramidLevels, + numSearchVoteItersPerLevel.data(), + numPatchMatchItersPerLevel.data(), + stopThresholdPerLevel.data(), + NULL, + output.data()); + + stbi_write_png(outputFileName.c_str(),targetWidth,targetHeight,numStyleChannelsTotal,output.data(),numStyleChannelsTotal*targetWidth); + + printf("result was written to %s\n",outputFileName.c_str()); + + stbi_image_free(sourceStyleData); + + for(int i=0;i +#include +#include + +#define FOR(A,X,Y) for(int Y=0;Y +A2f nnfError(const A2V2i& NNF, + const int patchWidth, + FUNC patchError) +{ + A2f E(size(NNF)); + + #pragma omp parallel for schedule(static) + for(int y=0;y +void krnlVotePlain( Array2>& target, + const Array2>& source, + const Array2>& NNF, + const int patchSize) +{ + for(int y=0;y sumColor = zero>::value(); + float sumWeight = 0; + + for (int py = -r; py <= +r; py++) + for (int px = -r; px <= +r; px++) + { + if + ( + x+px >= 0 && x+px < NNF.width () && + y+py >= 0 && y+py < NNF.height() + ) + { + const V2i n = NNF(x+px,y+py)-V2i(px,py); + + if + ( + n[0] >= 0 && n[0] < source.width () && + n[1] >= 0 && n[1] < source.height() + ) + { + const float weight = 1.0f; + sumColor += weight*Vec(source(n(0),n(1))); + sumWeight += weight; + } + } + } + + const Vec v = Vec(sumColor/sumWeight); + target(x,y) = v; + } +} + +#if 0 +template +__global__ void krnlVoteWeighted( TexArray2 target, + const TexArray2 source, + const TexArray2<2,int> NNF, + const TexArray2<1,float> E, + const int patchSize) +{ + const int x = blockDim.x*blockIdx.x + threadIdx.x; + const int y = blockDim.y*blockIdx.y + threadIdx.y; + + if (x sumColor = zero>::value(); + float sumWeight = 0; + + for (int py = -r; py <= +r; py++) + for (int px = -r; px <= +r; px++) + { + /* + if + ( + x+px >= 0 && x+px < NNF.width () && + y+py >= 0 && y+py < NNF.height() + ) + */ + { + const V2i n = NNF(x+px,y+py)-V2i(px,py); + + /*if + ( + n[0] >= 0 && n[0] < S.width () && + n[1] >= 0 && n[1] < S.height() + )*/ + { + const float error = E(x+px,y+py)(0)/(patchSize*patchSize*N); + const float weight = 1.0f/(1.0f+error); + sumColor += weight*Vec(source(n(0),n(1))); + sumWeight += weight; + } + } + } + + const Vec v = Vec(sumColor/sumWeight); + target.write(x,y,v); + } +} +#endif + +template +Vec sampleBilinear(const Array2>& I,float x,float y) +{ + const int ix = x; + const int iy = y; + + const float s = x-ix; + const float t = y-iy; + + return Vec((1.0f-s)*(1.0f-t)*Vec(I(clamp(ix ,0,I.width()-1),clamp(iy ,0,I.height()-1)))+ + ( s)*(1.0f-t)*Vec(I(clamp(ix+1,0,I.width()-1),clamp(iy ,0,I.height()-1)))+ + (1.0f-s)*( t)*Vec(I(clamp(ix ,0,I.width()-1),clamp(iy+1,0,I.height()-1)))+ + ( s)*( t)*Vec(I(clamp(ix+1,0,I.width()-1),clamp(iy+1,0,I.height()-1)))); +}; + +/* +template +__global__ void krnlEvalMask( TexArray2<1,unsigned char> mask, + const TexArray2 style, + const TexArray2 style2, + const int stopThreshold) +{ + const int x = blockDim.x*blockIdx.x + threadIdx.x; + const int y = blockDim.y*blockIdx.y + threadIdx.y; + + if (x s = style(x,y); + const Vec s2 = style2(x,y); + + int maxDiff = 0; + for(int c=0;cmaxDiff ? diff:maxDiff; + } + + const Vec<1,unsigned char> msk = maxDiff < stopThreshold ? Vec<1,unsigned char>(0) : Vec<1,unsigned char>(255); + + mask.write(x,y,msk); + } +} + +__global__ void krnlDilateMask(TexArray2<1,unsigned char> mask2, + const TexArray2<1,unsigned char> mask, + const int patchSize) +{ + const int x = blockDim.x*blockIdx.x + threadIdx.x; + const int y = blockDim.y*blockIdx.y + threadIdx.y; + + if (x msk = Vec<1,unsigned char>(0); + + for (int py = -r; py <= +r; py++) + for (int px = -r; px <= +r; px++) + { + if (mask(x+px,y+py)[0]==255) { msk = Vec<1,unsigned char>(255); } + } + + mask2.write(x,y,msk); + } +} +*/ + +template +void resampleCPU( Array2>& O, + const Array2>& I) +{ + const float s = float(I.width())/float(O.width()); + + for(int y=0;y +struct PatchSSD_Split +{ + const Array2>& targetStyle; + const Array2>& sourceStyle; + + const Array2>& targetGuide; + const Array2>& sourceGuide; + + const Vec& styleWeights; + const Vec& guideWeights; + + PatchSSD_Split(const Array2>& targetStyle, + const Array2>& sourceStyle, + + const Array2>& targetGuide, + const Array2>& sourceGuide, + + const Vec& styleWeights, + const Vec& guideWeights) + + : targetStyle(targetStyle),sourceStyle(sourceStyle), + targetGuide(targetGuide),sourceGuide(sourceGuide), + styleWeights(styleWeights),guideWeights(guideWeights) {} + + float operator()(const int patchSize, + const V2i txy, + const V2i sxy, + const float ebest) + { + const int tx = txy(0); + const int ty = txy(1); + const int sx = sxy(0); + const int sy = sxy(1); + + const int r = patchSize/2; + float error = 0; + + if(tx-r>=0 && tx+r=0 && ty+rebest) { break; } + } + } + else + { + for(int py=-r;py<=+r;py++) + for(int px=-r;px<=+r;px++) + { + { + const Vec pixTs = targetStyle(clamp(tx + px,0,targetStyle.width()-1),clamp(ty + py,0,targetStyle.height()-1)); + const Vec pixSs = sourceStyle(clamp(sx + px,0,sourceStyle.width()-1),clamp(sy + py,0,sourceStyle.height()-1)); + for(int i=0;i pixTg = targetGuide(clamp(tx + px,0,targetGuide.width()-1),clamp(ty + py,0,targetGuide.height()-1)); + const Vec pixSg = sourceGuide(clamp(sx + px,0,sourceGuide.width()-1),clamp(sy + py,0,sourceGuide.height()-1)); + for(int i=0;i +struct PatchSSD_Split_Modulation +{ + const TexArray2 targetStyle; + const TexArray2 sourceStyle; + + const TexArray2 targetGuide; + const TexArray2 sourceGuide; + + const TexArray2 targetModulation; + + const Vec styleWeights; + const Vec guideWeights; + + PatchSSD_Split_Modulation(const TexArray2& targetStyle, + const TexArray2& sourceStyle, + + const TexArray2& targetGuide, + const TexArray2& sourceGuide, + + const TexArray2& targetModulation, + + const Vec& styleWeights, + const Vec& guideWeights) + + : targetStyle(targetStyle),sourceStyle(sourceStyle), + targetGuide(targetGuide),sourceGuide(sourceGuide), + targetModulation(targetModulation), + styleWeights(styleWeights),guideWeights(guideWeights) {} + + __device__ float operator()(const int patchSize, + const int tx, + const int ty, + const int sx, + const int sy, + const float ebest) + { + const int r = patchSize/2; + float error = 0; + + for(int py=-r;py<=+r;py++) + { + for(int px=-r;px<=+r;px++) + { + { + const Vec pixTs = targetStyle(tx + px,ty + py); + const Vec pixSs = sourceStyle(sx + px,sy + py); + for(int i=0;i pixTg = targetGuide(tx + px,ty + py); + const Vec pixSg = sourceGuide(sx + px,sy + py); + const Vec mult = Vec(targetModulation(tx,ty))/255.0f; + + for(int i=0;iebest) { return error; } + } + + return error; + } +}; +*/ + +static V2i pyramidLevelSize(const V2i& sizeBase,const int numLevels,const int level) +{ + return V2i(V2f(sizeBase)*std::pow(2.0f,-float(numLevels-1-level))); +} + +template +void copy(Array2* out_dst,void* src) +{ + Array2& dst = *out_dst; + memcpy(dst.data(),src,numel(dst)*sizeof(T)); +} + +template +void copy(void** out_dst,const Array2& src) +{ + void*& dst = *out_dst; + memcpy(dst,src.data(),numel(src)*sizeof(T)); +} + +void updateOmega(A2i& Omega,const V2i& sizeA,const int patchWidth,const V2i& axy,const V2i& bxy,const int incdec) +{ + const int r = patchWidth/2; + + int* ptr = (int*)&Omega(bxy(0)-r,bxy(1)-r); + const int ofs = (Omega.width()-patchWidth); + + for(int j=0;j +bool tryPatch(FUNC patchError,const V2i& sizeA,int patchWidth,const V2i& axy,const V2i& bxy,A2V2i& N,A2f& E,A2i& Omega,float omegaBest,float lambda) +{ + const float curOcc = (float(patchOmega(patchWidth,N(axy),Omega))/float(patchWidth*patchWidth))/omegaBest; + const float newOcc = (float(patchOmega(patchWidth, bxy,Omega))/float(patchWidth*patchWidth))/omegaBest; + + const float curErr = E(axy); + const float newErr = patchError(patchWidth,axy,bxy,curErr+lambda*curOcc); + + if ((newErr+lambda*newOcc) < (curErr+lambda*curOcc)) + { + updateOmega(Omega,sizeA,patchWidth,axy,bxy ,+1); + updateOmega(Omega,sizeA,patchWidth,axy,N(axy),-1); + N(axy) = bxy; + E(axy) = newErr; + } + + return true; +} + +template +void patchmatch(const V2i& sizeA, + const V2i& sizeB, + const int patchWidth, + FUNC patchError, + const float lambda, + const int numIters, + const int numThreads, + A2V2i& N, + A2f& E) +{ + const int w = patchWidth; + + E = nnfError(N,patchWidth,patchError); + + const float sra = 0.5f; + + std::vector irad; + + irad.push_back((sizeB(0) > sizeB(1) ? sizeB(0) : sizeB(1))); + + while (irad.back() != 1) irad.push_back(int(std::pow(sra, int(irad.size())) * irad[0])); + + const int nir = int(irad.size()); + + const int numThreads_ = numThreads<1 ? omp_get_max_threads() : numThreads; + const int minTileHeight = 8; + const int numTiles = int(ceil(float(sizeA(1))/float(numThreads_))) > minTileHeight ? numThreads_ : std::max(int(ceil(float(sizeA(1))/float(minTileHeight))),1); + const int tileHeight = sizeA(1)/numTiles; + + const float omegaBest = (float(sizeA(0)*sizeA(1)) / + float(sizeB(0)*sizeB(1))) * float(patchWidth*patchWidth); + A2i Omega(sizeB); + fill(&Omega,(int)0); + for(int y=0;y 0) : (x < sizeA(0)-1)) + { + V2i n = N(x-q,y); n[0] += q; + + if (odd ? (n[0] < sizeB(0)-w/2) : (n[0] >= w/2)) + { + tryPatch(patchError,sizeA,w,V2i(x,y),n,N,E,Omega,omegaBest,lambda); + } + } + + if (odd ? (y > 0) : (y = w/2)) + { + tryPatch(patchError,sizeA,w,V2i(x,y),n,N,E,Omega,omegaBest,lambda); + } + } + + #define RANDI(u) (18000 * ((u) & 65535) + ((u) >> 16)) + + unsigned int seed = (x | (y<<11)) ^ iter_seed; + seed = RANDI(seed); + + const V2i pix0 = N(x,y); + //for (int i = 0; i < nir; i++) + for (int i = nir-1; i >=0; i--) + { + V2i tl = pix0 - V2i(irad[i], irad[i]); + V2i br = pix0 + V2i(irad[i], irad[i]); + + tl = std::max(tl,V2i(w/2,w/2)); + br = std::min(br,sizeB-V2i(w/2,w/2)); + + const int _rndX = RANDI(seed); + const int _rndY = RANDI(_rndX); + seed=_rndY; + + const V2i n = V2i + ( + tl[0] + (_rndX % (br[0]-tl[0])), + tl[1] + (_rndY % (br[1]-tl[1])) + ); + + tryPatch(patchError,sizeA,w,V2i(x,y),n,N,E,Omega,omegaBest,lambda); + } + + #undef RANDI + } + } + } +} + +template +void ebsynthCpu(int numStyleChannels, + int numGuideChannels, + int sourceWidth, + int sourceHeight, + void* sourceStyleData, + void* sourceGuideData, + int targetWidth, + int targetHeight, + void* targetGuideData, + void* targetModulationData, + float* styleWeights, + float* guideWeights, + float uniformityWeight, + int patchSize, + int voteMode, + int numPyramidLevels, + int* numSearchVoteItersPerLevel, + int* numPatchMatchItersPerLevel, + int* stopThresholdPerLevel, + void* outputNnfData, + void* outputImageData) +{ + const int levelCount = numPyramidLevels; + + struct PyramidLevel + { + PyramidLevel() { } + + int sourceWidth; + int sourceHeight; + int targetWidth; + int targetHeight; + + Array2> sourceStyle; + Array2> sourceGuide; + Array2> targetStyle; + Array2> targetStyle2; + //Array2 mask; + //Array2 mask2; + Array2> targetGuide; + Array2> targetModulation; + Array2> NNF; + //Array2> NNF2; + Array2 E; + //Array2 Omega; + }; + + std::vector pyramid(levelCount); + for(int level=0;level>(V2i(pyramid[levelCount-1].sourceWidth,pyramid[levelCount-1].sourceHeight)); + pyramid[levelCount-1].sourceGuide = Array2>(V2i(pyramid[levelCount-1].sourceWidth,pyramid[levelCount-1].sourceHeight)); + pyramid[levelCount-1].targetGuide = Array2>(V2i(pyramid[levelCount-1].targetWidth,pyramid[levelCount-1].targetHeight)); + + copy(&pyramid[levelCount-1].sourceStyle,sourceStyleData); + copy(&pyramid[levelCount-1].sourceGuide,sourceGuideData); + copy(&pyramid[levelCount-1].targetGuide,targetGuideData); + + if (targetModulationData) + { + pyramid[levelCount-1].targetModulation = Array2>(V2i(pyramid[levelCount-1].targetWidth,pyramid[levelCount-1].targetHeight)); + copy(&pyramid[levelCount-1].targetModulation,targetModulationData); + } + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + bool inExtraPass = false; + + for (int level=0;level>(levelTargetSize); + pyramid[level].targetStyle2 = Array2>(levelTargetSize); + //pyramid[level].mask = Array2(levelTargetSize); + //pyramid[level].mask2 = Array2(levelTargetSize); + pyramid[level].NNF = Array2>(levelTargetSize); + //pyramid[level].NNF2 = Array2>(levelTargetSize); + //pyramid[level].Omega = Array2(levelSourceSize); + pyramid[level].E = Array2(levelTargetSize); + + if (level>(levelSourceSize); + pyramid[level].sourceGuide = Array2>(levelSourceSize); + pyramid[level].targetGuide = Array2>(levelTargetSize); + + resampleCPU(pyramid[level].sourceStyle,pyramid[levelCount-1].sourceStyle); + resampleCPU(pyramid[level].sourceGuide,pyramid[levelCount-1].sourceGuide); + resampleCPU(pyramid[level].targetGuide,pyramid[levelCount-1].targetGuide); + + if (targetModulationData) + { + resampleCPU(pyramid[level].targetModulation,pyramid[levelCount-1].targetModulation); + pyramid[level].targetModulation = Array2>(levelTargetSize); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + if (!inExtraPass) + { + A2V2i cpu_NNF; + if (level>0) + { + pyramid[level].NNF = nnfUpscale(pyramid[level-1].NNF, + patchSize, + V2i(pyramid[level].targetWidth,pyramid[level].targetHeight), + V2i(pyramid[level].sourceWidth,pyramid[level].sourceHeight)); + + pyramid[level-1].NNF = A2V2i(); + } + else + { + pyramid[level].NNF = nnfInitRandom(V2i(pyramid[level].targetWidth,pyramid[level].targetHeight), + V2i(pyramid[level].sourceWidth,pyramid[level].sourceHeight), + patchSize); + } + + ///////////////////////////////////////////////////////////////////////// + /* + Array2 cpu_Omega(pyramid[level].sourceWidth,pyramid[level].sourceHeight); + + fill(&cpu_Omega,(int)0); + for(int ay=0;ay> cpu_mask(V2i(pyramid[level].targetWidth,pyramid[level].targetHeight)); + //fill(&cpu_mask,Vec<1,unsigned char>(255)); + //copy(&pyramid[level].mask,cpu_mask); + + //////////////////////////////////////////////////////////////////////////// + + for (int voteIter=0;voteIter styleWeightsVec; + for(int i=0;i guideWeightsVec; + for(int i=0;i0) + { + /*if (targetModulationData) + { + patchmatchGPU(V2i(pyramid[level].targetWidth,pyramid[level].targetHeight), + V2i(pyramid[level].sourceWidth,pyramid[level].sourceHeight), + pyramid[level].Omega, + patchSize, + PatchSSD_Split_Modulation(pyramid[level].targetStyle, + pyramid[level].sourceStyle, + pyramid[level].targetGuide, + pyramid[level].sourceGuide, + pyramid[level].targetModulation, + styleWeightsVec, + guideWeightsVec), + uniformityWeight, + numPatchMatchItersPerLevel[level], + numGpuThreadsPerBlock, + pyramid[level].NNF, + pyramid[level].NNF2, + pyramid[level].E, + pyramid[level].mask, + rngStates); + } + else*/ + { + patchmatch(V2i(pyramid[level].targetWidth,pyramid[level].targetHeight), + V2i(pyramid[level].sourceWidth,pyramid[level].sourceHeight), + patchSize, + PatchSSD_Split(pyramid[level].targetStyle, + pyramid[level].sourceStyle, + pyramid[level].targetGuide, + pyramid[level].sourceGuide, + styleWeightsVec, + guideWeightsVec), + uniformityWeight, + numPatchMatchItersPerLevel[level], + -1, + pyramid[level].NNF, + pyramid[level].E); + } + } + /* + else + { + if (targetModulationData) + { + krnlEvalErrorPass<<>>(patchSize, + PatchSSD_Split_Modulation(pyramid[level].targetStyle, + pyramid[level].sourceStyle, + pyramid[level].targetGuide, + pyramid[level].sourceGuide, + pyramid[level].targetModulation, + styleWeightsVec, + guideWeightsVec), + pyramid[level].NNF, + pyramid[level].E); + } + else + { + krnlEvalErrorPass<<>>(patchSize, + PatchSSD_Split(pyramid[level].targetStyle, + pyramid[level].sourceStyle, + pyramid[level].targetGuide, + pyramid[level].sourceGuide, + styleWeightsVec, + guideWeightsVec), + pyramid[level].NNF, + pyramid[level].E); + } + checkCudaError( cudaDeviceSynchronize() ); + } + */ + { + //if (voteMode==EBSYNTH_VOTEMODE_PLAIN) + { + krnlVotePlain(pyramid[level].targetStyle2, + pyramid[level].sourceStyle, + pyramid[level].NNF, + patchSize); + } + /*else if (voteMode==EBSYNTH_VOTEMODE_WEIGHTED) + { + krnlVoteWeighted<<>>(pyramid[level].targetStyle2, + pyramid[level].sourceStyle, + pyramid[level].NNF, + pyramid[level].E, + patchSize); + }*/ + + std::swap(pyramid[level].targetStyle2,pyramid[level].targetStyle); + + /* + if (voteIter>>(pyramid[level].mask, + pyramid[level].targetStyle, + pyramid[level].targetStyle2, + stopThresholdPerLevel[level]); + checkCudaError( cudaDeviceSynchronize() ); + + krnlDilateMask<<>>(pyramid[level].mask2, + pyramid[level].mask, + patchSize); + std::swap(pyramid[level].mask2,pyramid[level].mask); + checkCudaError( cudaDeviceSynchronize() ); + } + */ + } + } + + if (level==levelCount-1) + { + if (outputNnfData!=NULL) { copy(&outputNnfData,pyramid[level].NNF); } + copy(&outputImageData,pyramid[level].targetStyle); + } + + pyramid[level].sourceStyle = Array2>(); + pyramid[level].sourceGuide = Array2>(); + pyramid[level].targetGuide = Array2>(); + pyramid[level].targetStyle = Array2>(); + pyramid[level].targetStyle2 = Array2>(); + //pyramid[level].mask = Array2(); + //pyramid[level].mask2 = Array2(); + //pyramid[level].NNF2 = Array2>(); + //pyramid[level].Omega = Array2(); + pyramid[level].E = Array2(); + if (targetModulationData) { pyramid[level].targetModulation = Array2>(); } + } + + pyramid[levelCount-1].NNF = Array2>(); +} + +void ebsynthRunCpu(int numStyleChannels, + int numGuideChannels, + int sourceWidth, + int sourceHeight, + void* sourceStyleData, + void* sourceGuideData, + int targetWidth, + int targetHeight, + void* targetGuideData, + void* targetModulationData, + float* styleWeights, + float* guideWeights, + float uniformityWeight, + int patchSize, + int voteMode, + int numPyramidLevels, + int* numSearchVoteItersPerLevel, + int* numPatchMatchItersPerLevel, + int* stopThresholdPerLevel, + void* outputNnfData, + void* outputImageData) +{ + void (*const dispatchEbsynth[EBSYNTH_MAX_GUIDE_CHANNELS][EBSYNTH_MAX_STYLE_CHANNELS])(int,int,int,int,void*,void*,int,int,void*,void*,float*,float*,float,int,int,int,int*,int*,int*,void*,void*) = + { + { ebsynthCpu<1, 1>, ebsynthCpu<2, 1>, ebsynthCpu<3, 1>, ebsynthCpu<4, 1>, ebsynthCpu<5, 1>, ebsynthCpu<6, 1>, ebsynthCpu<7, 1>, ebsynthCpu<8, 1> }, + { ebsynthCpu<1, 2>, ebsynthCpu<2, 2>, ebsynthCpu<3, 2>, ebsynthCpu<4, 2>, ebsynthCpu<5, 2>, ebsynthCpu<6, 2>, ebsynthCpu<7, 2>, ebsynthCpu<8, 2> }, + { ebsynthCpu<1, 3>, ebsynthCpu<2, 3>, ebsynthCpu<3, 3>, ebsynthCpu<4, 3>, ebsynthCpu<5, 3>, ebsynthCpu<6, 3>, ebsynthCpu<7, 3>, ebsynthCpu<8, 3> }, + { ebsynthCpu<1, 4>, ebsynthCpu<2, 4>, ebsynthCpu<3, 4>, ebsynthCpu<4, 4>, ebsynthCpu<5, 4>, ebsynthCpu<6, 4>, ebsynthCpu<7, 4>, ebsynthCpu<8, 4> }, + { ebsynthCpu<1, 5>, ebsynthCpu<2, 5>, ebsynthCpu<3, 5>, ebsynthCpu<4, 5>, ebsynthCpu<5, 5>, ebsynthCpu<6, 5>, ebsynthCpu<7, 5>, ebsynthCpu<8, 5> }, + { ebsynthCpu<1, 6>, ebsynthCpu<2, 6>, ebsynthCpu<3, 6>, ebsynthCpu<4, 6>, ebsynthCpu<5, 6>, ebsynthCpu<6, 6>, ebsynthCpu<7, 6>, ebsynthCpu<8, 6> }, + { ebsynthCpu<1, 7>, ebsynthCpu<2, 7>, ebsynthCpu<3, 7>, ebsynthCpu<4, 7>, ebsynthCpu<5, 7>, ebsynthCpu<6, 7>, ebsynthCpu<7, 7>, ebsynthCpu<8, 7> }, + { ebsynthCpu<1, 8>, ebsynthCpu<2, 8>, ebsynthCpu<3, 8>, ebsynthCpu<4, 8>, ebsynthCpu<5, 8>, ebsynthCpu<6, 8>, ebsynthCpu<7, 8>, ebsynthCpu<8, 8> }, + { ebsynthCpu<1, 9>, ebsynthCpu<2, 9>, ebsynthCpu<3, 9>, ebsynthCpu<4, 9>, ebsynthCpu<5, 9>, ebsynthCpu<6, 9>, ebsynthCpu<7, 9>, ebsynthCpu<8, 9> }, + { ebsynthCpu<1,10>, ebsynthCpu<2,10>, ebsynthCpu<3,10>, ebsynthCpu<4,10>, ebsynthCpu<5,10>, ebsynthCpu<6,10>, ebsynthCpu<7,10>, ebsynthCpu<8,10> }, + { ebsynthCpu<1,11>, ebsynthCpu<2,11>, ebsynthCpu<3,11>, ebsynthCpu<4,11>, ebsynthCpu<5,11>, ebsynthCpu<6,11>, ebsynthCpu<7,11>, ebsynthCpu<8,11> }, + { ebsynthCpu<1,12>, ebsynthCpu<2,12>, ebsynthCpu<3,12>, ebsynthCpu<4,12>, ebsynthCpu<5,12>, ebsynthCpu<6,12>, ebsynthCpu<7,12>, ebsynthCpu<8,12> }, + { ebsynthCpu<1,13>, ebsynthCpu<2,13>, ebsynthCpu<3,13>, ebsynthCpu<4,13>, ebsynthCpu<5,13>, ebsynthCpu<6,13>, ebsynthCpu<7,13>, ebsynthCpu<8,13> }, + { ebsynthCpu<1,14>, ebsynthCpu<2,14>, ebsynthCpu<3,14>, ebsynthCpu<4,14>, ebsynthCpu<5,14>, ebsynthCpu<6,14>, ebsynthCpu<7,14>, ebsynthCpu<8,14> }, + { ebsynthCpu<1,15>, ebsynthCpu<2,15>, ebsynthCpu<3,15>, ebsynthCpu<4,15>, ebsynthCpu<5,15>, ebsynthCpu<6,15>, ebsynthCpu<7,15>, ebsynthCpu<8,15> }, + { ebsynthCpu<1,16>, ebsynthCpu<2,16>, ebsynthCpu<3,16>, ebsynthCpu<4,16>, ebsynthCpu<5,16>, ebsynthCpu<6,16>, ebsynthCpu<7,16>, ebsynthCpu<8,16> }, + { ebsynthCpu<1,17>, ebsynthCpu<2,17>, ebsynthCpu<3,17>, ebsynthCpu<4,17>, ebsynthCpu<5,17>, ebsynthCpu<6,17>, ebsynthCpu<7,17>, ebsynthCpu<8,17> }, + { ebsynthCpu<1,18>, ebsynthCpu<2,18>, ebsynthCpu<3,18>, ebsynthCpu<4,18>, ebsynthCpu<5,18>, ebsynthCpu<6,18>, ebsynthCpu<7,18>, ebsynthCpu<8,18> }, + { ebsynthCpu<1,19>, ebsynthCpu<2,19>, ebsynthCpu<3,19>, ebsynthCpu<4,19>, ebsynthCpu<5,19>, ebsynthCpu<6,19>, ebsynthCpu<7,19>, ebsynthCpu<8,19> }, + { ebsynthCpu<1,20>, ebsynthCpu<2,20>, ebsynthCpu<3,20>, ebsynthCpu<4,20>, ebsynthCpu<5,20>, ebsynthCpu<6,20>, ebsynthCpu<7,20>, ebsynthCpu<8,20> }, + { ebsynthCpu<1,21>, ebsynthCpu<2,21>, ebsynthCpu<3,21>, ebsynthCpu<4,21>, ebsynthCpu<5,21>, ebsynthCpu<6,21>, ebsynthCpu<7,21>, ebsynthCpu<8,21> }, + { ebsynthCpu<1,22>, ebsynthCpu<2,22>, ebsynthCpu<3,22>, ebsynthCpu<4,22>, ebsynthCpu<5,22>, ebsynthCpu<6,22>, ebsynthCpu<7,22>, ebsynthCpu<8,22> }, + { ebsynthCpu<1,23>, ebsynthCpu<2,23>, ebsynthCpu<3,23>, ebsynthCpu<4,23>, ebsynthCpu<5,23>, ebsynthCpu<6,23>, ebsynthCpu<7,23>, ebsynthCpu<8,23> }, + { ebsynthCpu<1,24>, ebsynthCpu<2,24>, ebsynthCpu<3,24>, ebsynthCpu<4,24>, ebsynthCpu<5,24>, ebsynthCpu<6,24>, ebsynthCpu<7,24>, ebsynthCpu<8,24> } + }; + + if (numStyleChannels>=1 && numStyleChannels<=EBSYNTH_MAX_STYLE_CHANNELS && + numGuideChannels>=1 && numGuideChannels<=EBSYNTH_MAX_GUIDE_CHANNELS) + { + dispatchEbsynth[numGuideChannels-1][numStyleChannels-1](numStyleChannels, + numGuideChannels, + sourceWidth, + sourceHeight, + sourceStyleData, + sourceGuideData, + targetWidth, + targetHeight, + targetGuideData, + targetModulationData, + styleWeights, + guideWeights, + uniformityWeight, + patchSize, + voteMode, + numPyramidLevels, + numSearchVoteItersPerLevel, + numPatchMatchItersPerLevel, + stopThresholdPerLevel, + outputNnfData, + outputImageData); + } +} + +int ebsynthBackendAvailableCpu() +{ + return 1; +} diff --git a/src/ebsynth_cpu.h b/src/ebsynth_cpu.h new file mode 100644 index 0000000..03dbfcf --- /dev/null +++ b/src/ebsynth_cpu.h @@ -0,0 +1,32 @@ +// This software is in the public domain. Where that dedication is not +// recognized, you are granted a perpetual, irrevocable license to copy +// and modify this file as you see fit. + +#ifndef EBSYNTH_CPU_H_ +#define EBSYNTH_CPU_H_ + +void ebsynthRunCpu(int numStyleChannels, + int numGuideChannels, + int sourceWidth, + int sourceHeight, + void* sourceStyleData, + void* sourceGuideData, + int targetWidth, + int targetHeight, + void* targetGuideData, + void* targetModulationData, + float* styleWeights, + float* guideWeights, + float uniformityWeight, + int patchSize, + int voteMode, + int numPyramidLevels, + int* numSearchVoteItersPerLevel, + int* numPatchMatchItersPerLevel, + int* stopThresholdPerLevel, + void* outputNnfData, + void* outputImageData); + +int ebsynthBackendAvailableCpu(); + +#endif diff --git a/src/ebsynth.cu b/src/ebsynth_cuda.cu similarity index 56% rename from src/ebsynth.cu rename to src/ebsynth_cuda.cu index 582d6bb..b0ef1bb 100644 --- a/src/ebsynth.cu +++ b/src/ebsynth_cuda.cu @@ -3,11 +3,369 @@ // and modify this file as you see fit. #include "ebsynth.h" -#include "patchmatch_gpu.h" +#include "ebsynth_cuda_texarray2.h" +#include "ebsynth_cuda_memarray2.h" + +#include +#include +#include #define FOR(A,X,Y) for(int Y=0;Y V1f; +typedef Array2> A2V1f; + +struct pcgState +{ + uint64_t state; + uint64_t increment; +}; + +__device__ void pcgAdvance(pcgState* rng) +{ + rng->state = rng->state * 6364136223846793005ULL + rng->increment; +} + +__device__ uint32_t pcgOutput(uint64_t state) +{ + return (uint32_t)(((state >> 22u) ^ state) >> ((state >> 61u) + 22u)); +} + +__device__ uint32_t pcgRand(pcgState* rng) +{ + uint64_t oldstate = rng->state; + pcgAdvance(rng); + return pcgOutput(oldstate); +} + +__device__ void pcgInit(pcgState* rng,uint64_t seed,uint64_t stream) +{ + rng->state = 0U; + rng->increment = (stream << 1u) | 1u; + pcgAdvance(rng); + rng->state += seed; + pcgAdvance(rng); +} + +__global__ void krnlInitRngStates(const int width, + const int height, + pcgState* rngStates) +{ + const int x = blockDim.x*blockIdx.x + threadIdx.x; + const int y = blockDim.y*blockIdx.y + threadIdx.y; + + if (x>>(width,height,gpuRngStates); + + return gpuRngStates; +} + +template +__global__ void krnlEvalErrorPass(const int patchWidth, + FUNC patchError, + const TexArray2<2,int> NNF, + TexArray2<1,float> E) +{ + const int x = blockDim.x*blockIdx.x + threadIdx.x; + const int y = blockDim.y*blockIdx.y + threadIdx.y; + + if (x& Omega,const int patchWidth,const int bx,const int by,const int incdec) +{ + const int r = patchWidth/2; + + for(int oy=-r;oy<=+r;oy++) + for(int ox=-r;ox<=+r;ox++) + { + const int x = bx+ox; + const int y = by+oy; + atomicAdd(&Omega.data[x+y*Omega.width],incdec); + //Omega.data[x+y*Omega.width] += incdec; + } +} + +int __device__ patchOmega(const int patchWidth,const int bx,const int by,const MemArray2& Omega) +{ + const int r = patchWidth/2; + + int sum = 0; + + for(int oy=-r;oy<=+r;oy++) + for(int ox=-r;ox<=+r;ox++) + { + const int x = bx+ox; + const int y = by+oy; + sum += Omega.data[x+y*Omega.width]; /// XXX: atomic read instead ?? + } + + return sum; +} + +template +__device__ void tryPatch(const V2i& sizeA, + const V2i& sizeB, + MemArray2& Omega, + const int patchWidth, + FUNC patchError, + const float lambda, + const int ax, + const int ay, + const int bx, + const int by, + V2i& nbest, + float& ebest) +{ + const float omegaBest = (float(sizeA(0)*sizeA(1)) / + float(sizeB(0)*sizeB(1))) * float(patchWidth*patchWidth); + + const float curOcc = (float(patchOmega(patchWidth,nbest(0),nbest(1),Omega))/float(patchWidth*patchWidth))/omegaBest; + const float newOcc = (float(patchOmega(patchWidth, bx, by,Omega))/float(patchWidth*patchWidth))/omegaBest; + + const float curErr = ebest; + const float newErr = patchError(patchWidth,ax,ay,bx,by,curErr+lambda*curOcc); + + if ((newErr+lambda*newOcc) < (curErr+lambda*curOcc)) + { + updateOmega(Omega,patchWidth, bx, by,+1); + updateOmega(Omega,patchWidth,nbest(0),nbest(1),-1); + nbest = V2i(bx,by); + ebest = newErr; + } +} + +template +__device__ void tryNeighborsOffset(const int x, + const int y, + const int ox, + const int oy, + V2i& nbest, + float& ebest, + const V2i& sizeA, + const V2i& sizeB, + MemArray2& Omega, + const int patchWidth, + FUNC patchError, + const float lambda, + const TexArray2<2,int>& NNF) +{ + const int hpw = patchWidth/2; + + const V2i on = NNF(x+ox,y+oy); + const int nx = on(0)-ox; + const int ny = on(1)-oy; + + if (nx>=hpw && nx=hpw && ny +__global__ void krnlPropagationPass(const V2i sizeA, + const V2i sizeB, + MemArray2 Omega, + const int patchWidth, + FUNC patchError, + const float lambda, + const int r, + const TexArray2<2,int> NNF, + TexArray2<2,int> NNF2, + TexArray2<1,float> E, + TexArray2<1,unsigned char> mask) +{ + const int x = blockDim.x*blockIdx.x + threadIdx.x; + const int y = blockDim.y*blockIdx.y + threadIdx.y; + + if (x +__device__ void tryRandomOffsetInRadius(const int r, + const V2i& sizeA, + const V2i& sizeB, + MemArray2& Omega, + const int patchWidth, + FUNC patchError, + const float lambda, + const int x, + const int y, + const V2i& norg, + V2i& nbest, + float& ebest, + pcgState* rngState) +{ + const int hpw = patchWidth/2; + + const int xmin = max(norg(0)-r,hpw); + const int xmax = min(norg(0)+r,sizeB(0)-1-hpw); + const int ymin = max(norg(1)-r,hpw); + const int ymax = min(norg(1)+r,sizeB(1)-1-hpw); + + const int nx = xmin+(pcgRand(rngState)%(xmax-xmin+1)); + const int ny = ymin+(pcgRand(rngState)%(ymax-ymin+1)); + + tryPatch(sizeA,sizeB,Omega,patchWidth,patchError,lambda,x,y,nx,ny,nbest,ebest); +} + +/* +template +__global__ void krnlRandomSearchPass(const V2i sizeA, + const V2i sizeB, + MemArray2 Omega, + const int patchWidth, + FUNC patchError, + const float lambda, + TexArray2<2,int> NNF, + TexArray2<1,float> E, + TexArray2<1,unsigned char> mask, + pcgState* rngStates) +{ + const int x = blockDim.x*blockIdx.x + threadIdx.x; + const int y = blockDim.y*blockIdx.y + threadIdx.y; + + if (x +__global__ void krnlRandomSearchPass(const V2i sizeA, + const V2i sizeB, + MemArray2 Omega, + const int patchWidth, + FUNC patchError, + const float lambda, + const int radius, + TexArray2<2,int> NNF, + TexArray2<1,float> E, + TexArray2<1,unsigned char> mask, + pcgState* rngStates) +{ + const int x = blockDim.x*blockIdx.x + threadIdx.x; + const int y = blockDim.y*blockIdx.y + threadIdx.y; + + if (x +void patchmatchGPU(const V2i sizeA, + const V2i sizeB, + MemArray2& Omega, + const int patchWidth, + FUNC patchError, + const float lambda, + const int numIters, + const int numThreadsPerBlock, + TexArray2<2,int>& NNF, + TexArray2<2,int>& NNF2, + TexArray2<1,float>& E, + TexArray2<1,unsigned char>& mask, + pcgState* rngStates) +{ + const dim3 threadsPerBlock = dim3(numThreadsPerBlock,numThreadsPerBlock); + const dim3 numBlocks = dim3((NNF.width+threadsPerBlock.x)/threadsPerBlock.x, + (NNF.height+threadsPerBlock.y)/threadsPerBlock.y); + + krnlEvalErrorPass<<>>(patchWidth,patchError,NNF,E); + + checkCudaError(cudaDeviceSynchronize()); + + for(int i=0;i>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,4,NNF,NNF2,E,mask); std::swap(NNF,NNF2); + + checkCudaError(cudaDeviceSynchronize()); + + krnlPropagationPass<<>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,2,NNF,NNF2,E,mask); std::swap(NNF,NNF2); + + checkCudaError(cudaDeviceSynchronize()); + + krnlPropagationPass<<>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,1,NNF,NNF2,E,mask); std::swap(NNF,NNF2); + + checkCudaError(cudaDeviceSynchronize()); + + for(int r=1;r>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,r,NNF,E,mask,rngStates); + } + + checkCudaError(cudaDeviceSynchronize()); + } + + krnlEvalErrorPass<<>>(patchWidth,patchError,NNF,E); + + checkCudaError(cudaDeviceSynchronize()); +} + +static A2V2i nnfInitRandom(const V2i& targetSize, const V2i& sourceSize, const int patchSize) { @@ -26,7 +384,7 @@ A2V2i nnfInitRandom(const V2i& targetSize, return NNF; } -A2V2i nnfUpscale(const A2V2i& NNF, +static A2V2i nnfUpscale(const A2V2i& NNF, const int patchSize, const V2i& targetSize, const V2i& sourceSize) @@ -381,33 +739,33 @@ struct PatchSSD_Split_Modulation } }; -V2i pyramidLevelSize(const V2i& sizeBase,const int numLevels,const int level) +static V2i pyramidLevelSize(const V2i& sizeBase,const int numLevels,const int level) { - return V2i(V2f(sizeBase)*pow(2.0f,-float(numLevels-1-level))); + return V2i(V2f(sizeBase)*std::pow(2.0f,-float(numLevels-1-level))); } template -void runEbsynth(int ebsynthBackend, - int numStyleChannels, - int numGuideChannels, - int sourceWidth, - int sourceHeight, - void* sourceStyleData, - void* sourceGuideData, - int targetWidth, - int targetHeight, - void* targetGuideData, - void* targetModulationData, - float* styleWeights, - float* guideWeights, - float uniformityWeight, - int patchSize, - int voteMode, - int numPyramidLevels, - int* numSearchVoteItersPerLevel, - int* numPatchMatchItersPerLevel, - int* stopThresholdPerLevel, - void* outputData) +void ebsynthCuda(int numStyleChannels, + int numGuideChannels, + int sourceWidth, + int sourceHeight, + void* sourceStyleData, + void* sourceGuideData, + int targetWidth, + int targetHeight, + void* targetGuideData, + void* targetModulationData, + float* styleWeights, + float* guideWeights, + float uniformityWeight, + int patchSize, + int voteMode, + int numPyramidLevels, + int* numSearchVoteItersPerLevel, + int* numPatchMatchItersPerLevel, + int* stopThresholdPerLevel, + void* outputNnfData, + void* outputImageData) { const int levelCount = numPyramidLevels; @@ -706,7 +1064,11 @@ void runEbsynth(int ebsynthBackend, } } - if (level==levelCount-1) { copy(&outputData,pyramid[pyramid.size()-1].targetStyle); } + if (level==levelCount-1) + { + if (outputNnfData!=NULL) { copy(&outputNnfData,pyramid[level].NNF); } + copy(&outputImageData,pyramid[level].targetStyle); + } pyramid[level].sourceStyle.destroy(); pyramid[level].sourceGuide.destroy(); @@ -726,62 +1088,60 @@ void runEbsynth(int ebsynthBackend, checkCudaError( cudaFree(rngStates) ); } -EBSYNTH_API void ebsynthRun(int ebsynthBackend, - int numStyleChannels, - int numGuideChannels, - int sourceWidth, - int sourceHeight, - void* sourceStyleData, - void* sourceGuideData, - int targetWidth, - int targetHeight, - void* targetGuideData, - void* targetModulationData, - float* styleWeights, - float* guideWeights, - float uniformityWeight, - int patchSize, - int voteMode, - int numPyramidLevels, - int* numSearchVoteItersPerLevel, - int* numPatchMatchItersPerLevel, - int* stopThresholdPerLevel, - void* outputData - ) +void ebsynthRunCuda(int numStyleChannels, + int numGuideChannels, + int sourceWidth, + int sourceHeight, + void* sourceStyleData, + void* sourceGuideData, + int targetWidth, + int targetHeight, + void* targetGuideData, + void* targetModulationData, + float* styleWeights, + float* guideWeights, + float uniformityWeight, + int patchSize, + int voteMode, + int numPyramidLevels, + int* numSearchVoteItersPerLevel, + int* numPatchMatchItersPerLevel, + int* stopThresholdPerLevel, + void* outputNnfData, + void* outputImageData) { - void (*const dispatchEbsynth[EBSYNTH_MAX_GUIDE_CHANNELS][EBSYNTH_MAX_STYLE_CHANNELS])(int,int,int,int,int,void*,void*,int,int,void*,void*,float*,float*,float,int,int,int,int*,int*,int*,void*) = + void (*const dispatchEbsynth[EBSYNTH_MAX_GUIDE_CHANNELS][EBSYNTH_MAX_STYLE_CHANNELS])(int,int,int,int,void*,void*,int,int,void*,void*,float*,float*,float,int,int,int,int*,int*,int*,void*,void*) = { - { runEbsynth<1, 1>, runEbsynth<2, 1>, runEbsynth<3, 1>, runEbsynth<4, 1>, runEbsynth<5, 1>, runEbsynth<6, 1>, runEbsynth<7, 1>, runEbsynth<8, 1> }, - { runEbsynth<1, 2>, runEbsynth<2, 2>, runEbsynth<3, 2>, runEbsynth<4, 2>, runEbsynth<5, 2>, runEbsynth<6, 2>, runEbsynth<7, 2>, runEbsynth<8, 2> }, - { runEbsynth<1, 3>, runEbsynth<2, 3>, runEbsynth<3, 3>, runEbsynth<4, 3>, runEbsynth<5, 3>, runEbsynth<6, 3>, runEbsynth<7, 3>, runEbsynth<8, 3> }, - { runEbsynth<1, 4>, runEbsynth<2, 4>, runEbsynth<3, 4>, runEbsynth<4, 4>, runEbsynth<5, 4>, runEbsynth<6, 4>, runEbsynth<7, 4>, runEbsynth<8, 4> }, - { runEbsynth<1, 5>, runEbsynth<2, 5>, runEbsynth<3, 5>, runEbsynth<4, 5>, runEbsynth<5, 5>, runEbsynth<6, 5>, runEbsynth<7, 5>, runEbsynth<8, 5> }, - { runEbsynth<1, 6>, runEbsynth<2, 6>, runEbsynth<3, 6>, runEbsynth<4, 6>, runEbsynth<5, 6>, runEbsynth<6, 6>, runEbsynth<7, 6>, runEbsynth<8, 6> }, - { runEbsynth<1, 7>, runEbsynth<2, 7>, runEbsynth<3, 7>, runEbsynth<4, 7>, runEbsynth<5, 7>, runEbsynth<6, 7>, runEbsynth<7, 7>, runEbsynth<8, 7> }, - { runEbsynth<1, 8>, runEbsynth<2, 8>, runEbsynth<3, 8>, runEbsynth<4, 8>, runEbsynth<5, 8>, runEbsynth<6, 8>, runEbsynth<7, 8>, runEbsynth<8, 8> }, - { runEbsynth<1, 9>, runEbsynth<2, 9>, runEbsynth<3, 9>, runEbsynth<4, 9>, runEbsynth<5, 9>, runEbsynth<6, 9>, runEbsynth<7, 9>, runEbsynth<8, 9> }, - { runEbsynth<1,10>, runEbsynth<2,10>, runEbsynth<3,10>, runEbsynth<4,10>, runEbsynth<5,10>, runEbsynth<6,10>, runEbsynth<7,10>, runEbsynth<8,10> }, - { runEbsynth<1,11>, runEbsynth<2,11>, runEbsynth<3,11>, runEbsynth<4,11>, runEbsynth<5,11>, runEbsynth<6,11>, runEbsynth<7,11>, runEbsynth<8,11> }, - { runEbsynth<1,12>, runEbsynth<2,12>, runEbsynth<3,12>, runEbsynth<4,12>, runEbsynth<5,12>, runEbsynth<6,12>, runEbsynth<7,12>, runEbsynth<8,12> }, - { runEbsynth<1,13>, runEbsynth<2,13>, runEbsynth<3,13>, runEbsynth<4,13>, runEbsynth<5,13>, runEbsynth<6,13>, runEbsynth<7,13>, runEbsynth<8,13> }, - { runEbsynth<1,14>, runEbsynth<2,14>, runEbsynth<3,14>, runEbsynth<4,14>, runEbsynth<5,14>, runEbsynth<6,14>, runEbsynth<7,14>, runEbsynth<8,14> }, - { runEbsynth<1,15>, runEbsynth<2,15>, runEbsynth<3,15>, runEbsynth<4,15>, runEbsynth<5,15>, runEbsynth<6,15>, runEbsynth<7,15>, runEbsynth<8,15> }, - { runEbsynth<1,16>, runEbsynth<2,16>, runEbsynth<3,16>, runEbsynth<4,16>, runEbsynth<5,16>, runEbsynth<6,16>, runEbsynth<7,16>, runEbsynth<8,16> }, - { runEbsynth<1,17>, runEbsynth<2,17>, runEbsynth<3,17>, runEbsynth<4,17>, runEbsynth<5,17>, runEbsynth<6,17>, runEbsynth<7,17>, runEbsynth<8,17> }, - { runEbsynth<1,18>, runEbsynth<2,18>, runEbsynth<3,18>, runEbsynth<4,18>, runEbsynth<5,18>, runEbsynth<6,18>, runEbsynth<7,18>, runEbsynth<8,18> }, - { runEbsynth<1,19>, runEbsynth<2,19>, runEbsynth<3,19>, runEbsynth<4,19>, runEbsynth<5,19>, runEbsynth<6,19>, runEbsynth<7,19>, runEbsynth<8,19> }, - { runEbsynth<1,20>, runEbsynth<2,20>, runEbsynth<3,20>, runEbsynth<4,20>, runEbsynth<5,20>, runEbsynth<6,20>, runEbsynth<7,20>, runEbsynth<8,20> }, - { runEbsynth<1,21>, runEbsynth<2,21>, runEbsynth<3,21>, runEbsynth<4,21>, runEbsynth<5,21>, runEbsynth<6,21>, runEbsynth<7,21>, runEbsynth<8,21> }, - { runEbsynth<1,22>, runEbsynth<2,22>, runEbsynth<3,22>, runEbsynth<4,22>, runEbsynth<5,22>, runEbsynth<6,22>, runEbsynth<7,22>, runEbsynth<8,22> }, - { runEbsynth<1,23>, runEbsynth<2,23>, runEbsynth<3,23>, runEbsynth<4,23>, runEbsynth<5,23>, runEbsynth<6,23>, runEbsynth<7,23>, runEbsynth<8,23> }, - { runEbsynth<1,24>, runEbsynth<2,24>, runEbsynth<3,24>, runEbsynth<4,24>, runEbsynth<5,24>, runEbsynth<6,24>, runEbsynth<7,24>, runEbsynth<8,24> } + { ebsynthCuda<1, 1>, ebsynthCuda<2, 1>, ebsynthCuda<3, 1>, ebsynthCuda<4, 1>, ebsynthCuda<5, 1>, ebsynthCuda<6, 1>, ebsynthCuda<7, 1>, ebsynthCuda<8, 1> }, + { ebsynthCuda<1, 2>, ebsynthCuda<2, 2>, ebsynthCuda<3, 2>, ebsynthCuda<4, 2>, ebsynthCuda<5, 2>, ebsynthCuda<6, 2>, ebsynthCuda<7, 2>, ebsynthCuda<8, 2> }, + { ebsynthCuda<1, 3>, ebsynthCuda<2, 3>, ebsynthCuda<3, 3>, ebsynthCuda<4, 3>, ebsynthCuda<5, 3>, ebsynthCuda<6, 3>, ebsynthCuda<7, 3>, ebsynthCuda<8, 3> }, + { ebsynthCuda<1, 4>, ebsynthCuda<2, 4>, ebsynthCuda<3, 4>, ebsynthCuda<4, 4>, ebsynthCuda<5, 4>, ebsynthCuda<6, 4>, ebsynthCuda<7, 4>, ebsynthCuda<8, 4> }, + { ebsynthCuda<1, 5>, ebsynthCuda<2, 5>, ebsynthCuda<3, 5>, ebsynthCuda<4, 5>, ebsynthCuda<5, 5>, ebsynthCuda<6, 5>, ebsynthCuda<7, 5>, ebsynthCuda<8, 5> }, + { ebsynthCuda<1, 6>, ebsynthCuda<2, 6>, ebsynthCuda<3, 6>, ebsynthCuda<4, 6>, ebsynthCuda<5, 6>, ebsynthCuda<6, 6>, ebsynthCuda<7, 6>, ebsynthCuda<8, 6> }, + { ebsynthCuda<1, 7>, ebsynthCuda<2, 7>, ebsynthCuda<3, 7>, ebsynthCuda<4, 7>, ebsynthCuda<5, 7>, ebsynthCuda<6, 7>, ebsynthCuda<7, 7>, ebsynthCuda<8, 7> }, + { ebsynthCuda<1, 8>, ebsynthCuda<2, 8>, ebsynthCuda<3, 8>, ebsynthCuda<4, 8>, ebsynthCuda<5, 8>, ebsynthCuda<6, 8>, ebsynthCuda<7, 8>, ebsynthCuda<8, 8> }, + { ebsynthCuda<1, 9>, ebsynthCuda<2, 9>, ebsynthCuda<3, 9>, ebsynthCuda<4, 9>, ebsynthCuda<5, 9>, ebsynthCuda<6, 9>, ebsynthCuda<7, 9>, ebsynthCuda<8, 9> }, + { ebsynthCuda<1,10>, ebsynthCuda<2,10>, ebsynthCuda<3,10>, ebsynthCuda<4,10>, ebsynthCuda<5,10>, ebsynthCuda<6,10>, ebsynthCuda<7,10>, ebsynthCuda<8,10> }, + { ebsynthCuda<1,11>, ebsynthCuda<2,11>, ebsynthCuda<3,11>, ebsynthCuda<4,11>, ebsynthCuda<5,11>, ebsynthCuda<6,11>, ebsynthCuda<7,11>, ebsynthCuda<8,11> }, + { ebsynthCuda<1,12>, ebsynthCuda<2,12>, ebsynthCuda<3,12>, ebsynthCuda<4,12>, ebsynthCuda<5,12>, ebsynthCuda<6,12>, ebsynthCuda<7,12>, ebsynthCuda<8,12> }, + { ebsynthCuda<1,13>, ebsynthCuda<2,13>, ebsynthCuda<3,13>, ebsynthCuda<4,13>, ebsynthCuda<5,13>, ebsynthCuda<6,13>, ebsynthCuda<7,13>, ebsynthCuda<8,13> }, + { ebsynthCuda<1,14>, ebsynthCuda<2,14>, ebsynthCuda<3,14>, ebsynthCuda<4,14>, ebsynthCuda<5,14>, ebsynthCuda<6,14>, ebsynthCuda<7,14>, ebsynthCuda<8,14> }, + { ebsynthCuda<1,15>, ebsynthCuda<2,15>, ebsynthCuda<3,15>, ebsynthCuda<4,15>, ebsynthCuda<5,15>, ebsynthCuda<6,15>, ebsynthCuda<7,15>, ebsynthCuda<8,15> }, + { ebsynthCuda<1,16>, ebsynthCuda<2,16>, ebsynthCuda<3,16>, ebsynthCuda<4,16>, ebsynthCuda<5,16>, ebsynthCuda<6,16>, ebsynthCuda<7,16>, ebsynthCuda<8,16> }, + { ebsynthCuda<1,17>, ebsynthCuda<2,17>, ebsynthCuda<3,17>, ebsynthCuda<4,17>, ebsynthCuda<5,17>, ebsynthCuda<6,17>, ebsynthCuda<7,17>, ebsynthCuda<8,17> }, + { ebsynthCuda<1,18>, ebsynthCuda<2,18>, ebsynthCuda<3,18>, ebsynthCuda<4,18>, ebsynthCuda<5,18>, ebsynthCuda<6,18>, ebsynthCuda<7,18>, ebsynthCuda<8,18> }, + { ebsynthCuda<1,19>, ebsynthCuda<2,19>, ebsynthCuda<3,19>, ebsynthCuda<4,19>, ebsynthCuda<5,19>, ebsynthCuda<6,19>, ebsynthCuda<7,19>, ebsynthCuda<8,19> }, + { ebsynthCuda<1,20>, ebsynthCuda<2,20>, ebsynthCuda<3,20>, ebsynthCuda<4,20>, ebsynthCuda<5,20>, ebsynthCuda<6,20>, ebsynthCuda<7,20>, ebsynthCuda<8,20> }, + { ebsynthCuda<1,21>, ebsynthCuda<2,21>, ebsynthCuda<3,21>, ebsynthCuda<4,21>, ebsynthCuda<5,21>, ebsynthCuda<6,21>, ebsynthCuda<7,21>, ebsynthCuda<8,21> }, + { ebsynthCuda<1,22>, ebsynthCuda<2,22>, ebsynthCuda<3,22>, ebsynthCuda<4,22>, ebsynthCuda<5,22>, ebsynthCuda<6,22>, ebsynthCuda<7,22>, ebsynthCuda<8,22> }, + { ebsynthCuda<1,23>, ebsynthCuda<2,23>, ebsynthCuda<3,23>, ebsynthCuda<4,23>, ebsynthCuda<5,23>, ebsynthCuda<6,23>, ebsynthCuda<7,23>, ebsynthCuda<8,23> }, + { ebsynthCuda<1,24>, ebsynthCuda<2,24>, ebsynthCuda<3,24>, ebsynthCuda<4,24>, ebsynthCuda<5,24>, ebsynthCuda<6,24>, ebsynthCuda<7,24>, ebsynthCuda<8,24> } }; if (numStyleChannels>=1 && numStyleChannels<=EBSYNTH_MAX_STYLE_CHANNELS && numGuideChannels>=1 && numGuideChannels<=EBSYNTH_MAX_GUIDE_CHANNELS) { - dispatchEbsynth[numGuideChannels-1][numStyleChannels-1](ebsynthBackend, - numStyleChannels, + dispatchEbsynth[numGuideChannels-1][numStyleChannels-1](numStyleChannels, numGuideChannels, sourceWidth, sourceHeight, @@ -800,485 +1160,27 @@ EBSYNTH_API void ebsynthRun(int ebsynthBackend, numSearchVoteItersPerLevel, numPatchMatchItersPerLevel, stopThresholdPerLevel, - outputData); + outputNnfData, + outputImageData); } } -EBSYNTH_API -int ebsynthBackendAvailable(int ebsynthBackend) +int ebsynthBackendAvailableCuda() { - if (ebsynthBackend==EBSYNTH_BACKEND_CUDA) - { - int deviceCount = -1; - if (cudaGetDeviceCount(&deviceCount)!=cudaSuccess) { return 0; } + int deviceCount = -1; + if (cudaGetDeviceCount(&deviceCount)!=cudaSuccess) { return 0; } - for (int device=0;device=3) { - if (properties.major!=9999 && properties.major>=3) - { - return 1; - } + return 1; } } } return 0; } - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include - -#include -#include -#include - -#include "jzq.h" - -template -bool tryToParseArg(const std::vector& args,int* inout_argi,const char* name,bool* out_fail,FUNC handler) -{ - int& argi = *inout_argi; - bool& fail = *out_fail; - - if (argi<0 || argi>=args.size()) { fail = true; return false; } - - if (args[argi]==name) - { - argi++; - fail = !handler(); - return true; - } - - fail = false; return false; -} - -bool tryToParseIntArg(const std::vector& args,int* inout_argi,const char* name,int* out_value,bool* out_fail) -{ - return tryToParseArg(args,inout_argi,name,out_fail,[&] - { - int& argi = *inout_argi; - if (argi& args,int* inout_argi,const char* name,float* out_value,bool* out_fail) -{ - return tryToParseArg(args,inout_argi,name,out_fail,[&] - { - int& argi = *inout_argi; - if (argi& args,int* inout_argi,const char* name,std::string* out_value,bool* out_fail) -{ - return tryToParseArg(args,inout_argi,name,out_fail,[&] - { - int& argi = *inout_argi; - if (argi& args,int* inout_argi,const char* name,std::pair* out_value,bool* out_fail) -{ - return tryToParseArg(args,inout_argi,name,out_fail,[&] - { - int& argi = *inout_argi; - if ((argi+1)\n"); - printf(" -guide \n"); - printf(" -output \n"); - printf(" -weight \n"); - printf(" -uniformity \n"); - printf(" -patchsize \n"); - printf(" -pyramidlevels \n"); - printf(" -searchvoteiters \n"); - printf(" -patchmatchiters \n"); - printf(" -stopthreshold \n"); - printf("\n"); - return 1; - } - - std::string styleFileName; - float styleWeight = NAN; - std::string outputFileName = "output.png"; - - struct Guide - { - std::string sourceFileName; - std::string targetFileName; - float weight; - - int sourceWidth; - int sourceHeight; - unsigned char* sourceData; - - int targetWidth; - int targetHeight; - unsigned char* targetData; - - int numChannels; - }; - - std::vector guides; - - float uniformityWeight = 3500; - int patchSize = 5; - int numPyramidLevels = -1; - int numSearchVoteIters = 6; - int numPatchMatchIters = 4; - int stopThreshold = 5; - - std::string backend; - - { - std::vector args(argc); - for(int i=0;i guidePair; - - if (tryToParseStringArg(args,&argi,"-style",&styleFileName,&fail)) - { - styleWeight = NAN; - precedingStyleOrGuideWeight = &styleWeight; - argi++; - } - else if (tryToParseStringPairArg(args,&argi,"-guide",&guidePair,&fail)) - { - Guide guide; - guide.sourceFileName = guidePair.first; - guide.targetFileName = guidePair.second; - guide.weight = NAN; - guides.push_back(guide); - precedingStyleOrGuideWeight = &guides[guides.size()-1].weight; - argi++; - } - else if (tryToParseStringArg(args,&argi,"-output",&outputFileName,&fail)) - { - argi++; - } - else if (tryToParseFloatArg(args,&argi,"-weight",&weight,&fail)) - { - if (precedingStyleOrGuideWeight!=0) { *precedingStyleOrGuideWeight = weight; } - else { printf("error: at least one -style or -guide option must precede the -weight option!\n"); return 1; } - argi++; - } - else if (tryToParseFloatArg(args,&argi,"-uniformity",&uniformityWeight,&fail)) { argi++; } - else if (tryToParseIntArg(args,&argi,"-patchsize",&patchSize,&fail)) - { - if (patchSize<3) { printf("error: patchsize is too small!\n"); return 1; } - if (patchSize%2==0) { printf("error: patchsize must be an odd number!\n"); return 1; } - argi++; - } - else if (tryToParseIntArg(args,&argi,"-pyramidlevels",&numPyramidLevels,&fail)) - { - if (numPyramidLevels<1) { printf("error: bad argument for -pyramidlevels!\n"); return 1; } - argi++; - } - else if (tryToParseIntArg(args,&argi,"-searchvoteiters",&numSearchVoteIters,&fail)) - { - if (numSearchVoteIters<0) { printf("error: bad argument for -searchvoteiters!\n"); return 1; } - argi++; - } - else if (tryToParseIntArg(args,&argi,"-patchmatchiters",&numPatchMatchIters,&fail)) - { - if (numPatchMatchIters<0) { printf("error: bad argument for -patchmatchiters!\n"); return 1; } - argi++; - } - else if (tryToParseIntArg(args,&argi,"-stopthreshold",&stopThreshold,&fail)) - { - if (stopThreshold<0) { printf("error: bad argument for -stopthreshold!\n"); return 1; } - argi++; - } - else - { - printf("error: unrecognized option '%s'\n",args[argi].c_str()); - fail = true; - } - } - - if (fail) { return 1; } - } - - const int numGuides = guides.size(); - - int sourceWidth = 0; - int sourceHeight = 0; - unsigned char* sourceStyleData = tryLoad(styleFileName,&sourceWidth,&sourceHeight); - const int numStyleChannelsTotal = evalNumChannels(sourceStyleData,sourceWidth*sourceHeight); - - std::vector sourceStyle(sourceWidth*sourceHeight*numStyleChannelsTotal); - for(int xy=0;xy0) { sourceStyle[xy*numStyleChannelsTotal+0] = sourceStyleData[xy*4+0]; } - if (numStyleChannelsTotal==2) { sourceStyle[xy*numStyleChannelsTotal+1] = sourceStyleData[xy*4+3]; } - else if (numStyleChannelsTotal>1) { sourceStyle[xy*numStyleChannelsTotal+1] = sourceStyleData[xy*4+1]; } - if (numStyleChannelsTotal>2) { sourceStyle[xy*numStyleChannelsTotal+2] = sourceStyleData[xy*4+2]; } - if (numStyleChannelsTotal>3) { sourceStyle[xy*numStyleChannelsTotal+3] = sourceStyleData[xy*4+3]; } - } - - int targetWidth = 0; - int targetHeight = 0; - int numGuideChannelsTotal = 0; - - for(int i=0;i0 && (guide.targetWidth!=targetWidth || guide.targetHeight!=targetHeight)) { printf("error: target guide '%s' doesn't match the resolution of '%s'\n",guide.targetFileName.c_str(),guides[0].targetFileName.c_str()); return 1; } - else if (i==0) { targetWidth = guide.targetWidth; targetHeight = guide.targetHeight; } - - guide.numChannels = std::max(evalNumChannels(guide.sourceData,sourceWidth*sourceHeight), - evalNumChannels(guide.targetData,targetWidth*targetHeight)); - - numGuideChannelsTotal += guide.numChannels; - } - - if (numStyleChannelsTotal>EBSYNTH_MAX_STYLE_CHANNELS) { printf("error: too many style channels (%d), maximum number is %d\n",numStyleChannelsTotal,EBSYNTH_MAX_STYLE_CHANNELS); return 1; } - if (numGuideChannelsTotal>EBSYNTH_MAX_GUIDE_CHANNELS) { printf("error: too many guide channels (%d), maximum number is %d\n",numGuideChannelsTotal,EBSYNTH_MAX_GUIDE_CHANNELS); return 1; } - - std::vector sourceGuides(sourceWidth*sourceHeight*numGuideChannelsTotal); - for(int xy=0;xy0) { sourceGuides[xy*numGuideChannelsTotal+c+0] = guides[i].sourceData[xy*4+0]; } - if (numChannels==2) { sourceGuides[xy*numGuideChannelsTotal+c+1] = guides[i].sourceData[xy*4+3]; } - else if (numChannels>1) { sourceGuides[xy*numGuideChannelsTotal+c+1] = guides[i].sourceData[xy*4+1]; } - if (numChannels>2) { sourceGuides[xy*numGuideChannelsTotal+c+2] = guides[i].sourceData[xy*4+2]; } - if (numChannels>3) { sourceGuides[xy*numGuideChannelsTotal+c+3] = guides[i].sourceData[xy*4+3]; } - - c += numChannels; - } - } - - std::vector targetGuides(targetWidth*targetHeight*numGuideChannelsTotal); - for(int xy=0;xy0) { targetGuides[xy*numGuideChannelsTotal+c+0] = guides[i].targetData[xy*4+0]; } - if (numChannels==2) { targetGuides[xy*numGuideChannelsTotal+c+1] = guides[i].targetData[xy*4+3]; } - else if (numChannels>1) { targetGuides[xy*numGuideChannelsTotal+c+1] = guides[i].targetData[xy*4+1]; } - if (numChannels>2) { targetGuides[xy*numGuideChannelsTotal+c+2] = guides[i].targetData[xy*4+2]; } - if (numChannels>3) { targetGuides[xy*numGuideChannelsTotal+c+3] = guides[i].targetData[xy*4+3]; } - - c += numChannels; - } - } - - std::vector styleWeights(numStyleChannelsTotal); - if (isnan(styleWeight)) { styleWeight = 1.0f; } - for(int i=0;i guideWeights(numGuideChannelsTotal); - { - int c = 0; - for(int i=0;i=0;level--) - { - if (min(pyramidLevelSize(std::min(V2i(sourceWidth,sourceHeight),V2i(targetWidth,targetHeight)),level)) >= (2*patchSize+1)) - { - maxPyramidLevels = level+1; - break; - } - } - - if (numPyramidLevels==-1) { numPyramidLevels = maxPyramidLevels; } - numPyramidLevels = std::min(numPyramidLevels,maxPyramidLevels); - - std::vector numSearchVoteItersPerLevel(numPyramidLevels); - std::vector numPatchMatchItersPerLevel(numPyramidLevels); - std::vector stopThresholdPerLevel(numPyramidLevels); - for(int i=0;i output(targetWidth*targetHeight*numStyleChannelsTotal); - - printf("uniformity: %.0f\n",uniformityWeight); - printf("patchsize: %d\n",patchSize); - printf("pyramidlevels: %d\n",numPyramidLevels); - printf("searchvoteiters: %d\n",numSearchVoteIters); - printf("patchmatchiters: %d\n",numPatchMatchIters); - printf("stopthreshold: %d\n",stopThreshold); - - if (!ebsynthBackendAvailable(EBSYNTH_BACKEND_CUDA)) { printf("error: the CUDA backend is not available!\n"); return 1; } - - ebsynthRun(EBSYNTH_BACKEND_CUDA, - numStyleChannelsTotal, - numGuideChannelsTotal, - sourceWidth, - sourceHeight, - sourceStyle.data(), - sourceGuides.data(), - targetWidth, - targetHeight, - targetGuides.data(), - NULL, - styleWeights.data(), - guideWeights.data(), - uniformityWeight, - patchSize, - EBSYNTH_VOTEMODE_PLAIN, - numPyramidLevels, - numSearchVoteItersPerLevel.data(), - numPatchMatchItersPerLevel.data(), - stopThresholdPerLevel.data(), - output.data()); - - stbi_write_png(outputFileName.c_str(),targetWidth,targetHeight,numStyleChannelsTotal,output.data(),numStyleChannelsTotal*targetWidth); - - printf("result was written to %s\n",outputFileName.c_str()); - - stbi_image_free(sourceStyleData); - - for(int i=0;i bool checkCudaError_(T result,char const* const func,const char* const file,int const line) diff --git a/src/memarray2.h b/src/ebsynth_cuda_memarray2.h similarity index 93% rename from src/memarray2.h rename to src/ebsynth_cuda_memarray2.h index 3c45bfd..8de0319 100644 --- a/src/memarray2.h +++ b/src/ebsynth_cuda_memarray2.h @@ -2,11 +2,11 @@ // recognized, you are granted a perpetual, irrevocable license to copy // and modify this file as you see fit. -#ifndef MEMARRAY2_H_ -#define MEMARRAY2_H_ +#ifndef EBSYNTH_CUDA_MEMARRAY2_H_ +#define EBSYNTH_CUDA_MEMARRAY2_H_ #include "jzq.h" -//#include "cudacheck.h" +#include "ebsynth_cuda_check.h" template struct MemArray2 diff --git a/src/texarray2.h b/src/ebsynth_cuda_texarray2.h similarity index 98% rename from src/texarray2.h rename to src/ebsynth_cuda_texarray2.h index de9678d..0427f77 100644 --- a/src/texarray2.h +++ b/src/ebsynth_cuda_texarray2.h @@ -2,11 +2,11 @@ // recognized, you are granted a perpetual, irrevocable license to copy // and modify this file as you see fit. -#ifndef TEXARRAY2_H_ -#define TEXARRAY2_H_ +#ifndef EBSYNTH_CUDA_TEXARRAY2_H_ +#define EBSYNTH_CUDA_TEXARRAY2_H_ #include "jzq.h" -#include "cudacheck.h" +#include "ebsynth_cuda_check.h" #include diff --git a/src/ebsynth_nocuda.cpp b/src/ebsynth_nocuda.cpp new file mode 100644 index 0000000..b87b64a --- /dev/null +++ b/src/ebsynth_nocuda.cpp @@ -0,0 +1,33 @@ +// This software is in the public domain. Where that dedication is not +// recognized, you are granted a perpetual, irrevocable license to copy +// and modify this file as you see fit. + +void ebsynthRunCuda(int numStyleChannels, + int numGuideChannels, + int sourceWidth, + int sourceHeight, + void* sourceStyleData, + void* sourceGuideData, + int targetWidth, + int targetHeight, + void* targetGuideData, + void* targetModulationData, + float* styleWeights, + float* guideWeights, + float uniformityWeight, + int patchSize, + int voteMode, + int numPyramidLevels, + int* numSearchVoteItersPerLevel, + int* numPatchMatchItersPerLevel, + int* stopThresholdPerLevel, + void* outputNnfData, + void* outputImageData) +{ + +} + +int ebsynthBackendAvailableCuda() +{ + return 0; +} diff --git a/src/jzq.h b/src/jzq.h index 391fae9..507526d 100644 --- a/src/jzq.h +++ b/src/jzq.h @@ -1,1901 +1,1994 @@ -// This software is in the public domain. Where that dedication is not -// recognized, you are granted a perpetual, irrevocable license to copy -// and modify this file as you see fit. - -#ifndef JZQ_H_ -#define JZQ_H_ - -#include -#include -#include -#include -#include -#include -#include - -template struct zero { static __host__ __device__ T value(); }; - -template inline T clamp(const T& x,const T& xmin,const T& xmax); -template inline T lerp(const T& a,const T& b,const T& t); - -inline std::string spf(const std::string fmt,...); - -template -struct Vec -{ - T v[N]; - - __host__ __device__ Vec(); - template __host__ __device__ explicit Vec(const Vec& u); - explicit __host__ __device__ Vec(T v0); - - __host__ __device__ Vec(T v0,T v1); - __host__ __device__ Vec(T v0,T v1,T v2); - __host__ __device__ Vec(T v0,T v1,T v2,T v3); - __host__ __device__ Vec(T v0,T v1,T v2,T v3,T v4); - __host__ __device__ Vec(T v0,T v1,T v2,T v3,T v4,T v5); - - __host__ __device__ T& operator()(int i); - __host__ __device__ const T& operator()(int i) const; - __host__ __device__ T& operator[](int i); - __host__ __device__ const T& operator[](int i) const; - - __host__ __device__ Vec operator*=(const Vec& u); - __host__ __device__ Vec operator+=(const Vec& u); - - __host__ __device__ Vec operator*=(T s); - __host__ __device__ Vec operator+=(T s); -}; - -template Vec __host__ __device__ operator-(const Vec& u); -template Vec __host__ __device__ operator+(const Vec& u,const Vec& v); -template Vec __host__ __device__ operator-(const Vec& u,const Vec& v); -template Vec __host__ __device__ operator-(const Vec& u,const T v); -template Vec __host__ __device__ operator*(const Vec& u,const Vec& v); -template Vec __host__ __device__ operator/(const Vec& u,const Vec& v); -template Vec __host__ __device__ operator*(const T s,const Vec& u); -template Vec __host__ __device__ operator*(const Vec& u,const T s); -template Vec __host__ __device__ operator/(const Vec& u,const T s); - -template Vec __host__ __device__ operator<(const Vec& u,const Vec& v); -template Vec __host__ __device__ operator>(const Vec& u,const Vec& v); -template Vec __host__ __device__ operator<=(const Vec& u,const Vec& v); -template Vec __host__ __device__ operator>=(const Vec& u,const Vec& v); -template Vec __host__ __device__ operator==(const Vec& u,const Vec& v); -template Vec __host__ __device__ operator!=(const Vec& u,const Vec& v); - -template inline T dot(const Vec& u,const Vec& v); -template inline T cross(const Vec<2,T> &a,const Vec<2,T> &b); -template inline Vec<3,T> cross(const Vec<3,T> &a,const Vec<3,T> &b); -template inline T norm(const Vec& u); -template inline Vec normalize(const Vec& u); -template inline T min(const Vec& u); -template inline T max(const Vec& u); -template inline T sum(const Vec& u); -namespace std -{ -template inline Vec min(const Vec& u,const Vec& v); -template inline Vec max(const Vec& u,const Vec& v); -} -template inline Vec abs(const Vec& x); - -template inline bool any(const Vec& u); -template inline bool all(const Vec& u); - -template -struct Mat -{ - T m[M][N]; - - Mat(); - - Mat(T a00,T a01, - T a10,T a11); - - Mat(T a00,T a01,T a02, - T a10,T a11,T a12, - T a20,T a21,T a22); - - Mat(T a00,T a01,T a02,T a03, - T a10,T a11,T a12,T a13, - T a20,T a21,T a22,T a23, - T a30,T a31,T a32,T a33); - - T& operator()(int i,int j); - const T& operator()(int i,int j) const; - - T* data(); - const T* data() const; -}; - -template Mat operator*(const Mat& A,const Mat& B); - -template Vec operator*(const Mat& A,const Vec& u); -template Vec operator*(const Vec& u,const Mat& A); - -template Mat transpose(const Mat& A); -template T trace(const Mat& A); -template Mat inverse(const Mat& A); - -template -class Array2 -{ -public: - Array2(); - Array2(int width,int height); - explicit Array2(const Vec<2,int>& size); - Array2(const Array2& a); - ~Array2(); - - Array2& operator=(const Array2& a); - - inline T& operator[](int i); - inline const T& operator[](int i) const; - inline T& operator()(int i,int j); - inline const T& operator()(int i,int j) const; - inline T& operator()(const Vec<2,int>& ij); - inline const T& operator()(const Vec<2,int>& ij) const; - - Vec<2,int> size() const; - int size(int dim) const; - int width() const; - int height() const; - int numel() const; - T* data(); - const T* data() const; - void clear(); - void swap(Array2& b); - bool empty() const; - -private: - Vec<2,int> s; - T* d; -}; - -template Vec<2,int> size(const Array2& a); -template int size(const Array2& a,int dim); -template int numel(const Array2& a); -template void clear(Array2* a); -template void swap(Array2& a,Array2& b); -template T min(const Array2& a); -template T max(const Array2& a); -template Vec<2,T> minmax(const Array2& a); -template Vec<2,int> argmin(const Array2& a); -template Vec<2,int> argmax(const Array2& a); -template T sum(const Array2& a); -template void fill(Array2* a,const T& value); - -template Array2 apply(const Array2& a,F fun); - -template -class Array3 -{ -public: - Array3(); - explicit Array3(const Vec<3,int>& size); - Array3(int width,int height,int depth); - Array3(const Array3& a); - ~Array3(); - - Array3& operator=(const Array3& a); - - inline T& operator[](int i); - inline const T& operator[](int i) const; - inline T& operator()(int i,int j,int k); - inline const T& operator()(int i,int j,int k) const; - inline T& operator()(const Vec<3,int>& ijk); - inline const T& operator()(const Vec<3,int>& ijk) const; - - Vec<3,int> size() const; - int size(int dim) const; - int width() const; - int height() const; - int depth() const; - int numel() const; - T* data(); - const T* data() const; - void clear(); - void swap(Array3& b); - -private: - Vec<3,int> s; - T* d; -}; - -template Vec<3,int> size(const Array3& a); -template int size(const Array3& a,int dim); -template int numel(const Array3& a); -template void clear(Array3* a); -template void swap(Array3& a,Array3& b); - -typedef Vec<2,double> Vec2d; -typedef Vec<2,float> Vec2f; -typedef Vec<2,int> Vec2i; -typedef Vec<2,unsigned int> Vec2ui; -typedef Vec<2,short> Vec2s; -typedef Vec<2,unsigned short> Vec2us; -typedef Vec<2,char> Vec2c; -typedef Vec<2,unsigned char> Vec2uc; - -typedef Vec<3,double> Vec3d; -typedef Vec<3,float> Vec3f; -typedef Vec<3,int> Vec3i; -typedef Vec<3,unsigned int> Vec3ui; -typedef Vec<3,short> Vec3s; -typedef Vec<3,unsigned short> Vec3us; -typedef Vec<3,char> Vec3c; -typedef Vec<3,unsigned char> Vec3uc; - -typedef Vec<4,double> Vec4d; -typedef Vec<4,float> Vec4f; -typedef Vec<4,int> Vec4i; -typedef Vec<4,unsigned int> Vec4ui; -typedef Vec<4,short> Vec4s; -typedef Vec<4,unsigned short> Vec4us; -typedef Vec<4,char> Vec4c; -typedef Vec<4,unsigned char> Vec4uc; - -typedef Vec<5,double> Vec5d; -typedef Vec<5,float> Vec5f; -typedef Vec<5,int> Vec5i; -typedef Vec<5,unsigned int> Vec5ui; -typedef Vec<5,short> Vec5s; -typedef Vec<5,unsigned short> Vec5us; -typedef Vec<5,char> Vec5c; -typedef Vec<5,unsigned char> Vec5uc; - -typedef Vec<6,double> Vec6d; -typedef Vec<6,float> Vec6f; -typedef Vec<6,int> Vec6i; -typedef Vec<6,unsigned int> Vec6ui; -typedef Vec<6,short> Vec6s; -typedef Vec<6,unsigned short> Vec6us; -typedef Vec<6,char> Vec6c; -typedef Vec<6,unsigned char> Vec6uc; - -typedef Vec<2,double> V2d; -typedef Vec<2,float> V2f; -typedef Vec<2,int> V2i; -typedef Vec<2,unsigned int> V2ui; -typedef Vec<2,short> V2s; -typedef Vec<2,unsigned short> V2us; -typedef Vec<2,char> V2c; -typedef Vec<2,unsigned char> V2uc; - -typedef Vec<3,double> V3d; -typedef Vec<3,float> V3f; -typedef Vec<3,int> V3i; -typedef Vec<3,unsigned int> V3ui; -typedef Vec<3,short> V3s; -typedef Vec<3,unsigned short> V3us; -typedef Vec<3,char> V3c; -typedef Vec<3,unsigned char> V3uc; - -typedef Vec<4,double> V4d; -typedef Vec<4,float> V4f; -typedef Vec<4,int> V4i; -typedef Vec<4,unsigned int> V4ui; -typedef Vec<4,short> V4s; -typedef Vec<4,unsigned short> V4us; -typedef Vec<4,char> V4c; -typedef Vec<4,unsigned char> V4uc; - -typedef Vec<5,double> V5d; -typedef Vec<5,float> V5f; -typedef Vec<5,int> V5i; -typedef Vec<5,unsigned int> V5ui; -typedef Vec<5,short> V5s; -typedef Vec<5,unsigned short> V5us; -typedef Vec<5,char> V5c; -typedef Vec<5,unsigned char> V5uc; - -typedef Vec<6,double> V6d; -typedef Vec<6,float> V6f; -typedef Vec<6,int> V6i; -typedef Vec<6,unsigned int> V6ui; -typedef Vec<6,short> V6s; -typedef Vec<6,unsigned short> V6us; -typedef Vec<6,char> V6c; -typedef Vec<6,unsigned char> V6uc; - -typedef Mat<2,2,float> Mat2x2f; -typedef Mat<2,3,float> Mat2x3f; -typedef Mat<2,4,float> Mat2x4f; -typedef Mat<2,5,float> Mat2x5f; -typedef Mat<2,6,float> Mat2x6f; -typedef Mat<2,7,float> Mat2x7f; -typedef Mat<2,8,float> Mat2x8f; -typedef Mat<3,2,float> Mat3x2f; -typedef Mat<3,3,float> Mat3x3f; -typedef Mat<3,4,float> Mat3x4f; -typedef Mat<3,5,float> Mat3x5f; -typedef Mat<3,6,float> Mat3x6f; -typedef Mat<3,7,float> Mat3x7f; -typedef Mat<3,8,float> Mat3x8f; -typedef Mat<4,2,float> Mat4x2f; -typedef Mat<4,3,float> Mat4x3f; -typedef Mat<4,4,float> Mat4x4f; -typedef Mat<4,5,float> Mat4x5f; -typedef Mat<4,6,float> Mat4x6f; -typedef Mat<4,7,float> Mat4x7f; -typedef Mat<4,8,float> Mat4x8f; -typedef Mat<5,2,float> Mat5x2f; -typedef Mat<5,3,float> Mat5x3f; -typedef Mat<5,4,float> Mat5x4f; -typedef Mat<5,5,float> Mat5x5f; -typedef Mat<5,6,float> Mat5x6f; -typedef Mat<5,7,float> Mat5x7f; -typedef Mat<5,8,float> Mat5x8f; -typedef Mat<6,2,float> Mat6x2f; -typedef Mat<6,3,float> Mat6x3f; -typedef Mat<6,4,float> Mat6x4f; -typedef Mat<6,5,float> Mat6x5f; -typedef Mat<6,6,float> Mat6x6f; -typedef Mat<6,7,float> Mat6x7f; -typedef Mat<6,8,float> Mat6x8f; -typedef Mat<7,2,float> Mat7x2f; -typedef Mat<7,3,float> Mat7x3f; -typedef Mat<7,4,float> Mat7x4f; -typedef Mat<7,5,float> Mat7x5f; -typedef Mat<7,6,float> Mat7x6f; -typedef Mat<7,7,float> Mat7x7f; -typedef Mat<7,8,float> Mat7x8f; -typedef Mat<8,2,float> Mat8x2f; -typedef Mat<8,3,float> Mat8x3f; -typedef Mat<8,4,float> Mat8x4f; -typedef Mat<8,5,float> Mat8x5f; -typedef Mat<8,6,float> Mat8x6f; -typedef Mat<8,7,float> Mat8x7f; -typedef Mat<8,8,float> Mat8x8f; - -typedef Mat<2,2,double> Mat2x2d; -typedef Mat<2,3,double> Mat2x3d; -typedef Mat<2,4,double> Mat2x4d; -typedef Mat<2,5,double> Mat2x5d; -typedef Mat<2,6,double> Mat2x6d; -typedef Mat<2,7,double> Mat2x7d; -typedef Mat<2,8,double> Mat2x8d; -typedef Mat<3,2,double> Mat3x2d; -typedef Mat<3,3,double> Mat3x3d; -typedef Mat<3,4,double> Mat3x4d; -typedef Mat<3,5,double> Mat3x5d; -typedef Mat<3,6,double> Mat3x6d; -typedef Mat<3,7,double> Mat3x7d; -typedef Mat<3,8,double> Mat3x8d; -typedef Mat<4,2,double> Mat4x2d; -typedef Mat<4,3,double> Mat4x3d; -typedef Mat<4,4,double> Mat4x4d; -typedef Mat<4,5,double> Mat4x5d; -typedef Mat<4,6,double> Mat4x6d; -typedef Mat<4,7,double> Mat4x7d; -typedef Mat<4,8,double> Mat4x8d; -typedef Mat<5,2,double> Mat5x2d; -typedef Mat<5,3,double> Mat5x3d; -typedef Mat<5,4,double> Mat5x4d; -typedef Mat<5,5,double> Mat5x5d; -typedef Mat<5,6,double> Mat5x6d; -typedef Mat<5,7,double> Mat5x7d; -typedef Mat<5,8,double> Mat5x8d; -typedef Mat<6,2,double> Mat6x2d; -typedef Mat<6,3,double> Mat6x3d; -typedef Mat<6,4,double> Mat6x4d; -typedef Mat<6,5,double> Mat6x5d; -typedef Mat<6,6,double> Mat6x6d; -typedef Mat<6,7,double> Mat6x7d; -typedef Mat<6,8,double> Mat6x8d; -typedef Mat<7,2,double> Mat7x2d; -typedef Mat<7,3,double> Mat7x3d; -typedef Mat<7,4,double> Mat7x4d; -typedef Mat<7,5,double> Mat7x5d; -typedef Mat<7,6,double> Mat7x6d; -typedef Mat<7,7,double> Mat7x7d; -typedef Mat<7,8,double> Mat7x8d; -typedef Mat<8,2,double> Mat8x2d; -typedef Mat<8,3,double> Mat8x3d; -typedef Mat<8,4,double> Mat8x4d; -typedef Mat<8,5,double> Mat8x5d; -typedef Mat<8,6,double> Mat8x6d; -typedef Mat<8,7,double> Mat8x7d; -typedef Mat<8,8,double> Mat8x8d; - -typedef Array2 Array2d; -typedef Array2 Array2f; -typedef Array2 Array2i; -typedef Array2 Array2ui; -typedef Array2 Array2s; -typedef Array2 Array2us; -typedef Array2 Array2c; -typedef Array2 Array2uc; - -typedef Array2< Vec<2,double> > Array2V2d; -typedef Array2< Vec<2,float> > Array2V2f; -typedef Array2< Vec<2,int> > Array2V2i; -typedef Array2< Vec<2,unsigned int> > Array2V2ui; -typedef Array2< Vec<2,short> > Array2V2s; -typedef Array2< Vec<2,unsigned short> > Array2V2us; -typedef Array2< Vec<2,char> > Array2V2c; -typedef Array2< Vec<2,unsigned char> > Array2V2uc; - -typedef Array2< Vec<3,double> > Array2V3d; -typedef Array2< Vec<3,float> > Array2V3f; -typedef Array2< Vec<3,int> > Array2V3i; -typedef Array2< Vec<3,unsigned int> > Array2V3ui; -typedef Array2< Vec<3,short> > Array2V3s; -typedef Array2< Vec<3,unsigned short> > Array2V3us; -typedef Array2< Vec<3,char> > Array2V3c; -typedef Array2< Vec<3,unsigned char> > Array2V3uc; - -typedef Array2< Vec<4,double> > Array2V4d; -typedef Array2< Vec<4,float> > Array2V4f; -typedef Array2< Vec<4,int> > Array2V4i; -typedef Array2< Vec<4,unsigned int> > Array2V4ui; -typedef Array2< Vec<4,short> > Array2V4s; -typedef Array2< Vec<4,unsigned short> > Array2V4us; -typedef Array2< Vec<4,char> > Array2V4c; -typedef Array2< Vec<4,unsigned char> > Array2V4uc; - -typedef Array2 A2d; -typedef Array2 A2f; -typedef Array2 A2i; -typedef Array2 A2ui; -typedef Array2 A2s; -typedef Array2 A2us; -typedef Array2 A2c; -typedef Array2 A2uc; - -typedef Array2< Vec<2,double> > A2V2d; -typedef Array2< Vec<2,float> > A2V2f; -typedef Array2< Vec<2,int> > A2V2i; -typedef Array2< Vec<2,unsigned int> > A2V2ui; -typedef Array2< Vec<2,short> > A2V2s; -typedef Array2< Vec<2,unsigned short> > A2V2us; -typedef Array2< Vec<2,char> > A2V2c; -typedef Array2< Vec<2,unsigned char> > A2V2uc; - -typedef Array2< Vec<3,double> > A2V3d; -typedef Array2< Vec<3,float> > A2V3f; -typedef Array2< Vec<3,int> > A2V3i; -typedef Array2< Vec<3,unsigned int> > A2V3ui; -typedef Array2< Vec<3,short> > A2V3s; -typedef Array2< Vec<3,unsigned short> > A2V3us; -typedef Array2< Vec<3,char> > A2V3c; -typedef Array2< Vec<3,unsigned char> > A2V3uc; - -typedef Array2< Vec<4,double> > A2V4d; -typedef Array2< Vec<4,float> > A2V4f; -typedef Array2< Vec<4,int> > A2V4i; -typedef Array2< Vec<4,unsigned int> > A2V4ui; -typedef Array2< Vec<4,short> > A2V4s; -typedef Array2< Vec<4,unsigned short> > A2V4us; -typedef Array2< Vec<4,char> > A2V4c; -typedef Array2< Vec<4,unsigned char> > A2V4uc; - -typedef Array3 Array3d; -typedef Array3 Array3f; -typedef Array3 Array3i; -typedef Array3 Array3ui; -typedef Array3 Array3s; -typedef Array3 Array3us; -typedef Array3 Array3c; -typedef Array3 Array3uc; - -typedef Array3< Vec<2,double> > Array3V2d; -typedef Array3< Vec<2,float> > Array3V2f; -typedef Array3< Vec<2,int> > Array3V2i; -typedef Array3< Vec<2,unsigned int> > Array3V2ui; -typedef Array3< Vec<2,short> > Array3V2s; -typedef Array3< Vec<2,unsigned short> > Array3V2us; -typedef Array3< Vec<2,char> > Array3V2c; -typedef Array3< Vec<2,unsigned char> > Array3V2uc; - -typedef Array3< Vec<3,double> > Array3V3d; -typedef Array3< Vec<3,float> > Array3V3f; -typedef Array3< Vec<3,int> > Array3V3i; -typedef Array3< Vec<3,unsigned int> > Array3V3ui; -typedef Array3< Vec<3,short> > Array3V3s; -typedef Array3< Vec<3,unsigned short> > Array3V3us; -typedef Array3< Vec<3,char> > Array3V3c; -typedef Array3< Vec<3,unsigned char> > Array3V3uc; - -typedef Array3< Vec<4,double> > Array3V4d; -typedef Array3< Vec<4,float> > Array3V4f; -typedef Array3< Vec<4,int> > Array3V4i; -typedef Array3< Vec<4,unsigned int> > Array3V4ui; -typedef Array3< Vec<4,short> > Array3V4s; -typedef Array3< Vec<4,unsigned short> > Array3V4us; -typedef Array3< Vec<4,char> > Array3V4c; -typedef Array3< Vec<4,unsigned char> > Array3V4uc; - -typedef Array3 A3d; -typedef Array3 A3f; -typedef Array3 A3i; -typedef Array3 A3ui; -typedef Array3 A3s; -typedef Array3 A3us; -typedef Array3 A3c; -typedef Array3 A3uc; - -typedef Array3< Vec<2,double> > A3V2d; -typedef Array3< Vec<2,float> > A3V2f; -typedef Array3< Vec<2,int> > A3V2i; -typedef Array3< Vec<2,unsigned int> > A3V2ui; -typedef Array3< Vec<2,short> > A3V2s; -typedef Array3< Vec<2,unsigned short> > A3V2us; -typedef Array3< Vec<2,char> > A3V2c; -typedef Array3< Vec<2,unsigned char> > A3V2uc; - -typedef Array3< Vec<3,double> > A3V3d; -typedef Array3< Vec<3,float> > A3V3f; -typedef Array3< Vec<3,int> > A3V3i; -typedef Array3< Vec<3,unsigned int> > A3V3ui; -typedef Array3< Vec<3,short> > A3V3s; -typedef Array3< Vec<3,unsigned short> > A3V3us; -typedef Array3< Vec<3,char> > A3V3c; -typedef Array3< Vec<3,unsigned char> > A3V3uc; - -typedef Array3< Vec<4,double> > A3V4d; -typedef Array3< Vec<4,float> > A3V4f; -typedef Array3< Vec<4,int> > A3V4i; -typedef Array3< Vec<4,unsigned int> > A3V4ui; -typedef Array3< Vec<4,short> > A3V4s; -typedef Array3< Vec<4,unsigned short> > A3V4us; -typedef Array3< Vec<4,char> > A3V4c; -typedef Array3< Vec<4,unsigned char> > A3V4uc; - -template<> struct zero { static __host__ __device__ char value() { return 0; } }; -template<> struct zero { static __host__ __device__ unsigned char value() { return 0; } }; -template<> struct zero { static __host__ __device__ short value() { return 0; } }; -template<> struct zero { static __host__ __device__ unsigned short value() { return 0; } }; -template<> struct zero { static __host__ __device__ int value() { return 0; } }; -template<> struct zero { static __host__ __device__ unsigned int value() { return 0; } }; -template<> struct zero { static __host__ __device__ float value() { return 0.0f; } }; -template<> struct zero { static __host__ __device__ double value() { return 0.0; } }; - -template -struct zero> -{ - static __host__ __device__ Vec value() - { - Vec z; - for(int i=0;i::value(); } - return z; - } -}; - -template -struct zero> -{ - static __host__ __device__ Mat value() - { - Mat z; - for(int i=0;i::value(); - } - return z; - } -}; - -template inline -T clamp(const T& x,const T& xmin,const T& xmax) -{ - return std::min(std::max(x,xmin),xmax); -} - -template inline -T lerp(const T& a,const T& b,const T& t) -{ - return (1.0-t)*a+t*b; -} - -inline std::string spf(const std::string fmt,...) -{ - int size = 1024; - std::vector buf; - va_list ap; - - while(1) - { - if(size>16*1024*1024) { return std::string(""); } - - buf.resize(size); - - va_start(ap,fmt); - const int n = vsnprintf(&buf[0],size-1,fmt.c_str(),ap); - va_end(ap); - - if(n>-1 && n < size) - { - break; - } - else if(n>-1) - { - size = n + 1; - } - else - { - size = 2*size; - } - } - - return std::string(&buf[0]); -} - -template -__host__ __device__ -Vec::Vec() -{ -} - -template -__host__ __device__ -Vec::Vec(T v0) -{ - assert(N==1); - v[0]=v0; -} - -template -__host__ __device__ -Vec::Vec(T v0,T v1) -{ - assert(N==2); - v[0]=v0; v[1]=v1; -} - -template -__host__ __device__ -Vec::Vec(T v0,T v1,T v2) -{ - assert(N==3); - v[0]=v0; v[1]=v1; v[2]=v2; -} - -template -__host__ __device__ -Vec::Vec(T v0,T v1,T v2,T v3) -{ - assert(N==4); - v[0]=v0; v[1]=v1; v[2]=v2; v[3]=v3; -} - -template -__host__ __device__ -Vec::Vec(T v0,T v1,T v2,T v3,T v4) -{ - assert(N==5); - v[0]=v0; v[1]=v1; v[2]=v2; v[3]=v3; v[4]=v4; -} - -template -__host__ __device__ -Vec::Vec(T v0,T v1,T v2,T v3,T v4,T v5) -{ - assert(N==6); - v[0]=v0; v[1]=v1; v[2]=v2; v[3]=v3; v[4]=v4; v[5]=v5; -} - -template template -__host__ __device__ -Vec::Vec(const Vec& u) -{ - for(int i=0;i(u.v[i]); - } -} - -template -__host__ __device__ -T& Vec::operator()(int i) -{ - assert(i>=0 && i -__host__ __device__ -const T& Vec::operator()(int i) const -{ - assert(i>=0 && i -__host__ __device__ -T& Vec::operator[](int i) -{ - assert(i>=0 && i -__host__ __device__ -const T& Vec::operator[](int i) const -{ - assert(i>=0 && i -__host__ __device__ -Vec Vec::operator*=(const Vec& u) -{ - for(int i=0;i -__host__ __device__ -Vec Vec::operator+=(const Vec& u) -{ - for(int i=0;i -__host__ __device__ -Vec Vec::operator*=(T s) -{ - for(int i=0;i -__host__ __device__ -Vec Vec::operator+=(T s) -{ - for(int i=0;i -__host__ __device__ -Vec operator-(const Vec& u) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator+(const Vec& u,const Vec& v) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator-(const Vec& u,const Vec& v) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator-(const Vec& u,const T v) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator*(const Vec& u,const Vec& v) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator/(const Vec& u,const Vec& v) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator*(const T s,const Vec& u) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator*(const Vec& u,const T s) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator/(const Vec& u,const T s) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator<(const Vec& u,const Vec& v) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator>(const Vec& u,const Vec& v) -{ - Vec r; - for(int i=0;iv(i); - return r; -} - -template -__host__ __device__ -Vec operator<=(const Vec& u,const Vec& v) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator>=(const Vec& u,const Vec& v) -{ - Vec r; - for(int i=0;i=v(i); - return r; -} - -template -__host__ __device__ -Vec operator==(const Vec& u,const Vec& v) -{ - Vec r; - for(int i=0;i -__host__ __device__ -Vec operator!=(const Vec& u,const Vec& v) -{ - Vec r; - for(int i=0;i -inline T dot(const Vec& u,const Vec& v) -{ - assert(N>0); - T sumprod = u(0)*v(0); - for(int i=1;i -inline T cross(const Vec<2,T> &a,const Vec<2,T> &b) -{ - return a[0]*b[1]-a[1]*b[0]; -} - -template -inline Vec<3,T> cross(const Vec<3,T> &a,const Vec<3,T> &b) -{ - return Vec<3,T>(a[1]*b[2]-a[2]*b[1], - a[2]*b[0]-a[0]*b[2], - a[0]*b[1]-a[1]*b[0]); -} - -template -inline T norm(const Vec& u) -{ - return std::sqrt(dot(u,u)); -} - -template -inline Vec normalize(const Vec& u) -{ - return u/norm(u); -} - -template -inline bool any(const Vec& u) -{ - for(int i=0;i -inline bool all(const Vec& u) -{ - for(int i=0;i -inline T min(const Vec& u) -{ - assert(N>0); - - T minval = u(0); - - for(int i=1;i -inline T max(const Vec& u) -{ - assert(N>0); - - T maxval = u(0); - - for(int i=1;i maxval) - { - maxval = u(i); - } - } - - return maxval; -} - -template -inline T sum(const Vec& u) -{ - assert(N>0); - - T sumval = u(0); - - for(int i=1;i Vec -inline min(const Vec& u,const Vec& v) -{ - assert(N>0); - - Vec w; - - for(int i=0;i Vec -inline max(const Vec& u,const Vec& v) -{ - assert(N>0); - - Vec w; - - for(int i=0;i Vec -inline abs(const Vec& x) -{ - Vec out; - for(int i=0;i -Mat::Mat() {} - -template -Mat::Mat(T a00,T a01, - T a10,T a11) -{ - assert(M==2 && N==2); - - m[0][0] = a00; m[0][1] = a01; - m[1][0] = a10; m[1][1] = a11; -} - -template -Mat::Mat(T a00,T a01,T a02, - T a10,T a11,T a12, - T a20,T a21,T a22) -{ - assert(M==3 && N==3); - - m[0][0] = a00; m[0][1] = a01; m[0][2] = a02; - m[1][0] = a10; m[1][1] = a11; m[1][2] = a12; - m[2][0] = a20; m[2][1] = a21; m[2][2] = a22; -} - -template -Mat::Mat(T a00,T a01,T a02,T a03, - T a10,T a11,T a12,T a13, - T a20,T a21,T a22,T a23, - T a30,T a31,T a32,T a33) -{ - assert(M==4 && N==4); - - m[0][0] = a00; m[0][1] = a01; m[0][2] = a02; m[0][3] = a03; - m[1][0] = a10; m[1][1] = a11; m[1][2] = a12; m[1][3] = a13; - m[2][0] = a20; m[2][1] = a21; m[2][2] = a22; m[2][3] = a23; - m[3][0] = a30; m[3][1] = a31; m[3][2] = a32; m[3][3] = a33; -} - -template -T& Mat::operator()(int i,int j) -{ - assert(0<=i && i -const T& Mat::operator()(int i,int j) const -{ - assert(0<=i && i -T* Mat::data() -{ - return (T*)(&m[0][0]); -} - -template -const T* Mat::data() const -{ - return (T*)(&m[0][0]); -} - -template -Mat operator*(const Mat& A,const Mat& B) -{ - assert(N1==M2); - Mat C; - - fori(M1) - forj(N2) - { - T dot = 0; - fork(N1) dot += A(i,k) * B(k,j); - C(i,j) = dot; - } - - return C; -} - -template -Vec operator*(const Mat& A,const Vec& u) -{ - Vec v; - - fori(M) - { - T dot = 0; - forj(N) dot += A(i,j) * u(j); - v(i) = dot; - } - - return v; -} - -template -Vec operator*(const Vec& u,const Mat& A) -{ - Vec v; - - forj(N) - { - T dot = 0; - fori(M) dot += A(i,j) * u(i); - v(j) = dot; - } - - return v; -} - -/* -template -Mat identity() -{ - Mat A; - forij(N,N) A(i,j) = ((i==j) ? 1 : 0); - return A; -} -*/ - -template -Mat transpose(const Mat& A) -{ - Mat At; - - forij(N,M) At(i,j) = A(j,i); - - return At; -} - -template -T trace(const Mat& A) -{ - T sum = 0; - - fori(N) sum += A(i,i); - - return sum; -} - -template -Mat inverse(const Mat& A) -{ - Mat invA; - - invA = A; - - Vec colIndex; - Vec rowIndex; - Vec pivoted; - - fori(N) pivoted(i) = false; - - int i1, i2, row = 0, col = 0; - T save; - - for (int i0 = 0; i0 < N; i0++) - { - T fMax = 0.0f; - for (i1 = 0; i1 < N; i1++) - { - if (!pivoted(i1)) - { - for (i2 = 0; i2 < N; i2++) - { - if (!pivoted(i2)) - { - T fs = abs(invA(i1,i2)); - if (fs > fMax) - { - fMax = fs; - row = i1; - col = i2; - } - } - } - } - } - - //assert(fmax > eps) - - pivoted(col) = true; - - if (row != col) - { - forj(N) { T tmp = invA(row,j); invA(row,j) = invA(col,j); invA(col,j) = tmp; } - } - - rowIndex(i0) = row; - colIndex(i0) = col; - - T inv = ((T)1.0)/invA(col,col); - invA(col,col) = (T)1.0; - for (i2 = 0; i2 < N; i2++) - { - invA(col,i2) *= inv; - } - - for (i1 = 0; i1 < N; i1++) - { - if (i1 != col) - { - save = invA(i1,col); - invA(i1,col) = (T)0.0; - for (i2 = 0; i2 < N; i2++) - { - invA(i1,i2) -= invA(col,i2)*save; - } - } - } - } - - for (i1 = N-1; i1 >= 0; i1--) - { - if (rowIndex(i1) != colIndex(i1)) - { - for (i2 = 0; i2 < N; i2++) - { - save = invA(i2,rowIndex(i1)); - invA(i2,rowIndex(i1)) = invA(i2,colIndex(i1)); - invA(i2,colIndex(i1)) = save; - } - } - } - - return invA; -} - -#undef fori -#undef forj -#undef fork - -template -Array2::Array2() : s(0,0),d(0) {} - -template -Array2::Array2(int width,int height) -{ - assert(width>0 && height>0); - s = Vec2i(width,height); - d = new T[s(0)*s(1)]; -} - -template -Array2::Array2(const Vec2i& size) -{ - // XXX: predelat na neco jako assert(all(s>0)); - assert(size(0)>0 && size(1)>0); - s = size; - d = new T[s(0)*s(1)]; -} - -template -Array2::Array2(const Array2& a) -{ - // printf("COPY CONSTRUCTOR\n"); - s = a.s; - - if (s(0)>0 && s(1)>0) - { - d = new T[s(0)*s(1)]; - - // XXX: optimize this: - for(int i=0;i -Array2& Array2::operator=(const Array2& a) -{ - // printf("ASSIGNMENT\n"); - // printf("slow copy\n"); - if (this!=&a) - { - if (s(0)==a.s(0) && s(1)==a.s(1)) - { - // XXX: optimize this: - for(int i=0;i0 && a.s(1)>0) - { - d = new T[s(0)*s(1)]; - //memcpy(d,a.d,numel()*sizeof(T)); //XXX this will break down when T is not POD !!! - // XXX: optimize this: - for(int i=0;i -Array2::~Array2() -{ - delete[] d; -} - -template -inline T& Array2::operator[](int i) -{ - assert(i>=0 && i -inline const T& Array2::operator[](int i) const -{ - assert(i>=0 && i -inline T& Array2::operator()(int i,int j) -{ - assert(d!=0); - assert(i>=0 && i=0 && j -inline const T& Array2::operator()(int i,int j) const -{ - assert(d!=0); - assert(i>=0 && i=0 && j -inline T& Array2::operator()(const Vec<2,int>& ij) -{ - assert(d!=0); - assert(ij(0)>=0 && ij(0)=0 && ij(1) -inline const T& Array2::operator()(const Vec<2,int>& ij) const -{ - assert(d!=0); - assert(ij(0)>=0 && ij(0)=0 && ij(1) -Vec2i Array2::size() const -{ - return s; -} - -template -int Array2::size(int dim) const -{ - assert(dim==0 || dim==1); - return size()(dim); -} - -template -int Array2::width() const -{ - return size(0); -} - -template -int Array2::height() const -{ - return size(1); -} - -template -int Array2::numel() const -{ - return size(0)*size(1); -} - -template -T* Array2::data() -{ - return d; -} - -template -const T* Array2::data() const -{ - return d; -} - -template -bool Array2::empty() const -{ - return (numel()==0); -} - -template -void Array2::clear() -{ - delete[] d; - s = Vec2i(0,0); - d = 0; -} - -template -void Array2::swap(Array2& b) -{ - Vec2i tmp_s = s; - s = b.s; - b.s = tmp_s; - - T* tmp_d = d; - d = b.d; - b.d = tmp_d; -} - -template -Vec2i size(const Array2& a) -{ - return a.size(); -} - -template -int size(const Array2& a,int dim) -{ - return a.size(dim); -} - -template -int numel(const Array2& a) -{ - return a.numel(); -} - -template -void clear(Array2* a) -{ - a->clear(); -} - -template -void swap(Array2& a,Array2& b) -{ - a.swap(b); -} - -template -T min(const Array2& a) -{ - assert(numel(a)>0); - - const int n = numel(a); - - const T* d = a.data(); - - T minval = d[0]; - - for(int i=1;i -T max(const Array2& a) -{ - assert(numel(a)>0); - - const int n = numel(a); - - const T* d = a.data(); - - T maxval = d[0]; - - for(int i=1;i -Vec<2,T> minmax(const Array2& a) -{ - assert(numel(a)>0); - - const int n = numel(a); - - const T* d = a.data(); - - T minval = d[0]; - T maxval = d[0]; - - for(int i=1;i(minval,maxval); -} - -template -Vec2i argmin(const Array2& a) -{ - assert(numel(a)>0); - - T minValue = a(0,0); - Vec2i minIndex = Vec2i(0,0); - - for(int j=0;j -Vec2i argmax(const Array2& a) -{ - assert(numel(a)>0); - - T maxValue = a(0,0); - Vec2i maxIndex = Vec2i(0,0); - - for(int j=0;j -T sum(const Array2& a) -{ - assert(numel(a)>0); - - const int n = numel(a); - - const T* d = a.data(); - - T sumval = d[0]; - - for(int i=1;i -void fill(Array2* a,const T& value) -{ - assert(a!=0); - assert(a->numel()>0); - - const int n = a->numel(); - T* d = a->data(); - - for(int i=0;i -Array2 apply(const Array2& a,F fun) -{ - assert(numel(a) > 0); - - Array2 fun_a(size(a)); - - const int n = numel(a); - - for(int i=0;i -Array3::Array3() : s(0,0,0),d(0) {} - -template -Array3::Array3(int width,int height,int depth) -{ - assert(width>0 && height>0 && depth>0); - s = Vec3i(width,height,depth); - d = new T[s(0)*s(1)*s(2)]; -} - -template -Array3::Array3(const Vec3i& size) -{ - // XXX: predelat na neco jako assert(all(s>0)); - assert(size(0)>0 && size(1)>0 && size(2)>0); - s = size; - d = new T[s(0)*s(1)*s(2)]; -} - -template -Array3::Array3(const Array3& a) -{ - // printf("COPY CONSTRUCTOR\n"); - s = a.s; - - if (s(0)>0 && s(1)>0 && s(2)>0) - { - d = new T[s(0)*s(1)*s(2)]; - - // XXX: optimize this: - for(int i=0;i -Array3& Array3::operator=(const Array3& a) -{ - // printf("ASSIGNMENT\n"); - // printf("slow copy\n"); - if (this!=&a) - { - if (s(0)==a.s(0) && s(1)==a.s(1) && s(2)==a.s(2)) - { - // XXX: optimize this: - for(int i=0;i0 && a.s(1)>0 && a.s(2)>0) - { - d = new T[s(0)*s(1)*s(2)]; - // XXX: optimize this: - for(int i=0;i -Array3::~Array3() -{ - delete[] d; -} - -template -inline T& Array3::operator[](int i) -{ - assert(i>=0 && i -inline const T& Array3::operator[](int i) const -{ - assert(i>=0 && i -inline T& Array3::operator()(int i,int j,int k) -{ - assert(d!=0); - assert(i>=0 && i=0 && j=0 && k -inline const T& Array3::operator()(int i,int j,int k) const -{ - assert(d!=0); - assert(i>=0 && i=0 && j=0 && k -inline T& Array3::operator()(const Vec<3,int>& ijk) -{ - assert(d!=0); - assert(ijk(0)>=0 && ijk(0)=0 && ijk(1)=0 && ijk(2) -inline const T& Array3::operator()(const Vec<3,int>& ijk) const -{ - assert(d!=0); - assert(ijk(0)>=0 && ijk(0)=0 && ijk(1)=0 && ijk(2) -Vec3i Array3::size() const -{ - return s; -} - -template -int Array3::size(int dim) const -{ - assert(dim==0 || dim==1 || dim==2); - return size()(dim); -} - -template -int Array3::width() const -{ - return size(0); -} - -template -int Array3::height() const -{ - return size(1); -} - -template -int Array3::depth() const -{ - return size(2); -} - -template -int Array3::numel() const -{ - return size(0)*size(1)*size(2); -} - -template -T* Array3::data() -{ - return d; -} - -template -const T* Array3::data() const -{ - return d; -} - -template -void Array3::clear() -{ - delete[] d; - s = Vec3i(0,0,0); - d = 0; -} - -template -void Array3::swap(Array3& b) -{ - Vec3i tmp_s = s; - s = b.s; - b.s = tmp_s; - - T* tmp_d = d; - d = b.d; - b.d = tmp_d; -} - -template -Vec3i size(const Array3& a) -{ - return a.size(); -} - -template -int size(const Array3& a,int dim) -{ - return a.size(dim); -} - -template -int numel(const Array3& a) -{ - return a.numel(); -} - -template -void clear(Array3* a) -{ - a->clear(); -} - -template -void swap(Array3& a,Array3& b) -{ - a.swap(b); -} - -#endif +// This software is in the public domain. Where that dedication is not +// recognized, you are granted a perpetual, irrevocable license to copy +// and modify this file as you see fit. + +#ifndef JZQ_H_ +#define JZQ_H_ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __CUDACC__ + #define JZQ_DECORATOR __host__ __device__ +#else + #define JZQ_DECORATOR +#endif + +template struct zero { static JZQ_DECORATOR T value(); }; + +template JZQ_DECORATOR inline T clamp(const T& x,const T& xmin,const T& xmax); +template JZQ_DECORATOR inline T lerp(const T& a,const T& b,float t); + +inline std::string spf(const std::string fmt,...); + +template +struct Vec +{ + T v[N]; + + JZQ_DECORATOR Vec(); + template JZQ_DECORATOR explicit Vec(const Vec& u); + explicit JZQ_DECORATOR Vec(T v0); + + JZQ_DECORATOR Vec(T v0,T v1); + JZQ_DECORATOR Vec(T v0,T v1,T v2); + JZQ_DECORATOR Vec(T v0,T v1,T v2,T v3); + JZQ_DECORATOR Vec(T v0,T v1,T v2,T v3,T v4); + JZQ_DECORATOR Vec(T v0,T v1,T v2,T v3,T v4,T v5); + + JZQ_DECORATOR T& operator()(int i); + JZQ_DECORATOR const T& operator()(int i) const; + JZQ_DECORATOR T& operator[](int i); + JZQ_DECORATOR const T& operator[](int i) const; + + JZQ_DECORATOR Vec operator*=(const Vec& u); + JZQ_DECORATOR Vec operator+=(const Vec& u); + + JZQ_DECORATOR Vec operator*=(T s); + JZQ_DECORATOR Vec operator+=(T s); +}; + +template Vec JZQ_DECORATOR operator-(const Vec& u); +template Vec JZQ_DECORATOR operator+(const Vec& u,const Vec& v); +template Vec JZQ_DECORATOR operator-(const Vec& u,const Vec& v); +template Vec JZQ_DECORATOR operator-(const Vec& u,const T v); +template Vec JZQ_DECORATOR operator*(const Vec& u,const Vec& v); +template Vec JZQ_DECORATOR operator/(const Vec& u,const Vec& v); +template Vec JZQ_DECORATOR operator*(const T s,const Vec& u); +template Vec JZQ_DECORATOR operator*(const Vec& u,const T s); +template Vec JZQ_DECORATOR operator/(const Vec& u,const T s); + +template Vec JZQ_DECORATOR operator<(const Vec& u,const Vec& v); +template Vec JZQ_DECORATOR operator>(const Vec& u,const Vec& v); +template Vec JZQ_DECORATOR operator<=(const Vec& u,const Vec& v); +template Vec JZQ_DECORATOR operator>=(const Vec& u,const Vec& v); +template Vec JZQ_DECORATOR operator==(const Vec& u,const Vec& v); +template Vec JZQ_DECORATOR operator!=(const Vec& u,const Vec& v); + +template JZQ_DECORATOR inline T dot(const Vec& u,const Vec& v); +template JZQ_DECORATOR inline T cross(const Vec<2,T> &a,const Vec<2,T> &b); +template JZQ_DECORATOR inline Vec<3,T> cross(const Vec<3,T> &a,const Vec<3,T> &b); +template JZQ_DECORATOR inline T norm(const Vec& u); +template JZQ_DECORATOR inline Vec normalize(const Vec& u); +template JZQ_DECORATOR inline T min(const Vec& u); +template JZQ_DECORATOR inline T max(const Vec& u); +template JZQ_DECORATOR inline T sum(const Vec& u); +namespace std +{ +template inline Vec min(const Vec& u,const Vec& v); +template inline Vec max(const Vec& u,const Vec& v); +} +template inline Vec abs(const Vec& x); + +template inline bool any(const Vec& u); +template inline bool all(const Vec& u); + +template +struct Mat +{ + T m[M][N]; + + Mat(); + + Mat(T a00,T a01, + T a10,T a11); + + Mat(T a00,T a01,T a02, + T a10,T a11,T a12, + T a20,T a21,T a22); + + Mat(T a00,T a01,T a02,T a03, + T a10,T a11,T a12,T a13, + T a20,T a21,T a22,T a23, + T a30,T a31,T a32,T a33); + + T& operator()(int i,int j); + const T& operator()(int i,int j) const; + + T* data(); + const T* data() const; +}; + +template Mat operator*(const Mat& A,const Mat& B); + +template Vec operator*(const Mat& A,const Vec& u); +template Vec operator*(const Vec& u,const Mat& A); + +template Mat transpose(const Mat& A); +template T trace(const Mat& A); +template Mat inverse(const Mat& A); + +template +class Array2 +{ +public: + Array2(); + Array2(int width,int height); + explicit Array2(const Vec<2,int>& size); + Array2(const Array2& a); + ~Array2(); + + Array2& operator=(const Array2& a); + + inline T& operator[](int i); + inline const T& operator[](int i) const; + inline T& operator()(int i,int j); + inline const T& operator()(int i,int j) const; + inline T& operator()(const Vec<2,int>& ij); + inline const T& operator()(const Vec<2,int>& ij) const; + + Vec<2,int> size() const; + int size(int dim) const; + int width() const; + int height() const; + int numel() const; + T* data(); + const T* data() const; + void clear(); + void swap(Array2& b); + bool empty() const; + +private: + Vec<2,int> s; + T* d; +}; + +template Vec<2,int> size(const Array2& a); +template int size(const Array2& a,int dim); +template int numel(const Array2& a); +template void clear(Array2* a); +template void swap(Array2& a,Array2& b); +template T min(const Array2& a); +template T max(const Array2& a); +template Vec<2,T> minmax(const Array2& a); +template Vec<2,int> argmin(const Array2& a); +template Vec<2,int> argmax(const Array2& a); +template T sum(const Array2& a); +template void fill(Array2* a,const T& value); + +template Array2 apply(const Array2& a,F fun); + +template +class Array3 +{ +public: + Array3(); + explicit Array3(const Vec<3,int>& size); + Array3(int width,int height,int depth); + Array3(const Array3& a); + ~Array3(); + + Array3& operator=(const Array3& a); + + inline T& operator[](int i); + inline const T& operator[](int i) const; + inline T& operator()(int i,int j,int k); + inline const T& operator()(int i,int j,int k) const; + inline T& operator()(const Vec<3,int>& ijk); + inline const T& operator()(const Vec<3,int>& ijk) const; + + Vec<3,int> size() const; + int size(int dim) const; + int width() const; + int height() const; + int depth() const; + int numel() const; + T* data(); + const T* data() const; + void clear(); + void swap(Array3& b); + bool empty() const; + +private: + Vec<3,int> s; + T* d; +}; + +template Vec<3,int> size(const Array3& a); +template int size(const Array3& a,int dim); +template int numel(const Array3& a); +template void clear(Array3* a); +template void swap(Array3& a,Array3& b); + +typedef Vec<2,double> Vec2d; +typedef Vec<2,float> Vec2f; +typedef Vec<2,int> Vec2i; +typedef Vec<2,unsigned int> Vec2ui; +typedef Vec<2,short> Vec2s; +typedef Vec<2,unsigned short> Vec2us; +typedef Vec<2,char> Vec2c; +typedef Vec<2,unsigned char> Vec2uc; + +typedef Vec<3,double> Vec3d; +typedef Vec<3,float> Vec3f; +typedef Vec<3,int> Vec3i; +typedef Vec<3,unsigned int> Vec3ui; +typedef Vec<3,short> Vec3s; +typedef Vec<3,unsigned short> Vec3us; +typedef Vec<3,char> Vec3c; +typedef Vec<3,unsigned char> Vec3uc; + +typedef Vec<4,double> Vec4d; +typedef Vec<4,float> Vec4f; +typedef Vec<4,int> Vec4i; +typedef Vec<4,unsigned int> Vec4ui; +typedef Vec<4,short> Vec4s; +typedef Vec<4,unsigned short> Vec4us; +typedef Vec<4,char> Vec4c; +typedef Vec<4,unsigned char> Vec4uc; + +typedef Vec<5,double> Vec5d; +typedef Vec<5,float> Vec5f; +typedef Vec<5,int> Vec5i; +typedef Vec<5,unsigned int> Vec5ui; +typedef Vec<5,short> Vec5s; +typedef Vec<5,unsigned short> Vec5us; +typedef Vec<5,char> Vec5c; +typedef Vec<5,unsigned char> Vec5uc; + +typedef Vec<6,double> Vec6d; +typedef Vec<6,float> Vec6f; +typedef Vec<6,int> Vec6i; +typedef Vec<6,unsigned int> Vec6ui; +typedef Vec<6,short> Vec6s; +typedef Vec<6,unsigned short> Vec6us; +typedef Vec<6,char> Vec6c; +typedef Vec<6,unsigned char> Vec6uc; + +typedef Vec<2,double> V2d; +typedef Vec<2,float> V2f; +typedef Vec<2,int> V2i; +typedef Vec<2,unsigned int> V2ui; +typedef Vec<2,short> V2s; +typedef Vec<2,unsigned short> V2us; +typedef Vec<2,char> V2c; +typedef Vec<2,unsigned char> V2uc; + +typedef Vec<3,double> V3d; +typedef Vec<3,float> V3f; +typedef Vec<3,int> V3i; +typedef Vec<3,unsigned int> V3ui; +typedef Vec<3,short> V3s; +typedef Vec<3,unsigned short> V3us; +typedef Vec<3,char> V3c; +typedef Vec<3,unsigned char> V3uc; + +typedef Vec<4,double> V4d; +typedef Vec<4,float> V4f; +typedef Vec<4,int> V4i; +typedef Vec<4,unsigned int> V4ui; +typedef Vec<4,short> V4s; +typedef Vec<4,unsigned short> V4us; +typedef Vec<4,char> V4c; +typedef Vec<4,unsigned char> V4uc; + +typedef Vec<5,double> V5d; +typedef Vec<5,float> V5f; +typedef Vec<5,int> V5i; +typedef Vec<5,unsigned int> V5ui; +typedef Vec<5,short> V5s; +typedef Vec<5,unsigned short> V5us; +typedef Vec<5,char> V5c; +typedef Vec<5,unsigned char> V5uc; + +typedef Vec<6,double> V6d; +typedef Vec<6,float> V6f; +typedef Vec<6,int> V6i; +typedef Vec<6,unsigned int> V6ui; +typedef Vec<6,short> V6s; +typedef Vec<6,unsigned short> V6us; +typedef Vec<6,char> V6c; +typedef Vec<6,unsigned char> V6uc; + +typedef Mat<2,2,float> Mat2x2f; +typedef Mat<2,3,float> Mat2x3f; +typedef Mat<2,4,float> Mat2x4f; +typedef Mat<2,5,float> Mat2x5f; +typedef Mat<2,6,float> Mat2x6f; +typedef Mat<2,7,float> Mat2x7f; +typedef Mat<2,8,float> Mat2x8f; +typedef Mat<3,2,float> Mat3x2f; +typedef Mat<3,3,float> Mat3x3f; +typedef Mat<3,4,float> Mat3x4f; +typedef Mat<3,5,float> Mat3x5f; +typedef Mat<3,6,float> Mat3x6f; +typedef Mat<3,7,float> Mat3x7f; +typedef Mat<3,8,float> Mat3x8f; +typedef Mat<4,2,float> Mat4x2f; +typedef Mat<4,3,float> Mat4x3f; +typedef Mat<4,4,float> Mat4x4f; +typedef Mat<4,5,float> Mat4x5f; +typedef Mat<4,6,float> Mat4x6f; +typedef Mat<4,7,float> Mat4x7f; +typedef Mat<4,8,float> Mat4x8f; +typedef Mat<5,2,float> Mat5x2f; +typedef Mat<5,3,float> Mat5x3f; +typedef Mat<5,4,float> Mat5x4f; +typedef Mat<5,5,float> Mat5x5f; +typedef Mat<5,6,float> Mat5x6f; +typedef Mat<5,7,float> Mat5x7f; +typedef Mat<5,8,float> Mat5x8f; +typedef Mat<6,2,float> Mat6x2f; +typedef Mat<6,3,float> Mat6x3f; +typedef Mat<6,4,float> Mat6x4f; +typedef Mat<6,5,float> Mat6x5f; +typedef Mat<6,6,float> Mat6x6f; +typedef Mat<6,7,float> Mat6x7f; +typedef Mat<6,8,float> Mat6x8f; +typedef Mat<7,2,float> Mat7x2f; +typedef Mat<7,3,float> Mat7x3f; +typedef Mat<7,4,float> Mat7x4f; +typedef Mat<7,5,float> Mat7x5f; +typedef Mat<7,6,float> Mat7x6f; +typedef Mat<7,7,float> Mat7x7f; +typedef Mat<7,8,float> Mat7x8f; +typedef Mat<8,2,float> Mat8x2f; +typedef Mat<8,3,float> Mat8x3f; +typedef Mat<8,4,float> Mat8x4f; +typedef Mat<8,5,float> Mat8x5f; +typedef Mat<8,6,float> Mat8x6f; +typedef Mat<8,7,float> Mat8x7f; +typedef Mat<8,8,float> Mat8x8f; + +typedef Mat<2,2,double> Mat2x2d; +typedef Mat<2,3,double> Mat2x3d; +typedef Mat<2,4,double> Mat2x4d; +typedef Mat<2,5,double> Mat2x5d; +typedef Mat<2,6,double> Mat2x6d; +typedef Mat<2,7,double> Mat2x7d; +typedef Mat<2,8,double> Mat2x8d; +typedef Mat<3,2,double> Mat3x2d; +typedef Mat<3,3,double> Mat3x3d; +typedef Mat<3,4,double> Mat3x4d; +typedef Mat<3,5,double> Mat3x5d; +typedef Mat<3,6,double> Mat3x6d; +typedef Mat<3,7,double> Mat3x7d; +typedef Mat<3,8,double> Mat3x8d; +typedef Mat<4,2,double> Mat4x2d; +typedef Mat<4,3,double> Mat4x3d; +typedef Mat<4,4,double> Mat4x4d; +typedef Mat<4,5,double> Mat4x5d; +typedef Mat<4,6,double> Mat4x6d; +typedef Mat<4,7,double> Mat4x7d; +typedef Mat<4,8,double> Mat4x8d; +typedef Mat<5,2,double> Mat5x2d; +typedef Mat<5,3,double> Mat5x3d; +typedef Mat<5,4,double> Mat5x4d; +typedef Mat<5,5,double> Mat5x5d; +typedef Mat<5,6,double> Mat5x6d; +typedef Mat<5,7,double> Mat5x7d; +typedef Mat<5,8,double> Mat5x8d; +typedef Mat<6,2,double> Mat6x2d; +typedef Mat<6,3,double> Mat6x3d; +typedef Mat<6,4,double> Mat6x4d; +typedef Mat<6,5,double> Mat6x5d; +typedef Mat<6,6,double> Mat6x6d; +typedef Mat<6,7,double> Mat6x7d; +typedef Mat<6,8,double> Mat6x8d; +typedef Mat<7,2,double> Mat7x2d; +typedef Mat<7,3,double> Mat7x3d; +typedef Mat<7,4,double> Mat7x4d; +typedef Mat<7,5,double> Mat7x5d; +typedef Mat<7,6,double> Mat7x6d; +typedef Mat<7,7,double> Mat7x7d; +typedef Mat<7,8,double> Mat7x8d; +typedef Mat<8,2,double> Mat8x2d; +typedef Mat<8,3,double> Mat8x3d; +typedef Mat<8,4,double> Mat8x4d; +typedef Mat<8,5,double> Mat8x5d; +typedef Mat<8,6,double> Mat8x6d; +typedef Mat<8,7,double> Mat8x7d; +typedef Mat<8,8,double> Mat8x8d; + +typedef Array2 Array2d; +typedef Array2 Array2f; +typedef Array2 Array2i; +typedef Array2 Array2ui; +typedef Array2 Array2s; +typedef Array2 Array2us; +typedef Array2 Array2c; +typedef Array2 Array2uc; + +typedef Array2< Vec<2,double> > Array2V2d; +typedef Array2< Vec<2,float> > Array2V2f; +typedef Array2< Vec<2,int> > Array2V2i; +typedef Array2< Vec<2,unsigned int> > Array2V2ui; +typedef Array2< Vec<2,short> > Array2V2s; +typedef Array2< Vec<2,unsigned short> > Array2V2us; +typedef Array2< Vec<2,char> > Array2V2c; +typedef Array2< Vec<2,unsigned char> > Array2V2uc; + +typedef Array2< Vec<3,double> > Array2V3d; +typedef Array2< Vec<3,float> > Array2V3f; +typedef Array2< Vec<3,int> > Array2V3i; +typedef Array2< Vec<3,unsigned int> > Array2V3ui; +typedef Array2< Vec<3,short> > Array2V3s; +typedef Array2< Vec<3,unsigned short> > Array2V3us; +typedef Array2< Vec<3,char> > Array2V3c; +typedef Array2< Vec<3,unsigned char> > Array2V3uc; + +typedef Array2< Vec<4,double> > Array2V4d; +typedef Array2< Vec<4,float> > Array2V4f; +typedef Array2< Vec<4,int> > Array2V4i; +typedef Array2< Vec<4,unsigned int> > Array2V4ui; +typedef Array2< Vec<4,short> > Array2V4s; +typedef Array2< Vec<4,unsigned short> > Array2V4us; +typedef Array2< Vec<4,char> > Array2V4c; +typedef Array2< Vec<4,unsigned char> > Array2V4uc; + +typedef Array2 A2d; +typedef Array2 A2f; +typedef Array2 A2i; +typedef Array2 A2ui; +typedef Array2 A2s; +typedef Array2 A2us; +typedef Array2 A2c; +typedef Array2 A2uc; + +typedef Array2< Vec<2,double> > A2V2d; +typedef Array2< Vec<2,float> > A2V2f; +typedef Array2< Vec<2,int> > A2V2i; +typedef Array2< Vec<2,unsigned int> > A2V2ui; +typedef Array2< Vec<2,short> > A2V2s; +typedef Array2< Vec<2,unsigned short> > A2V2us; +typedef Array2< Vec<2,char> > A2V2c; +typedef Array2< Vec<2,unsigned char> > A2V2uc; + +typedef Array2< Vec<3,double> > A2V3d; +typedef Array2< Vec<3,float> > A2V3f; +typedef Array2< Vec<3,int> > A2V3i; +typedef Array2< Vec<3,unsigned int> > A2V3ui; +typedef Array2< Vec<3,short> > A2V3s; +typedef Array2< Vec<3,unsigned short> > A2V3us; +typedef Array2< Vec<3,char> > A2V3c; +typedef Array2< Vec<3,unsigned char> > A2V3uc; + +typedef Array2< Vec<4,double> > A2V4d; +typedef Array2< Vec<4,float> > A2V4f; +typedef Array2< Vec<4,int> > A2V4i; +typedef Array2< Vec<4,unsigned int> > A2V4ui; +typedef Array2< Vec<4,short> > A2V4s; +typedef Array2< Vec<4,unsigned short> > A2V4us; +typedef Array2< Vec<4,char> > A2V4c; +typedef Array2< Vec<4,unsigned char> > A2V4uc; + +typedef Array3 Array3d; +typedef Array3 Array3f; +typedef Array3 Array3i; +typedef Array3 Array3ui; +typedef Array3 Array3s; +typedef Array3 Array3us; +typedef Array3 Array3c; +typedef Array3 Array3uc; + +typedef Array3< Vec<2,double> > Array3V2d; +typedef Array3< Vec<2,float> > Array3V2f; +typedef Array3< Vec<2,int> > Array3V2i; +typedef Array3< Vec<2,unsigned int> > Array3V2ui; +typedef Array3< Vec<2,short> > Array3V2s; +typedef Array3< Vec<2,unsigned short> > Array3V2us; +typedef Array3< Vec<2,char> > Array3V2c; +typedef Array3< Vec<2,unsigned char> > Array3V2uc; + +typedef Array3< Vec<3,double> > Array3V3d; +typedef Array3< Vec<3,float> > Array3V3f; +typedef Array3< Vec<3,int> > Array3V3i; +typedef Array3< Vec<3,unsigned int> > Array3V3ui; +typedef Array3< Vec<3,short> > Array3V3s; +typedef Array3< Vec<3,unsigned short> > Array3V3us; +typedef Array3< Vec<3,char> > Array3V3c; +typedef Array3< Vec<3,unsigned char> > Array3V3uc; + +typedef Array3< Vec<4,double> > Array3V4d; +typedef Array3< Vec<4,float> > Array3V4f; +typedef Array3< Vec<4,int> > Array3V4i; +typedef Array3< Vec<4,unsigned int> > Array3V4ui; +typedef Array3< Vec<4,short> > Array3V4s; +typedef Array3< Vec<4,unsigned short> > Array3V4us; +typedef Array3< Vec<4,char> > Array3V4c; +typedef Array3< Vec<4,unsigned char> > Array3V4uc; + +typedef Array3 A3d; +typedef Array3 A3f; +typedef Array3 A3i; +typedef Array3 A3ui; +typedef Array3 A3s; +typedef Array3 A3us; +typedef Array3 A3c; +typedef Array3 A3uc; + +typedef Array3< Vec<2,double> > A3V2d; +typedef Array3< Vec<2,float> > A3V2f; +typedef Array3< Vec<2,int> > A3V2i; +typedef Array3< Vec<2,unsigned int> > A3V2ui; +typedef Array3< Vec<2,short> > A3V2s; +typedef Array3< Vec<2,unsigned short> > A3V2us; +typedef Array3< Vec<2,char> > A3V2c; +typedef Array3< Vec<2,unsigned char> > A3V2uc; + +typedef Array3< Vec<3,double> > A3V3d; +typedef Array3< Vec<3,float> > A3V3f; +typedef Array3< Vec<3,int> > A3V3i; +typedef Array3< Vec<3,unsigned int> > A3V3ui; +typedef Array3< Vec<3,short> > A3V3s; +typedef Array3< Vec<3,unsigned short> > A3V3us; +typedef Array3< Vec<3,char> > A3V3c; +typedef Array3< Vec<3,unsigned char> > A3V3uc; + +typedef Array3< Vec<4,double> > A3V4d; +typedef Array3< Vec<4,float> > A3V4f; +typedef Array3< Vec<4,int> > A3V4i; +typedef Array3< Vec<4,unsigned int> > A3V4ui; +typedef Array3< Vec<4,short> > A3V4s; +typedef Array3< Vec<4,unsigned short> > A3V4us; +typedef Array3< Vec<4,char> > A3V4c; +typedef Array3< Vec<4,unsigned char> > A3V4uc; + +template<> struct zero { static JZQ_DECORATOR char value() { return 0; } }; +template<> struct zero { static JZQ_DECORATOR unsigned char value() { return 0; } }; +template<> struct zero { static JZQ_DECORATOR short value() { return 0; } }; +template<> struct zero { static JZQ_DECORATOR unsigned short value() { return 0; } }; +template<> struct zero { static JZQ_DECORATOR int value() { return 0; } }; +template<> struct zero { static JZQ_DECORATOR unsigned int value() { return 0; } }; +template<> struct zero { static JZQ_DECORATOR float value() { return 0.0f; } }; +template<> struct zero { static JZQ_DECORATOR double value() { return 0.0; } }; + +template +struct zero> +{ + static JZQ_DECORATOR Vec value() + { + Vec z; + for(int i=0;i::value(); } + return z; + } +}; + +template +struct zero> +{ + static JZQ_DECORATOR Mat value() + { + Mat z; + for(int i=0;i::value(); + } + return z; + } +}; + +template JZQ_DECORATOR inline +T clamp(const T& x,const T& xmin,const T& xmax) +{ + return std::min(std::max(x,xmin),xmax); +} + +template JZQ_DECORATOR inline +T lerp(const T& a,const T& b,float t) +{ + return (1.0f-t)*a+t*b; +} + +inline std::string spf(const std::string fmt,...) +{ + int size = 1024; + std::vector buf; + va_list ap; + + while(1) + { + if(size>16*1024*1024) { return std::string(""); } + + buf.resize(size); + + va_start(ap,fmt); + const int n = vsnprintf(&buf[0],size-1,fmt.c_str(),ap); + va_end(ap); + + if(n>-1 && n < size) + { + break; + } + else if(n>-1) + { + size = n + 1; + } + else + { + size = 2*size; + } + } + + return std::string(&buf[0]); +} + +template +JZQ_DECORATOR +Vec::Vec() +{ +} + +template +JZQ_DECORATOR +Vec::Vec(T v0) +{ + assert(N==1); + v[0]=v0; +} + +template +JZQ_DECORATOR +Vec::Vec(T v0,T v1) +{ + assert(N==2); + v[0]=v0; v[1]=v1; +} + +template +JZQ_DECORATOR +Vec::Vec(T v0,T v1,T v2) +{ + assert(N==3); + v[0]=v0; v[1]=v1; v[2]=v2; +} + +template +JZQ_DECORATOR +Vec::Vec(T v0,T v1,T v2,T v3) +{ + assert(N==4); + v[0]=v0; v[1]=v1; v[2]=v2; v[3]=v3; +} + +template +JZQ_DECORATOR +Vec::Vec(T v0,T v1,T v2,T v3,T v4) +{ + assert(N==5); + v[0]=v0; v[1]=v1; v[2]=v2; v[3]=v3; v[4]=v4; +} + +template +JZQ_DECORATOR +Vec::Vec(T v0,T v1,T v2,T v3,T v4,T v5) +{ + assert(N==6); + v[0]=v0; v[1]=v1; v[2]=v2; v[3]=v3; v[4]=v4; v[5]=v5; +} + +template template +JZQ_DECORATOR +Vec::Vec(const Vec& u) +{ + for(int i=0;i(u.v[i]); + } +} + +template +JZQ_DECORATOR +T& Vec::operator()(int i) +{ + assert(i>=0 && i +JZQ_DECORATOR +const T& Vec::operator()(int i) const +{ + assert(i>=0 && i +JZQ_DECORATOR +T& Vec::operator[](int i) +{ + assert(i>=0 && i +JZQ_DECORATOR +const T& Vec::operator[](int i) const +{ + assert(i>=0 && i +JZQ_DECORATOR +Vec Vec::operator*=(const Vec& u) +{ + for(int i=0;i +JZQ_DECORATOR +Vec Vec::operator+=(const Vec& u) +{ + for(int i=0;i +JZQ_DECORATOR +Vec Vec::operator*=(T s) +{ + for(int i=0;i +JZQ_DECORATOR +Vec Vec::operator+=(T s) +{ + for(int i=0;i +JZQ_DECORATOR +Vec operator-(const Vec& u) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator+(const Vec& u,const Vec& v) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator-(const Vec& u,const Vec& v) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator-(const Vec& u,const T v) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator*(const Vec& u,const Vec& v) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator/(const Vec& u,const Vec& v) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator*(const T s,const Vec& u) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator*(const Vec& u,const T s) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator/(const Vec& u,const T s) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator<(const Vec& u,const Vec& v) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator>(const Vec& u,const Vec& v) +{ + Vec r; + for(int i=0;iv(i); + return r; +} + +template +JZQ_DECORATOR +Vec operator<=(const Vec& u,const Vec& v) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator>=(const Vec& u,const Vec& v) +{ + Vec r; + for(int i=0;i=v(i); + return r; +} + +template +JZQ_DECORATOR +Vec operator==(const Vec& u,const Vec& v) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR +Vec operator!=(const Vec& u,const Vec& v) +{ + Vec r; + for(int i=0;i +JZQ_DECORATOR inline T dot(const Vec& u,const Vec& v) +{ + assert(N>0); + T sumprod = u(0)*v(0); + for(int i=1;i +JZQ_DECORATOR inline T cross(const Vec<2,T> &a,const Vec<2,T> &b) +{ + return a[0]*b[1]-a[1]*b[0]; +} + +template +JZQ_DECORATOR inline Vec<3,T> cross(const Vec<3,T> &a,const Vec<3,T> &b) +{ + return Vec<3,T>(a[1]*b[2]-a[2]*b[1], + a[2]*b[0]-a[0]*b[2], + a[0]*b[1]-a[1]*b[0]); +} + +template +JZQ_DECORATOR inline T norm(const Vec& u) +{ + return std::sqrt(dot(u,u)); +} + +template +JZQ_DECORATOR inline Vec normalize(const Vec& u) +{ + return u/norm(u); +} + +template +JZQ_DECORATOR inline bool any(const Vec& u) +{ + for(int i=0;i +JZQ_DECORATOR inline bool all(const Vec& u) +{ + for(int i=0;i +JZQ_DECORATOR inline T min(const Vec& u) +{ + assert(N>0); + + T minval = u(0); + + for(int i=1;i +JZQ_DECORATOR inline T max(const Vec& u) +{ + assert(N>0); + + T maxval = u(0); + + for(int i=1;i maxval) + { + maxval = u(i); + } + } + + return maxval; +} + +template +JZQ_DECORATOR inline T sum(const Vec& u) +{ + assert(N>0); + + T sumval = u(0); + + for(int i=1;i Vec +inline min(const Vec& u,const Vec& v) +{ + assert(N>0); + + Vec w; + + for(int i=0;i Vec +inline max(const Vec& u,const Vec& v) +{ + assert(N>0); + + Vec w; + + for(int i=0;i Vec +inline abs(const Vec& x) +{ + Vec out; + for(int i=0;i +Mat::Mat() {} + +template +Mat::Mat(T a00,T a01, + T a10,T a11) +{ + assert(M==2 && N==2); + + m[0][0] = a00; m[0][1] = a01; + m[1][0] = a10; m[1][1] = a11; +} + +template +Mat::Mat(T a00,T a01,T a02, + T a10,T a11,T a12, + T a20,T a21,T a22) +{ + assert(M==3 && N==3); + + m[0][0] = a00; m[0][1] = a01; m[0][2] = a02; + m[1][0] = a10; m[1][1] = a11; m[1][2] = a12; + m[2][0] = a20; m[2][1] = a21; m[2][2] = a22; +} + +template +Mat::Mat(T a00,T a01,T a02,T a03, + T a10,T a11,T a12,T a13, + T a20,T a21,T a22,T a23, + T a30,T a31,T a32,T a33) +{ + assert(M==4 && N==4); + + m[0][0] = a00; m[0][1] = a01; m[0][2] = a02; m[0][3] = a03; + m[1][0] = a10; m[1][1] = a11; m[1][2] = a12; m[1][3] = a13; + m[2][0] = a20; m[2][1] = a21; m[2][2] = a22; m[2][3] = a23; + m[3][0] = a30; m[3][1] = a31; m[3][2] = a32; m[3][3] = a33; +} + +template +T& Mat::operator()(int i,int j) +{ + assert(0<=i && i +const T& Mat::operator()(int i,int j) const +{ + assert(0<=i && i +T* Mat::data() +{ + return (T*)(&m[0][0]); +} + +template +const T* Mat::data() const +{ + return (T*)(&m[0][0]); +} + +template +Mat operator*(const Mat& A,const Mat& B) +{ + assert(N1==M2); + Mat C; + + fori(M1) + forj(N2) + { + T dot = 0; + fork(N1) dot += A(i,k) * B(k,j); + C(i,j) = dot; + } + + return C; +} + +template +Vec operator*(const Mat& A,const Vec& u) +{ + Vec v; + + fori(M) + { + T dot = 0; + forj(N) dot += A(i,j) * u(j); + v(i) = dot; + } + + return v; +} + +template +Vec operator*(const Vec& u,const Mat& A) +{ + Vec v; + + forj(N) + { + T dot = 0; + fori(M) dot += A(i,j) * u(i); + v(j) = dot; + } + + return v; +} + +/* +template +Mat identity() +{ + Mat A; + forij(N,N) A(i,j) = ((i==j) ? 1 : 0); + return A; +} +*/ + +template +Mat transpose(const Mat& A) +{ + Mat At; + + forij(N,M) At(i,j) = A(j,i); + + return At; +} + +template +T trace(const Mat& A) +{ + T sum = 0; + + fori(N) sum += A(i,i); + + return sum; +} + +template +Mat inverse(const Mat& A) +{ + Mat invA; + + invA = A; + + Vec colIndex; + Vec rowIndex; + Vec pivoted; + + fori(N) pivoted(i) = false; + + int i1, i2, row = 0, col = 0; + T save; + + for (int i0 = 0; i0 < N; i0++) + { + T fMax = 0.0f; + for (i1 = 0; i1 < N; i1++) + { + if (!pivoted(i1)) + { + for (i2 = 0; i2 < N; i2++) + { + if (!pivoted(i2)) + { + T fs = abs(invA(i1,i2)); + if (fs > fMax) + { + fMax = fs; + row = i1; + col = i2; + } + } + } + } + } + + //assert(fmax > eps) + + pivoted(col) = true; + + if (row != col) + { + forj(N) { T tmp = invA(row,j); invA(row,j) = invA(col,j); invA(col,j) = tmp; } + } + + rowIndex(i0) = row; + colIndex(i0) = col; + + T inv = ((T)1.0)/invA(col,col); + invA(col,col) = (T)1.0; + for (i2 = 0; i2 < N; i2++) + { + invA(col,i2) *= inv; + } + + for (i1 = 0; i1 < N; i1++) + { + if (i1 != col) + { + save = invA(i1,col); + invA(i1,col) = (T)0.0; + for (i2 = 0; i2 < N; i2++) + { + invA(i1,i2) -= invA(col,i2)*save; + } + } + } + } + + for (i1 = N-1; i1 >= 0; i1--) + { + if (rowIndex(i1) != colIndex(i1)) + { + for (i2 = 0; i2 < N; i2++) + { + save = invA(i2,rowIndex(i1)); + invA(i2,rowIndex(i1)) = invA(i2,colIndex(i1)); + invA(i2,colIndex(i1)) = save; + } + } + } + + return invA; +} + +#undef fori +#undef forj +#undef fork + +template +Array2::Array2() : s(0,0),d(0) {} + +template +Array2::Array2(int width,int height) +{ + assert(width>0 && height>0); + s = Vec2i(width,height); + d = new T[s(0)*s(1)]; +} + +template +Array2::Array2(const Vec2i& size) +{ + // XXX: predelat na neco jako assert(all(s>0)); + assert(size(0)>0 && size(1)>0); + s = size; + d = new T[s(0)*s(1)]; +} + +template +Array2::Array2(const Array2& a) +{ + // printf("COPY CONSTRUCTOR\n"); + s = a.s; + + if (s(0)>0 && s(1)>0) + { + d = new T[s(0)*s(1)]; + + // XXX: optimize this: + for(int i=0;i +Array2& Array2::operator=(const Array2& a) +{ + // printf("ASSIGNMENT\n"); + // printf("slow copy\n"); + if (this!=&a) + { + if (s(0)==a.s(0) && s(1)==a.s(1)) + { + // XXX: optimize this: + for(int i=0;i0 && a.s(1)>0) + { + d = new T[s(0)*s(1)]; + //memcpy(d,a.d,numel()*sizeof(T)); //XXX this will break down when T is not POD !!! + // XXX: optimize this: + for(int i=0;i +Array2::~Array2() +{ + delete[] d; +} + +template +inline T& Array2::operator[](int i) +{ + assert(i>=0 && i +inline const T& Array2::operator[](int i) const +{ + assert(i>=0 && i +inline T& Array2::operator()(int i,int j) +{ + assert(d!=0); + assert(i>=0 && i=0 && j +inline const T& Array2::operator()(int i,int j) const +{ + assert(d!=0); + assert(i>=0 && i=0 && j +inline T& Array2::operator()(const Vec<2,int>& ij) +{ + assert(d!=0); + assert(ij(0)>=0 && ij(0)=0 && ij(1) +inline const T& Array2::operator()(const Vec<2,int>& ij) const +{ + assert(d!=0); + assert(ij(0)>=0 && ij(0)=0 && ij(1) +Vec2i Array2::size() const +{ + return s; +} + +template +int Array2::size(int dim) const +{ + assert(dim==0 || dim==1); + return size()(dim); +} + +template +int Array2::width() const +{ + return size(0); +} + +template +int Array2::height() const +{ + return size(1); +} + +template +int Array2::numel() const +{ + return size(0)*size(1); +} + +template +T* Array2::data() +{ + return d; +} + +template +const T* Array2::data() const +{ + return d; +} + +template +bool Array2::empty() const +{ + return (numel()==0); +} + +template +void Array2::clear() +{ + delete[] d; + s = Vec2i(0,0); + d = 0; +} + +template +void Array2::swap(Array2& b) +{ + Vec2i tmp_s = s; + s = b.s; + b.s = tmp_s; + + T* tmp_d = d; + d = b.d; + b.d = tmp_d; +} + +template +Vec2i size(const Array2& a) +{ + return a.size(); +} + +template +int size(const Array2& a,int dim) +{ + return a.size(dim); +} + +template +int numel(const Array2& a) +{ + return a.numel(); +} + +template +void clear(Array2* a) +{ + a->clear(); +} + +template +void swap(Array2& a,Array2& b) +{ + a.swap(b); +} + +template +T min(const Array2& a) +{ + assert(numel(a)>0); + + const int n = numel(a); + + const T* d = a.data(); + + T minval = d[0]; + + for(int i=1;i +T max(const Array2& a) +{ + assert(numel(a)>0); + + const int n = numel(a); + + const T* d = a.data(); + + T maxval = d[0]; + + for(int i=1;i +Vec<2,T> minmax(const Array2& a) +{ + assert(numel(a)>0); + + const int n = numel(a); + + const T* d = a.data(); + + T minval = d[0]; + T maxval = d[0]; + + for(int i=1;i(minval,maxval); +} + +template +Vec2i argmin(const Array2& a) +{ + assert(numel(a)>0); + + T minValue = a(0,0); + Vec2i minIndex = Vec2i(0,0); + + for(int j=0;j +Vec2i argmax(const Array2& a) +{ + assert(numel(a)>0); + + T maxValue = a(0,0); + Vec2i maxIndex = Vec2i(0,0); + + for(int j=0;j +T sum(const Array2& a) +{ + assert(numel(a)>0); + + const int n = numel(a); + + const T* d = a.data(); + + T sumval = d[0]; + + for(int i=1;i +void fill(Array2* a,const T& value) +{ + assert(a!=0); + assert(a->numel()>0); + + const int n = a->numel(); + T* d = a->data(); + + for(int i=0;i +Array2 apply(const Array2& a,F fun) +{ + assert(numel(a) > 0); + + Array2 fun_a(size(a)); + + const int n = numel(a); + + for(int i=0;i +Array3::Array3() : s(0,0,0),d(0) {} + +template +Array3::Array3(int width,int height,int depth) +{ + assert(width>0 && height>0 && depth>0); + s = Vec3i(width,height,depth); + d = new T[s(0)*s(1)*s(2)]; +} + +template +Array3::Array3(const Vec3i& size) +{ + // XXX: predelat na neco jako assert(all(s>0)); + assert(size(0)>0 && size(1)>0 && size(2)>0); + s = size; + d = new T[s(0)*s(1)*s(2)]; +} + +template +Array3::Array3(const Array3& a) +{ + // printf("COPY CONSTRUCTOR\n"); + s = a.s; + + if (s(0)>0 && s(1)>0 && s(2)>0) + { + d = new T[s(0)*s(1)*s(2)]; + + // XXX: optimize this: + for(int i=0;i +Array3& Array3::operator=(const Array3& a) +{ + // printf("ASSIGNMENT\n"); + // printf("slow copy\n"); + if (this!=&a) + { + if (s(0)==a.s(0) && s(1)==a.s(1) && s(2)==a.s(2)) + { + // XXX: optimize this: + for(int i=0;i0 && a.s(1)>0 && a.s(2)>0) + { + d = new T[s(0)*s(1)*s(2)]; + // XXX: optimize this: + for(int i=0;i +Array3::~Array3() +{ + delete[] d; +} + +template +inline T& Array3::operator[](int i) +{ + assert(i>=0 && i +inline const T& Array3::operator[](int i) const +{ + assert(i>=0 && i +inline T& Array3::operator()(int i,int j,int k) +{ + assert(d!=0); + assert(i>=0 && i=0 && j=0 && k +inline const T& Array3::operator()(int i,int j,int k) const +{ + assert(d!=0); + assert(i>=0 && i=0 && j=0 && k +inline T& Array3::operator()(const Vec<3,int>& ijk) +{ + assert(d!=0); + assert(ijk(0)>=0 && ijk(0)=0 && ijk(1)=0 && ijk(2) +inline const T& Array3::operator()(const Vec<3,int>& ijk) const +{ + assert(d!=0); + assert(ijk(0)>=0 && ijk(0)=0 && ijk(1)=0 && ijk(2) +Vec3i Array3::size() const +{ + return s; +} + +template +int Array3::size(int dim) const +{ + assert(dim==0 || dim==1 || dim==2); + return size()(dim); +} + +template +int Array3::width() const +{ + return size(0); +} + +template +int Array3::height() const +{ + return size(1); +} + +template +int Array3::depth() const +{ + return size(2); +} + +template +int Array3::numel() const +{ + return size(0)*size(1)*size(2); +} + +template +T* Array3::data() +{ + return d; +} + +template +const T* Array3::data() const +{ + return d; +} + +template +void Array3::clear() +{ + delete[] d; + s = Vec3i(0,0,0); + d = 0; +} + +template +void Array3::swap(Array3& b) +{ + Vec3i tmp_s = s; + s = b.s; + b.s = tmp_s; + + T* tmp_d = d; + d = b.d; + b.d = tmp_d; +} + +template +bool Array3::empty() const +{ + return (numel()==0); +} + +template +Vec3i size(const Array3& a) +{ + return a.size(); +} + +template +int size(const Array3& a,int dim) +{ + return a.size(dim); +} + +template +int numel(const Array3& a) +{ + return a.numel(); +} + +template +void clear(Array3* a) +{ + a->clear(); +} + +template +void swap(Array3& a,Array3& b) +{ + a.swap(b); +} + +template +void fill(Array3* a,const T& value) +{ + assert(a!=0); + assert(a->numel()>0); + + const int n = a->numel(); + T* d = a->data(); + + for(int i=0;i +Array3 a3read(const std::string& fileName) +{ + Array3 A; + if(!a3read(&A,fileName)) { return Array3(); } + return A; +} + +template +bool a3read(Array3* out_A,const std::string& fileName) +{ + FILE* f = fopen(fileName.c_str(),"rb"); + + if(!f) { return false; } + + int w,h,d,s; + + if(fread(&w,sizeof(w),1,f)!=1 || + fread(&h,sizeof(h),1,f)!=1 || + fread(&d,sizeof(d),1,f)!=1 || + fread(&s,sizeof(s),1,f)!=1 || + ((w*h*d)<1) || s!=sizeof(T)) + { + fclose(f); + return false; + } + + Array3 A(w,h,d); + + if(fread(A.data(),sizeof(T)*w*h*d,1,f)!=1) + { + fclose(f); + return false; + } + + if(out_A!=0) { *out_A = A; } + + fclose(f); + return true; +} + +template +bool a3write(const Array3& A,const std::string& fileName) +{ + if(A.numel()==0) { return false; } + + FILE* f = fopen(fileName.c_str(),"wb"); + + if(!f) { return false; } + + const int w = A.width(); + const int h = A.height(); + const int d = A.depth(); + const int s = sizeof(T); + + if(fwrite(&w,sizeof(w),1,f)!=1 || + fwrite(&h,sizeof(h),1,f)!=1 || + fwrite(&d,sizeof(d),1,f)!=1 || + fwrite(&s,sizeof(s),1,f)!=1 || + fwrite(A.data(),sizeof(T)*w*h*d,1,f)!=1) + { + fclose(f); + return false; + } + + fclose(f); + return true; +} +#endif diff --git a/src/patchmatch_gpu.h b/src/patchmatch_gpu.h deleted file mode 100644 index a1adaf1..0000000 --- a/src/patchmatch_gpu.h +++ /dev/null @@ -1,410 +0,0 @@ -// This software is in the public domain. Where that dedication is not -// recognized, you are granted a perpetual, irrevocable license to copy -// and modify this file as you see fit. - -#ifndef PATCHMATCH_GPU_H_ -#define PATCHMATCH_GPU_H_ - -#include -#include - -#include "texarray2.h" -#include "memarray2.h" - -struct pcgState -{ - uint64_t state; - uint64_t increment; -}; - -__device__ void pcgAdvance(pcgState* rng) -{ - rng->state = rng->state * 6364136223846793005ULL + rng->increment; -} - -__device__ uint32_t pcgOutput(uint64_t state) -{ - return (uint32_t)(((state >> 22u) ^ state) >> ((state >> 61u) + 22u)); -} - -__device__ uint32_t pcgRand(pcgState* rng) -{ - uint64_t oldstate = rng->state; - pcgAdvance(rng); - return pcgOutput(oldstate); -} - -__device__ void pcgInit(pcgState* rng,uint64_t seed,uint64_t stream) -{ - rng->state = 0U; - rng->increment = (stream << 1u) | 1u; - pcgAdvance(rng); - rng->state += seed; - pcgAdvance(rng); -} - -typedef Vec<1,float> V1f; -typedef Array2> A2V1f; - -__global__ void krnlInitRngStates(const int width, - const int height, - pcgState* rngStates) -{ - const int x = blockDim.x*blockIdx.x + threadIdx.x; - const int y = blockDim.y*blockIdx.y + threadIdx.y; - - if (x>>(width,height,gpuRngStates); - - return gpuRngStates; -} - -template -struct PatchSSD -{ - const TexArray2 A; - const TexArray2 B; - const Vec weights; - - PatchSSD(const TexArray2& A, - const TexArray2& B, - const Vec& weights) - - : A(A),B(B),weights(weights) {} - - __device__ float operator()(int patchWidth, - const int ax, - const int ay, - const int bx, - const int by, - const float ebest) - { - const int hpw = patchWidth/2; - float ssd = 0; - - for(int py=-hpw;py<=+hpw;py++) - { - for(int px=-hpw;px<=+hpw;px++) - { - const Vec pixelA = A(ax + px, ay + py); - const Vec pixelB = B(bx + px, by + py); - for(int i=0;iebest) { return ssd; } - } - - return ssd; - } -}; - -template -__global__ void krnlEvalErrorPass(const int patchWidth, - FUNC patchError, - const TexArray2<2,int> NNF, - TexArray2<1,float> E) -{ - const int x = blockDim.x*blockIdx.x + threadIdx.x; - const int y = blockDim.y*blockIdx.y + threadIdx.y; - - if (x& Omega,const int patchWidth,const int bx,const int by,const int incdec) -{ - const int r = patchWidth/2; - - for(int oy=-r;oy<=+r;oy++) - for(int ox=-r;ox<=+r;ox++) - { - const int x = bx+ox; - const int y = by+oy; - atomicAdd(&Omega.data[x+y*Omega.width],incdec); - //Omega.data[x+y*Omega.width] += incdec; - } -} - -int __device__ patchOmega(const int patchWidth,const int bx,const int by,const MemArray2& Omega) -{ - const int r = patchWidth/2; - - int sum = 0; - - for(int oy=-r;oy<=+r;oy++) - for(int ox=-r;ox<=+r;ox++) - { - const int x = bx+ox; - const int y = by+oy; - sum += Omega.data[x+y*Omega.width]; /// XXX: atomic read instead ?? - } - - return sum; -} - -template -__device__ void tryPatch(const V2i& sizeA, - const V2i& sizeB, - MemArray2& Omega, - const int patchWidth, - FUNC patchError, - const float lambda, - const int ax, - const int ay, - const int bx, - const int by, - V2i& nbest, - float& ebest) -{ - const float omegaBest = (float(sizeA(0)*sizeA(1)) / - float(sizeB(0)*sizeB(1))) * float(patchWidth*patchWidth); - - const float curOcc = (float(patchOmega(patchWidth,nbest(0),nbest(1),Omega))/float(patchWidth*patchWidth))/omegaBest; - const float newOcc = (float(patchOmega(patchWidth, bx, by,Omega))/float(patchWidth*patchWidth))/omegaBest; - - const float curErr = ebest; - const float newErr = patchError(patchWidth,ax,ay,bx,by,curErr+lambda*curOcc); - - if ((newErr+lambda*newOcc) < (curErr+lambda*curOcc)) - { - updateOmega(Omega,patchWidth, bx, by,+1); - updateOmega(Omega,patchWidth,nbest(0),nbest(1),-1); - nbest = V2i(bx,by); - ebest = newErr; - } -} - -template -__device__ void tryNeighborsOffset(const int x, - const int y, - const int ox, - const int oy, - V2i& nbest, - float& ebest, - const V2i& sizeA, - const V2i& sizeB, - MemArray2& Omega, - const int patchWidth, - FUNC patchError, - const float lambda, - const TexArray2<2,int>& NNF) -{ - const int hpw = patchWidth/2; - - const V2i on = NNF(x+ox,y+oy); - const int nx = on(0)-ox; - const int ny = on(1)-oy; - - if (nx>=hpw && nx=hpw && ny -__global__ void krnlPropagationPass(const V2i sizeA, - const V2i sizeB, - MemArray2 Omega, - const int patchWidth, - FUNC patchError, - const float lambda, - const int r, - const TexArray2<2,int> NNF, - TexArray2<2,int> NNF2, - TexArray2<1,float> E, - TexArray2<1,unsigned char> mask) -{ - const int x = blockDim.x*blockIdx.x + threadIdx.x; - const int y = blockDim.y*blockIdx.y + threadIdx.y; - - if (x -__device__ void tryRandomOffsetInRadius(const int r, - const V2i& sizeA, - const V2i& sizeB, - MemArray2& Omega, - const int patchWidth, - FUNC patchError, - const float lambda, - const int x, - const int y, - const V2i& norg, - V2i& nbest, - float& ebest, - pcgState* rngState) -{ - const int hpw = patchWidth/2; - - const int xmin = max(norg(0)-r,hpw); - const int xmax = min(norg(0)+r,sizeB(0)-1-hpw); - const int ymin = max(norg(1)-r,hpw); - const int ymax = min(norg(1)+r,sizeB(1)-1-hpw); - - const int nx = xmin+(pcgRand(rngState)%(xmax-xmin+1)); - const int ny = ymin+(pcgRand(rngState)%(ymax-ymin+1)); - - tryPatch(sizeA,sizeB,Omega,patchWidth,patchError,lambda,x,y,nx,ny,nbest,ebest); -} - -/* -template -__global__ void krnlRandomSearchPass(const V2i sizeA, - const V2i sizeB, - MemArray2 Omega, - const int patchWidth, - FUNC patchError, - const float lambda, - TexArray2<2,int> NNF, - TexArray2<1,float> E, - TexArray2<1,unsigned char> mask, - pcgState* rngStates) -{ - const int x = blockDim.x*blockIdx.x + threadIdx.x; - const int y = blockDim.y*blockIdx.y + threadIdx.y; - - if (x -__global__ void krnlRandomSearchPass(const V2i sizeA, - const V2i sizeB, - MemArray2 Omega, - const int patchWidth, - FUNC patchError, - const float lambda, - const int radius, - TexArray2<2,int> NNF, - TexArray2<1,float> E, - TexArray2<1,unsigned char> mask, - pcgState* rngStates) -{ - const int x = blockDim.x*blockIdx.x + threadIdx.x; - const int y = blockDim.y*blockIdx.y + threadIdx.y; - - if (x -void patchmatchGPU(const V2i sizeA, - const V2i sizeB, - MemArray2& Omega, - const int patchWidth, - FUNC patchError, - const float lambda, - const int numIters, - const int numThreadsPerBlock, - TexArray2<2,int>& NNF, - TexArray2<2,int>& NNF2, - TexArray2<1,float>& E, - TexArray2<1,unsigned char>& mask, - pcgState* rngStates) -{ - const dim3 threadsPerBlock = dim3(numThreadsPerBlock,numThreadsPerBlock); - const dim3 numBlocks = dim3((NNF.width+threadsPerBlock.x)/threadsPerBlock.x, - (NNF.height+threadsPerBlock.y)/threadsPerBlock.y); - - krnlEvalErrorPass<<>>(patchWidth,patchError,NNF,E); - - checkCudaError(cudaDeviceSynchronize()); - - for(int i=0;i>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,4,NNF,NNF2,E,mask); std::swap(NNF,NNF2); - - checkCudaError(cudaDeviceSynchronize()); - - krnlPropagationPass<<>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,2,NNF,NNF2,E,mask); std::swap(NNF,NNF2); - - checkCudaError(cudaDeviceSynchronize()); - - krnlPropagationPass<<>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,1,NNF,NNF2,E,mask); std::swap(NNF,NNF2); - - checkCudaError(cudaDeviceSynchronize()); - - for(int r=1;r>>(sizeA,sizeB,Omega,patchWidth,patchError,lambda,r,NNF,E,mask,rngStates); - } - - checkCudaError(cudaDeviceSynchronize()); - } - - krnlEvalErrorPass<<>>(patchWidth,patchError,NNF,E); - - checkCudaError(cudaDeviceSynchronize()); -} - -#endif