fft3dgpu這裡沒源碼也不知道怎麼回事了……sigma and beta have the same meaning as in fft3dfilter. Default=2.
fft3dgpu這裡沒源碼也不知道怎麼回事了……sigma and beta have the same meaning as in fft3dfilter. Default=2.
代码: 全选
/**
 * Constructs one FFT3dGPU filter instance and allocates every Direct3D resource it needs.
 *
 * @param cl1         Source clip, forwarded to GenericVideoFilter.
 * @param _sigma      Noise sigma; stored as sigma = _sigma/255.
 * @param _beta       Stored unchanged as beta.
 * @param _bw,_bh     Requested block width/height. Rounded up to a power of two and
 *                    clamped to [4,512].
 * @param _bt         Temporal mode, must be in -1..4 (otherwise an error is thrown).
 *                    Stored as bm; bt is _bt when positive, else 1.
 *                    bm==0 creates Kalman filters, bm>0 a Wiener filter, bm>1 GPU caches.
 * @param sharpen     Non-zero creates a Sharpen object.
 * @param _plane      <=0 selects the Y texture set (plane=1), anything else the U+V set (plane=2).
 * @param _mode       0..2; any other value is replaced by 0.
 * @param border      Used for the block step (bw-4*border) in mode 2 and passed to ImgStream.
 * @param precision   FLOAT16 selects half-float textures; values outside
 *                    [FLOAT16,FLOAT32_ALL] are replaced by FLOAT32_FFT.
 * @param NVPerf      When true the D3D window is shown, a Dxinput is created and the
 *                    back buffer is fetched.
 * @param _degrid     Non-zero enables the degrid setup pass at the end of the constructor.
 * @param scutoff,svr Passed to Sharpen.
 * @param smin,smax   Divided by 255, then squared and scaled before being passed to Sharpen.
 * @param kratio      Passed to KalmanFilter.
 * @param ow,oh       Overlap sizes; rescaled to the rounded block size, ow forced even,
 *                    both clamped to half a block.
 * @param wintype     0..2 (else 0); passed to ImgStream2.
 * @param interlaced  Changes the ny computation in mode 1; passed to ImgStream2.
 * @param _sigma2,_sigma3,_sigma4  If any of them differs from _sigma a sigma pattern
 *                    texture is used (usePattern).
 * @param fftcode     RADIX2LUT -> FFT2dRR, STOCKHAM -> FFT2dRR2, MEASURE -> both are run
 *                    through MeasureFFT and one of them is kept.
 * @param getdst      Stored in GetDst.
 * @param env         Script environment; used to report errors with ThrowError.
 *
 * NOTE(review): env->ThrowError is called from inside the constructor after resources
 * have been allocated with NEW; whether those are released on that path cannot be seen
 * from this block.
 */
FFT3dGPU::FFT3dGPU(PClip cl1,float _sigma,float _beta,int _bw,int _bh,int _bt,float sharpen,int _plane,int _mode,int border,int precision,
	bool NVPerf,float _degrid,float scutoff,float svr,float smin,float smax,float kratio,int ow, int oh,int wintype,bool interlaced,
	float _sigma2,float _sigma3,float _sigma4,FFTCODE fftcode,
	FFT3dGPUallPlane* getdst,IScriptEnvironment* env):
	GenericVideoFilter(cl1),plane(_plane<=0?1:2),sigma(_sigma/255),beta(_beta),bw(_bw),bh(_bh),height(vi.IsYUY2()?vi.height:vi.height/plane),width(vi.width/plane),img(0),
	FImg(0),FImg1(0),bt(_bt>0?_bt:1),bm(_bt),useHalf(precision==FLOAT16),sharpen(sharpen!=0),mode(_mode),cacheY(0),cacheU(0),cacheV(0),imgp(0),mutex(0),
	pDevice(d3ddevice.Device()),gtype(D3Dwindow.GetGPUType()),d3ddevice(D3Dwindow),NVPERF(NVPerf),fft(0),
	gridsample(0),mintex(0),selectDC(0),calcgridcorrection(0),degrid(_degrid),usePattern(_sigma!=_sigma2||_sigma3!=_sigma4||_sigma!=_sigma3),
	FImgDegrid(0),FImgDegridp(),current(bm==1?0:bm==4?2:1),
	Pattern(0),sharp(0),wiennerfilter(0),kalmanY(0),kalmanU(0),kalmanV(0),
	FImg2(0),UploadImgY(0),UploadImgY1(0),UploadImgY2(0),UploadImgU(0),UploadImgU1(0),UploadImgU2(0),
	UploadImgV(0),UploadImgV1(0),UploadImgV2(0),
	DownloadImgY(0),DownloadImgU(0),DownloadImgV(0),
	lastn(-1),nuploaded(-100),GetDst(getdst),convert2(0),convert(0),
	DI(NVPerf?new Dxinput(HM,D3Dwindow.GetWindow()):0)
{
	// Same 1/255 scaling that sigma received in the initializer list.
	smin/=255;
	smax/=255;
	LOG("FFT3dGPU constructor address: "<<std::hex<<(unsigned int)this);
	imgp=0;
	LOG("Thread ID: "<<std::dec<<GetCurrentThreadId()<<std::hex);
	HRESULT hr=1;
	if(!pDevice)
		env->ThrowError("Error Creating Direct3D Device");
	if(NVPerf)
	{
		D3Dwindow.Show();
		//pDevice->GetBackBuffer(0,0, D3DBACKBUFFER_TYPE_MONO,&backbuffer );
	}
	if(!vi.IsPlanar()&&!vi.IsYUY2())
		env->ThrowError("Only YV12 or YUY2 colorspace is supported. Use converttoyv12() or converttoyuy2() before fft3dgpu.");
	/*
	if(!(mutex=CreateMutex(NULL,FALSE,"fft3dgpu_mutex")))
	env->ThrowError("Couldn't create mutex");
	WaitForSingleObject(mutex,INFINITE);*/
	D3DCAPS9 Cap;
	LOG("GetCaps...")
	pDevice->GetDeviceCaps(&Cap);
	LOG("done");
	LOG("Check pixelshader version")
	// Major version lives in bits 8..15 of PixelShaderVersion; extract it with shift/mask
	// instead of reading the second byte through a pointer cast (endianness independent).
	if(((Cap.PixelShaderVersion>>8)&0xFF)<2)
		env->ThrowError("Only pixelshader 2.0 or greater supported");
	LOG("Setup device done");

	//Setup bw and bh to be a power of two
	// NOTE(review): non-positive _bw/_bh are not rejected before this point and _bw/_bh
	// are used as divisors a few lines below - confirm callers never pass 0.
	unsigned int logn=0;
	for(unsigned int i=(bw-1);i>0;i/=2,logn++);
	bw=1;
	for(unsigned int i=1;i<=logn;i++)
		bw*=2;
	if(bw<4)
		bw=4;
	if(bw>512)
		bw=512;
	logn=0;
	for(unsigned int i=(bh-1);i>0;i/=2,logn++);
	bh=1;
	for(unsigned int i=1;i<=logn;i++)
		bh*=2;
	if(bh<4)
		bh=4;
	if(bh>512)
		bh=512;

	// Validate / normalise the remaining parameters.
	if(bm<-1||bm>4)
		env->ThrowError("Valid modes for bt are -1,0,1,2,3,4");
	if(mode<0||mode>2)
		mode=0;
	// Rescale the overlap by the same factor the block size changed by when it was rounded.
	ow*=(float)bw/_bw;
	ow&=0xFFFFFFFE;//makes sure that ow is even;
	oh*=(float)bh/_bh;
	// Overlap may not exceed half a block.
	if(oh*2>bh)
		oh=bh/2;
	if(ow*2>bw)
		ow=bw/2;
	if(precision<FLOAT16||precision>FLOAT32_ALL)
		precision=FLOAT32_FFT;
	if(wintype<0||wintype>2)
		wintype=0;
	LOG("Setup bw, bh done");

	//Calculate number of times the block(bw,bh) are repeated in x and y dimension
	if(mode!=2){
		if(mode==0||(bw==ow*2&&bh==oh*2))//half overlap
		{
			if(mode==1&&interlaced)
				ny=2*(height/2+bh*1.5-1)/bh;
			else
				ny=(height+bh*1.5-1)/bh;
			nx=(width+bw*1.5-1)/bw;
		}
		else
		{
			nx=((width+ow-1)/(bw-ow)+1+1)/2;
			if(mode==1&&interlaced)
				ny=2*(((height/2+oh-1)/(bh-oh)+1+1)/2);
			else
				ny=((height+oh-1)/(bh-oh)+1+1)/2;
		}
	}
	else
	{
		// Mode 2: the effective step per block is reduced by 4*border.
		nx=(width+(bw-4*border)*1.5-1)/(bw-4*border);
		ny=(height+(bh-4*border)*1.5-1)/(bh-4*border);
	}
	//Calculate total framesize
	totw=nx*bw;
	toth=ny*bh;

	// Upload/download textures: one Y set when plane==1, otherwise a U set and a V set.
	LOG("Creating textures");
	if(plane==1)
	{
		UploadImgY=NEW TextureM(pDevice,(width+1)>>1,height,gtype->FIXED2(),hr);
		UploadImgY1=NEW TextureM(pDevice,(width+1)>>1,height,gtype->FIXED2(),hr);
		UploadImgY2=NEW TextureM(pDevice,(width+1)>>1,height,gtype->FIXED2(),hr);
		DownloadImgY=NEW TextureRT(pDevice,(width+3)>>2,height,gtype->FIXED4(),hr);
	}
	else
	{
		UploadImgU=NEW TextureM(pDevice,(width+1)/2,height,gtype->FIXED2(),hr);
		UploadImgU1=NEW TextureM(pDevice,(width+1)/2,height,gtype->FIXED2(),hr);
		UploadImgU2=NEW TextureM(pDevice,(width+1)/2,height,gtype->FIXED2(),hr);
		DownloadImgU=NEW TextureRT(pDevice,(width+3)/4,height,gtype->FIXED4(),hr);
		UploadImgV=NEW TextureM(pDevice,(width+1)/2,height,gtype->FIXED2(),hr);
		UploadImgV1=NEW TextureM(pDevice,(width+1)/2,height,gtype->FIXED2(),hr);
		UploadImgV2=NEW TextureM(pDevice,(width+1)/2,height,gtype->FIXED2(),hr);
		DownloadImgV=NEW TextureRT(pDevice,(width+3)/4,height,gtype->FIXED4(),hr);
	}

	// Working image: a single texture (img) unless mode==1, which uses a texture pair (imgp).
	// Texture format follows useHalf.
	LOG("Setup texture Img & Imgp");
	if(useHalf)
	{
		if(mode!=1)
			img=NEW TextureRT(pDevice,totw/2,toth,gtype->HALF4(),hr);
		else
			imgp=NEW pTextureRTpair(NEW TextureRT(pDevice,totw/2,toth,gtype->HALF4(),hr),NEW TextureRT(pDevice,totw/2,toth,gtype->HALF4(),hr));
	}
	else
	{
		if(mode!=1)
			img=NEW TextureRT(pDevice,totw/2,toth,gtype->FLOAT4(),hr);
		else
			imgp=NEW pTextureRTpair(NEW TextureRT(pDevice,totw/2,toth,gtype->FLOAT4(),hr),NEW TextureRT(pDevice,totw/2,toth,gtype->FLOAT4(),hr));
	}
	if(FAILED(hr))
		env->ThrowError("Failed Creating FFT3dGPU::Texture img");

	LOG("Creating fft class..");
	switch(fftcode)
	{
	case RADIX2LUT:
		fft=NEW FFT2dRR(bw,nx,bh,ny,bt,pFreeFImgdPool,pDevice,gtype,precision,hr);
		break;
	case STOCKHAM:
		fft=NEW FFT2dRR2(bw,nx,bh,ny,bt,pFreeFImgdPool,pDevice,gtype,precision,hr);
		break;
	case MEASURE:
		{
			// Run both implementations through MeasureFFT and keep FFT2dRR2 unless its
			// measurement is larger than FFT2dRR's (presumably elapsed time - confirm).
			fft=NEW FFT2dRR(bw,nx,bh,ny,bt,pFreeFImgdPool,pDevice,gtype,precision,hr);
			NQuad::CreateVertexBuffer();
			TextureRT* In=img?img:imgp->first;
			double tRadix2=MeasureFFT(fft,In);
			delete fft;
			fft=NEW FFT2dRR2(bw,nx,bh,ny,bt,pFreeFImgdPool,pDevice,gtype,precision,hr);
			double tStockham=MeasureFFT(fft,In);
			if(tStockham>tRadix2)
			{
				delete fft;
				fft=NEW FFT2dRR(bw,nx,bh,ny,bt,pFreeFImgdPool,pDevice,gtype,precision,hr);
			}
		}
		break;
	}

	LOG("Creating WiennerFilter class..");
	D3DXVECTOR2 sigma2;
	D3DXVECTOR2 beta2;
	float s=sigma*sigma*bt*bw*bh;
	sigma2=D3DXVECTOR2(beta*s,s);
	beta2=D3DXVECTOR2((beta-1.0)/beta,beta);
	if(usePattern&&bm>-1)
	{
		// Bring the extra sigmas to the same scale as s: (sigma/255)^2*bt*bw*bh.
		_sigma2*=_sigma2*bt*bw*bh/(255*255);
		_sigma3*=_sigma3*bt*bw*bh/(255*255);
		_sigma4*=_sigma4*bt*bw*bh/(255*255);
		SigmaToPatternTexture(s,_sigma2,_sigma3,_sigma4);
	}
	if(bm>0)
		wiennerfilter=NEW psWiennerFilter(pDevice,pFreeFImgdPool->top(),beta2,sigma2,bt,degrid!=0,usePattern);
	LOG("done");
	// psWiennerFilter is not given hr, so at this point hr still reflects the FFT object
	// creation above.
	if(FAILED(hr))
		env->ThrowError("Failure creating WiennerFilter");

	LOG("Creating Sharpen...");
	// 'sharpen' here is the float constructor parameter, not the bool member.
	if(sharpen)
		sharp=NEW Sharpen(sharpen,svr,scutoff,smin*smin*bw*bh*bt,smax*smax*bw*bh*bt,bw,nx,bh,ny,_degrid!=0,pFreeFImgdPool,hr,pDevice,gtype);
	LOG("Done");
	if(FAILED(hr))
		env->ThrowError("Failure creating Sharp");

	//setup cache
	LOG("Setup GPUCACHE...");
	if(bm>1){
		if(plane==1){
			cacheY=NEW GPUCache(bt);
			cacheY->StreamPoolPointer(pFreeFImgdPool);
		}
		else{
			cacheU=NEW GPUCache(bt);
			cacheU->StreamPoolPointer(pFreeFImgdPool);
			cacheV=NEW GPUCache(bt);
			cacheV->StreamPoolPointer(pFreeFImgdPool);
		}
	}
	// bm==0: Kalman filtering. One filter for Y, or one each for U and V; constructed
	// either from the pattern texture or from a scalar sigma.
	if(bm==0)
	{
		if(usePattern)
		{
			if(plane==1)
				kalmanY=NEW KalmanFilter(pFreeFImgdPool,kratio,mode==1,pDevice,Pattern);
			else
			{
				kalmanU=NEW KalmanFilter(pFreeFImgdPool,kratio,mode==1,pDevice,Pattern);
				kalmanV=NEW KalmanFilter(pFreeFImgdPool,kratio,mode==1,pDevice,Pattern);
			}
		}
		else
		{
			if(plane==1)
				kalmanY=NEW KalmanFilter(pFreeFImgdPool,sigma*sigma*bw*bh,kratio,mode==1,pDevice);
			else
			{
				kalmanU=NEW KalmanFilter(pFreeFImgdPool,sigma*sigma*bw*bh,kratio,mode==1,pDevice);
				kalmanV=NEW KalmanFilter(pFreeFImgdPool,sigma*sigma*bw*bh,kratio,mode==1,pDevice);
			}
		}
	}
	LOG("done")

	LOG("Setup ImgStream...");
	if(mode==1)
		convert2=NEW ImgStream2(bw,bh,ow,oh,plane==1?UploadImgY:UploadImgU,imgp->first,plane==1?DownloadImgY:DownloadImgU,plane!=1,wintype,interlaced,pDevice,gtype,useHalf,hr);
	else
		convert=NEW ImgStream(bw,nx,bh,ny,mode,width,height,pDevice,gtype,useHalf,hr,border);
	// The check has to come after the ImgStream object is created: hr is overwritten by
	// NQuad::CreateVertexBuffer() further down, so a failure here would otherwise be lost.
	if(FAILED(hr))
		env->ThrowError("Failed Creating ImgStream");
	LOG("done")

	// Take the working textures out of the free pool.
	LOG("Push texture vector");
	if(bt>=2)
	{
		if(mode!=1)
			for(unsigned int i=0;i<bt;i++)
			{
				FImg2d.push_back(0);
				pFreeFImgdPool->pop(FImg2d[i]);
				FImgd.push_back(0);
				pFreeFImgdPool->pop(FImgd[i]);
				FImg1d.push_back(0);
				pFreeFImgdPool->pop(FImg1d[i]);
			}
		else
			for(unsigned int i=0;i<bt;i++)
			{
				FImg2dp.push_back(NEW pTextureRTpair());
				pFreeFImgdPool->pop(*(FImg2dp[i]));
				FImgdp.push_back(NEW pTextureRTpair());
				pFreeFImgdPool->pop(*(FImgdp[i]));
				FImg1dp.push_back(NEW pTextureRTpair());
				pFreeFImgdPool->pop(*(FImg1dp[i]));
			}
	}
	else
	{
		if(mode==1)
		{
			pFreeFImgdPool->pop(FImgp);
			pFreeFImgdPool->pop(FImg1p);
			if(bm==0)
				pFreeFImgdPool->pop(FImg2p);
		}
		else
		{
			pFreeFImgdPool->pop(FImg1);
			pFreeFImgdPool->pop(FImg);
			if(bm==0)
				pFreeFImgdPool->pop(FImg2);
		}
	}
	//FFTtoFixed= NEW psFFTtoFixed(pDevice,img->GetRect(),false);
	/*
	sd=NEW TextureRT(pDevice,2,2,gtype->FLOAT(),hr);
	MeanSD=NEW psMeanSD(pDevice,pFreeFImgdPool->top()->GetRect());
	*/

	// Degrid helpers. setupgridcorrection is only needed during construction and is
	// deleted at the end of the degrid setup pass below.
	psGridCorrection *setupgridcorrection=0;
	if(degrid!=0){
		mintex=NEW TextureRT(pDevice,nx,ny,gtype->FLOAT4(),hr);
		// Check now; hr is reassigned by CreateVertexBuffer() right after this block.
		if(FAILED(hr))
			env->ThrowError("Failed creating degrid texture");
		selectDC= NEW psMinimize(pDevice,mintex,pFreeFImgdPool->top());
		calcgridcorrection = NEW psGridCorrection(pDevice,pFreeFImgdPool->top());
		setupgridcorrection = NEW psGridCorrection(pDevice,pFreeFImgdPool->top(),degrid);
	}
	LOG("Creating VertexBuffer");
	hr=NQuad::CreateVertexBuffer();
	if(FAILED(hr))
		env->ThrowError("Failed creating vertexbuffer");
	CalcSD=false;

	//Setup degrid
	// Clears the download texture to 0xFFFFFFFF, runs it through the image->stream
	// conversion and the FFT, applies selectDC and setupgridcorrection, and copies the
	// result into gridsample via a temporary float buffer.
	if(degrid!=0){
		TextureRT* s=plane==1?DownloadImgY:DownloadImgU;
		s->SetAsRenderTarget();
		pDevice->Clear(0,0,D3DCLEAR_TARGET,0xFFFFFFFF,0,0);
		pDevice->BeginScene();
		for(int i=0;i<6;i++)
		{
			pDevice->SetSamplerState(i,D3DSAMP_ADDRESSU,D3DTADDRESS_MIRROR);
			pDevice->SetSamplerState(i,D3DSAMP_ADDRESSV,D3DTADDRESS_MIRROR);
		}
		// Scratch texture borrowed from the pool; pushed back after the download below.
		TextureRT* temp=pFreeFImgdPool->top();
		pFreeFImgdPool->pop();
		if(mode!=1)
		{
			convert->ImgToStream(s,img);
			//TextureRT *degrid_in=NEW TextureRT(pDevice,totw/2,toth,gtype->FLOAT4(),hr);
			FImgDegrid=pFreeFImgdPool->top();
			pFreeFImgdPool->pop();
			fft->CalcFFT(img,FImgDegrid,true);
			selectDC->Apply(FImgDegrid,mintex);
			setupgridcorrection->Apply(FImgDegrid,temp,mintex);
		}
		else
		{
			convert2->ImgToTexture(s,imgp);//!!
			// NOTE(review): the pool texture is stored in .last, but .first is what is
			// passed to CalcFFT/Apply below - confirm this is intended.
			FImgDegridp.last=pFreeFImgdPool->top();
			pFreeFImgdPool->pop();
			fft->CalcFFT(imgp->first,FImgDegridp.first,true);
			selectDC->Apply(FImgDegridp.first,mintex);
			setupgridcorrection->Apply(FImgDegridp.first,temp,mintex);
		}
		gridsample=NEW TextureM(pDevice,temp->GetWidth(),temp->GetHeight(),gtype->FLOAT4(),hr);
		// 4 floats per texel.
		float* t=NEW float[temp->GetWidth()*temp->GetHeight()*4];
		DownloadFromTexture(temp,t,0);
		pFreeFImgdPool->push(temp);
		UploadToTexture(gridsample,t,0);
		delete[] t;
		pDevice->EndScene();
		//DownloadFromTexture(mintex,dg,0);
		//delete s;
		//delete dg;
		delete setupgridcorrection;
	}

	// Initial assignment of the ping/pong/last working pointers (pair and single variants).
	pingd=&FImgp;
	pongd=&FImg1p;
	lastd=&FImg2p;
	ping=FImg;
	pong=FImg1;
	lastt=FImg2;
	backbuffer=0;
	if(NVPERF)
		pDevice->GetBackBuffer(0,0,D3DBACKBUFFER_TYPE_MONO,&backbuffer);
	//ReleaseMutex(mutex);
	LOG("End constructor"<<std::endl<<std::dec);
}
代码: 全选
AVSValue __cdecl Create_fft3dGPU(AVSValue args, void* user_data, IScriptEnvironment* env){
int plane=args[7].AsInt(0);
FFTCODE fftcode=args[25].Defined()?args[25].AsBool()?RADIX2LUT:STOCKHAM:MEASURE;
float sigma=args[1].AsFloat(2);
bool allplane=(plane==4);
if(plane==2||plane==3)
plane=1;
PClip retval;
FFT3dGPUallPlane* getdst=0;
if(allplane)
{
getdst=NEW FFT3dGPUallPlane(args[0].AsClip(),env);
plane=0;
}
LOG("CREATE_fft3dGPU"<<std::endl)
bool d=args[8].AsInt(1)==1;//mode=1?
float degrid=(float)args[12].AsFloat(d?1.0:0.0);
retval=NEW FFT3dGPU(args[0].AsClip()//Input Clip
,sigma,//sigma
args[2].AsFloat(1),//beta
args[3].AsInt(32),//bw
args[4].AsInt(32),//bh
args[5].AsInt(3),//bt
args[6].AsFloat(0.0),//sharpen
plane,//plane
args[8].AsInt(1),//mode
args[9].AsInt(1),//border
args[10].AsInt(FLOAT16),//precision
args[11].AsBool(false),//NVperf
//args[12].AsBool(true),//reduce cpu
degrid,//degrid
args[13].AsFloat(0.3),//scutoff
args[14].AsFloat(1.0),//svr
args[15].AsFloat(4.0),//smin
args[16].AsFloat(20.0),//smax
args[17].AsFloat(2.0),//kratio
args[18].AsInt(args[3].AsInt(32)/2),//ow
args[19].AsInt(args[4].AsInt(32)/2),//oh
args[20].AsInt(0),//wintype
args[21].AsBool(false),//interlaced
args[22].AsFloat(sigma),//sigma2
args[23].AsFloat(sigma),//sigma3
args[24].AsFloat(sigma),//sigma4
fftcode,//fftcode
getdst,//getdst
env//env
);
if(allplane)
{
//AVSValue r1=env->Invoke("InternalCache",retval.AsClip());
plane=1;
PClip chroma=NEW FFT3dGPU(args[0].AsClip()//Input Clip
,sigma,//sigma
args[2].AsFloat(1),//beta
args[3].AsInt(32),//bw
args[4].AsInt(32),//bh
args[5].AsInt(3),//bt
args[6].AsFloat(0.0),//sharpen
plane,//plane
args[8].AsInt(1),//mode
args[9].AsInt(1),//border
args[10].AsInt(FLOAT16),//precision
args[11].AsBool(false),//NVperf
//args[12].AsBool(true),//reduce cpu
degrid,//degrid
args[13].AsFloat(0.3),//scutoff
args[14].AsFloat(1.0),//svr
args[15].AsFloat(4.0),//smin
args[16].AsFloat(20.0),//smax
args[17].AsFloat(2.0),//kratio
args[18].AsInt(args[3].AsInt(32)/2),//ow
args[19].AsInt(args[4].AsInt(32)/2),//oh
args[20].AsInt(0),//wintype
args[21].AsBool(false),//interlaced
args[22].AsFloat(sigma),//sigma2
args[23].AsFloat(sigma),//sigma3
args[24].AsFloat(sigma),//sigma4
fftcode,//fftcode
getdst,
env//env
);
getdst->SetChromaAndLumaClip(retval,chroma);
retval=getdst;
}
LOG("CREATE_fft3dGPU done"<<std::endl)
return retval;
}