Skip to content

Commit

Permalink
hwa: add copyMode property. add more 0copy formats for vda
Browse files Browse the repository at this point in the history
  • Loading branch information
wang-bin committed Apr 10, 2015
1 parent 86c86cf commit a014ace
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 60 deletions.
25 changes: 14 additions & 11 deletions src/codec/video/VideoDecoderDXVA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,10 +223,11 @@ static const dxva2_mode_t dxva2_modes[] = {
{ "Windows Media Video 9 post processing", &DXVA2_ModeWMV9_A, 0 },

/* VC-1 */
{ "VC-1 variable-length decoder", &DXVA2_ModeVC1_D, QTAV_CODEC_ID(VC1) },
{ "VC-1 variable-length decoder", &DXVA2_ModeVC1_D, QTAV_CODEC_ID(WMV3) },
//https://github.com/afedchin/xbmc/commit/dd4dd69528e10696f8b1b23367d6630adc01e618
{ "VC-1 variable-length decoder", &DXVA2_ModeVC1_D2010, QTAV_CODEC_ID(VC1) },
{ "VC-1 variable-length decoder", &DXVA2_ModeVC1_D2010, QTAV_CODEC_ID(WMV3) },
{ "VC-1 variable-length decoder", &DXVA2_ModeVC1_D, QTAV_CODEC_ID(VC1) },
{ "VC-1 variable-length decoder", &DXVA2_ModeVC1_D, QTAV_CODEC_ID(WMV3) },
{ "VC-1 variable-length decoder 2 (Intel)", &DXVA_Intel_VC1_ClearVideo_2, 0 },
{ "VC-1 variable-length decoder (Intel)", &DXVA_Intel_VC1_ClearVideo, 0 },

Expand Down Expand Up @@ -575,8 +576,8 @@ bool VideoDecoderDXVAPrivate::D3dCreateDeviceEx()
description = QString().sprintf("DXVA2 (%.*s, vendor %lu(%s), device %lu, revision %lu)",
sizeof(d3dai.Description), d3dai.Description,
d3dai.VendorId, qPrintable(vendor), d3dai.DeviceId, d3dai.Revision);
if (copy_uswc)
copy_uswc = vendor.toLower() == "intel";
//if (copy_uswc)
// copy_uswc = vendor.toLower() == "intel";
qDebug("DXVA2 description: %s", description.toUtf8().constData());

D3DPRESENT_PARAMETERS d3dpp;
Expand Down Expand Up @@ -637,8 +638,8 @@ bool VideoDecoderDXVAPrivate::D3dCreateDeviceFallback()
description = QString().sprintf("DXVA2 (%.*s, vendor %lu(%s), device %lu, revision %lu)",
sizeof(d3dai.Description), d3dai.Description,
d3dai.VendorId, qPrintable(vendor), d3dai.DeviceId, d3dai.Revision);
if (copy_uswc)
copy_uswc = vendor.toLower() == "intel";
//if (copy_uswc)
// copy_uswc = vendor.toLower() == "intel";
qDebug("DXVA2 description: %s", description.toUtf8().constData());

D3DPRESENT_PARAMETERS d3dpp;
Expand Down Expand Up @@ -730,8 +731,10 @@ bool VideoDecoderDXVAPrivate::DxFindVideoServiceConversion(GUID *input, D3DFORMA
if (mode) {
qDebug("- '%s' is supported by hardware", mode->name);
} else {
qDebug("- Unknown GUID = %08X-%04x-%04x-XXXX",
(unsigned)g.Data1, g.Data2, g.Data3);
qDebug("- Unknown GUID = %08X-%04x-%04x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x",
(unsigned)g.Data1, g.Data2, g.Data3
, g->Data4[0], g->Data4[1]
, g->Data4[2], g->Data4[3], g->Data4[4], g->Data4[5], g->Data4[6], g->Data4[7]);
}
}
/* Try all supported mode by our priority */
Expand Down Expand Up @@ -852,14 +855,14 @@ bool VideoDecoderDXVAPrivate::DxCreateVideoDecoder(int codec_id, int w, int h)
dsc.OutputFrameFreq = dsc.InputSampleFreq;
dsc.UABProtectionLevel = FALSE;
dsc.Reserved = 0;

// see xbmc
/* FIXME I am unsure we can let unknown everywhere */
DXVA2_ExtendedFormat *ext = &dsc.SampleFormat;
ext->SampleFormat = 0;//DXVA2_SampleUnknown;
ext->SampleFormat = 0;//DXVA2_SampleProgressiveFrame;//xbmc. DXVA2_SampleUnknown;
ext->VideoChromaSubsampling = 0;//DXVA2_VideoChromaSubsampling_Unknown;
ext->NominalRange = 0;//DXVA2_NominalRange_Unknown;
ext->VideoTransferMatrix = 0;//DXVA2_VideoTransferMatrix_Unknown;
ext->VideoLighting = 0;//DXVA2_VideoLighting_Unknown;
ext->VideoLighting = 0;//DXVA2_VideoLighting_dim;//xbmc. DXVA2_VideoLighting_Unknown;
ext->VideoPrimaries = 0;//DXVA2_VideoPrimaries_Unknown;
ext->VideoTransferFunction = 0;//DXVA2_VideoTransFunc_Unknown;

Expand Down
23 changes: 15 additions & 8 deletions src/codec/video/VideoDecoderFFmpegHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,31 +190,38 @@ int VideoDecoderFFmpegHWPrivate::codedHeight(AVCodecContext *avctx) const

bool VideoDecoderFFmpegHWPrivate::initUSWC(int lineSize)
{
if (!copy_uswc)
if (copy_mode != VideoDecoderFFmpegHW::OptimizedCopy)
return false;
return gpu_mem.initCache(lineSize);
}

void VideoDecoderFFmpegHWPrivate::releaseUSWC()
{
if (copy_uswc)
if (copy_mode == VideoDecoderFFmpegHW::OptimizedCopy)
gpu_mem.cleanCache();
}

VideoDecoderFFmpegHW::VideoDecoderFFmpegHW(VideoDecoderFFmpegHWPrivate &d):
VideoDecoderFFmpegBase(d)
{
setProperty("detail_SSE4", tr("Optimized copy decoded data from USWC memory using SSE4.1 if possible"));
setProperty("detail_copyMode", tr("ZeroCopy: fastest. Direct rendering without data copy between CPU and GPU") + ". " + tr("Not implemented for all codecs")
+ "\n" + tr("LazyCopy: no explicitly additional copy") + ". " + tr("Not implemented for all codecs")
+ "\n" + tr("OptimizedCopy: copy from USWC memory optimized by SSE4.1")
+ "\n" + tr("GenericCopy: slowest. Generic cpu copy"));
}

void VideoDecoderFFmpegHW::setSSE4(bool y)
void VideoDecoderFFmpegHW::setCopyMode(CopyMode value)
{
d_func().copy_uswc = y;
DPTR_D(VideoDecoderFFmpegHW);
if (d.copy_mode == value)
return;
d_func().copy_mode = value;
emit copyModeChanged();
}

bool VideoDecoderFFmpegHW::isSSE4() const
VideoDecoderFFmpegHW::CopyMode VideoDecoderFFmpegHW::copyMode() const
{
return d_func().copy_uswc;
return d_func().copy_mode;
}

VideoFrame VideoDecoderFFmpegHW::copyToFrame(const VideoFormat& fmt, int surface_h, quint8 *src[], int pitch[], bool swapUV)
Expand All @@ -238,7 +245,7 @@ VideoFrame VideoDecoderFFmpegHW::copyToFrame(const VideoFormat& fmt, int surface
std::swap(pitch[1], pitch[2]);
}
VideoFrame frame;
if (d.copy_uswc && d.gpu_mem.isReady()) {
if (copyMode() == VideoDecoderFFmpegHW::OptimizedCopy && d.gpu_mem.isReady()) {
int yuv_size = 0;
for (int i = 0; i < nb_planes; ++i) {
yuv_size += pitch[i]*h[i];
Expand Down
15 changes: 12 additions & 3 deletions src/codec/video/VideoDecoderFFmpegHW.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,22 @@ class VideoDecoderFFmpegHW : public VideoDecoderFFmpegBase
Q_OBJECT
Q_DISABLE_COPY(VideoDecoderFFmpegHW)
DPTR_DECLARE_PRIVATE(VideoDecoderFFmpegHW)
Q_PROPERTY(bool SSE4 READ isSSE4 WRITE setSSE4)
Q_PROPERTY(CopyMode copyMode READ copyMode WRITE setCopyMode NOTIFY copyModeChanged)
Q_ENUMS(CopyMode)
public:
enum CopyMode {
ZeroCopy,
LazyCopy,
OptimizedCopy,
GenericCopy
};
virtual bool prepare() Q_DECL_OVERRIDE;
VideoFrame copyToFrame(const VideoFormat& fmt, int surface_h, quint8* src[], int pitch[], bool swapUV);
// properties
void setSSE4(bool value);
bool isSSE4() const;
void setCopyMode(CopyMode value);
CopyMode copyMode() const;
Q_SIGNALS:
void copyModeChanged();
protected:
VideoDecoderFFmpegHW(VideoDecoderFFmpegHWPrivate &d);
private:
Expand Down
6 changes: 3 additions & 3 deletions src/codec/video/VideoDecoderFFmpegHW_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#ifndef QTAV_VIDEODECODERFFMPEGHW_P_H
#define QTAV_VIDEODECODERFFMPEGHW_P_H

#include "VideoDecoderFFmpegBase.h"
#include "VideoDecoderFFmpegHW.h"
#include "utils/GPUMemCopy.h"

/*!
Expand All @@ -38,7 +38,7 @@ class VideoDecoderFFmpegHWPrivate : public VideoDecoderFFmpegBasePrivate
public:
VideoDecoderFFmpegHWPrivate()
: VideoDecoderFFmpegBasePrivate()
, copy_uswc(true)
, copy_mode(VideoDecoderFFmpegHW::OptimizedCopy)
{
get_format = 0;
get_buffer = 0;
Expand Down Expand Up @@ -83,7 +83,7 @@ class VideoDecoderFFmpegHWPrivate : public VideoDecoderFFmpegBasePrivate
QString description;
// false for not intel gpu. my test result is intel gpu is supper fast and lower cpu usage if use optimized uswc copy. but nv is worse.
// TODO: flag enable, disable, auto
bool copy_uswc;
VideoDecoderFFmpegHW::CopyMode copy_mode;
GPUMemCopy gpu_mem;
};

Expand Down
4 changes: 2 additions & 2 deletions src/codec/video/VideoDecoderVAAPI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,8 +480,8 @@ bool VideoDecoderVAAPIPrivate::open()
return false;
}
vendor = vaQueryVendorString(disp);
if (!vendor.toLower().contains("intel"))
copy_uswc = false;
//if (!vendor.toLower().contains("intel"))
// copy_uswc = false;

//disable_derive = !copy_uswc;
description = QString("VA API version %1.%2; Vendor: %3;").arg(version_major).arg(version_minor).arg(vendor);
Expand Down
60 changes: 27 additions & 33 deletions src/codec/video/VideoDecoderVDA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ class VideoDecoderVDA : public VideoDecoderFFmpegHW
Q_OBJECT
DPTR_DECLARE_PRIVATE(VideoDecoderVDA)
Q_PROPERTY(PixelFormat format READ format WRITE setFormat NOTIFY formatChanged)
Q_PROPERTY(bool zeroCopy READ isZeroCopy WRITE setZeroCopy NOTIFY zeroCopyChanged)
Q_ENUMS(PixelFormat)
public:
enum PixelFormat {
Expand All @@ -74,11 +73,8 @@ class VideoDecoderVDA : public VideoDecoderFFmpegHW
// QObject properties
void setFormat(PixelFormat fmt);
PixelFormat format() const;
void setZeroCopy(bool value);
bool isZeroCopy() const;
Q_SIGNALS:
void formatChanged();
void zeroCopyChanged();
};

extern VideoDecoderId VideoDecoderId_VDA;
Expand All @@ -95,10 +91,9 @@ class VideoDecoderVDAPrivate : public VideoDecoderFFmpegHWPrivate
public:
VideoDecoderVDAPrivate()
: VideoDecoderFFmpegHWPrivate()
, zero_copy(true)
, out_fmt(VideoDecoderVDA::UYVY)
{
copy_uswc = false;
copy_mode = VideoDecoderFFmpegHW::ZeroCopy;
description = "VDA";
memset(&hw_ctx, 0, sizeof(hw_ctx));
}
Expand All @@ -111,8 +106,6 @@ class VideoDecoderVDAPrivate : public VideoDecoderFFmpegHWPrivate
virtual void releaseBuffer(void *opaque, uint8_t *data);
virtual AVPixelFormat vaPixelFormat() const { return QTAV_PIX_FMT_C(VDA_VLD);}


bool zero_copy;
VideoDecoderVDA::PixelFormat out_fmt;
struct vda_context hw_ctx;
};
Expand Down Expand Up @@ -239,7 +232,7 @@ VideoFrame VideoDecoderVDA::frame()
if (type != GLTextureSurface)
return 0;
// https://www.opengl.org/registry/specs/APPLE/rgb_422.txt
// drop GL_YCBCR_422_APPLE use RGB: https://github.com/elupus/xbmc/commit/cb8028841c71d833865ba25541733b0032b798a8
// TODO: check extension GL_APPLE_rgb_422 and rectangle?
IOSurfaceRef surface = CVPixelBufferGetIOSurface(cvbuf);
int w = IOSurfaceGetWidth(surface);
int h = IOSurfaceGetHeight(surface);
Expand All @@ -257,8 +250,15 @@ VideoFrame VideoDecoderVDA::frame()
h /= 2;
iformat = format = GL_LUMINANCE_ALPHA;
}
} else if (pixfmt == UYVY) {
} else if (pixfmt == UYVY || pixfmt == YUYV) {
w /= 2; //rgba texture
} else if (pixfmt == YUV420P) {
dtype = GL_UNSIGNED_BYTE;
iformat = format = GL_LUMINANCE;
if (plane > 0) {
w /= 2;
h /= 2;
}
}
//https://github.com/xbmc/xbmc/pull/5703
//OpenGLHelper::glActiveTexture(GL_TEXTURE0 + plane); //0 must active?
Expand All @@ -284,9 +284,9 @@ VideoFrame VideoDecoderVDA::frame()

uint8_t *src[3];
int pitch[3];
bool gl = false;
if (isZeroCopy()) {// TODO: NV12 zero copy test
gl = true;
const bool zero_copy = copyMode() == VideoDecoderFFmpegHW::ZeroCopy;
if (zero_copy) {
// make sure VideoMaterial can correctly setup parameters
switch (format()) {
case UYVY:
pitch[0] = 2*width(); //
Expand All @@ -296,33 +296,41 @@ VideoFrame VideoDecoderVDA::frame()
pitch[0] = width();
pitch[1] = width();
break;
case YUV420P:
pitch[0] = width();
pitch[1] = pitch[2] = width()/2;
break;
case YUYV:
pitch[0] = 2*width(); //
//pixfmt = VideoFormat::Format_YVYU; //
break;
default:
gl = false;
break;
}
}
const VideoFormat fmt(pixfmt);
if (!gl) {
if (!zero_copy) {
CVPixelBufferLockBaseAddress(cv_buffer, 0);
for (int i = 0; i <fmt.planeCount(); ++i) {
// get address results in internal copy
src[i] = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(cv_buffer, i);
pitch[i] = CVPixelBufferGetBytesPerRowOfPlane(cv_buffer, i);
}
CVPixelBufferUnlockBaseAddress(cv_buffer, 0);
//CVPixelBufferRelease(cv_buffer); // release when video frame is destroyed
}
VideoFrame f;
if (isZeroCopy()) {
if (zero_copy || copyMode() == VideoDecoderFFmpegHW::LazyCopy) {
f = VideoFrame(width(), height(), fmt);
f.setBits(src);
f.setBytesPerLine(pitch);
f.setTimestamp(double(d.frame->pkt_pts)/1000.0);
if (zero_copy)
f.setMetaData("target", "rect");
} else {
f = copyToFrame(fmt, d.height, src, pitch, false);
}
f.setMetaData("surface_interop", QVariant::fromValue(VideoSurfaceInteropPtr(new SurfaceInteropCVBuffer(cv_buffer, gl))));
if (gl)
f.setMetaData("target", "rect");
f.setMetaData("surface_interop", QVariant::fromValue(VideoSurfaceInteropPtr(new SurfaceInteropCVBuffer(cv_buffer, zero_copy))));
return f;
}

Expand All @@ -347,20 +355,6 @@ VideoDecoderVDA::PixelFormat VideoDecoderVDA::format() const
return d_func().out_fmt;
}

void VideoDecoderVDA::setZeroCopy(bool value)
{
DPTR_D(VideoDecoderVDA);
if (d.zero_copy == value)
return;
d.zero_copy = value;
Q_EMIT zeroCopyChanged();
}

bool VideoDecoderVDA::isZeroCopy() const
{
return d_func().zero_copy;
}

bool VideoDecoderVDAPrivate::setup(AVCodecContext *avctx)
{
const int w = codedWidth(avctx);
Expand Down

0 comments on commit a014ace

Please sign in to comment.