mirror of
https://git.eden-emu.dev/eden-emu/eden.git
synced 2026-06-06 07:45:56 +08:00
[nvdec, android] proper detection and support for GPU decoder
This commit is contained in:
parent
f4f6f5831d
commit
2875a4db89
@ -269,7 +269,7 @@ class SettingsFragmentPresenter(
|
||||
// TODO(crueter): sub-submenus?
|
||||
private fun addGraphicsSettings(sl: ArrayList<SettingsItem>) {
|
||||
sl.apply {
|
||||
// add(IntSetting.RENDERER_NVDEC_EMULATION.key)
|
||||
add(IntSetting.RENDERER_NVDEC_EMULATION.key)
|
||||
|
||||
add(IntSetting.RENDERER_RESOLUTION.key)
|
||||
add(IntSetting.RENDERER_VSYNC.key)
|
||||
|
||||
@ -26,52 +26,39 @@ void Decoder::Decode() {
|
||||
}
|
||||
|
||||
const auto packet_data = ComposeFrame();
|
||||
// Send assembled bitstream to decoder.
|
||||
if (!decode_api.SendPacket(packet_data)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Only receive/store visible frames.
|
||||
if (vp9_hidden_frame) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Receive output frames from decoder.
|
||||
auto frame = decode_api.ReceiveFrame();
|
||||
|
||||
if (!frame) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (IsInterlaced()) {
|
||||
auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = GetInterlacedOffsets();
|
||||
auto frame_copy = frame;
|
||||
|
||||
if (!frame.get()) {
|
||||
LOG_ERROR(HW_GPU,
|
||||
"Nvdec {} failed to decode interlaced frame for top {:#X} bottom 0x{:X}", id,
|
||||
luma_top, luma_bottom);
|
||||
}
|
||||
|
||||
if (UsingDecodeOrder()) {
|
||||
host1x.frame_queue.PushDecodeOrder(id, luma_top, std::move(frame));
|
||||
host1x.frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy));
|
||||
} else {
|
||||
host1x.frame_queue.PushPresentOrder(id, luma_top, std::move(frame));
|
||||
host1x.frame_queue.PushPresentOrder(id, luma_bottom, std::move(frame_copy));
|
||||
}
|
||||
FFmpeg::FrameOffsets offsets{};
|
||||
offsets.hidden = vp9_hidden_frame;
|
||||
offsets.interlaced = IsInterlaced();
|
||||
if (offsets.interlaced) {
|
||||
std::tie(offsets.luma, offsets.luma_bottom, offsets.chroma, offsets.chroma_bottom) =
|
||||
GetInterlacedOffsets();
|
||||
} else {
|
||||
auto [luma_offset, chroma_offset] = GetProgressiveOffsets();
|
||||
std::tie(offsets.luma, offsets.chroma) = GetProgressiveOffsets();
|
||||
}
|
||||
|
||||
if (!frame.get()) {
|
||||
LOG_ERROR(HW_GPU, "Nvdec {} failed to decode progressive frame for luma {:#X}", id,
|
||||
luma_offset);
|
||||
}
|
||||
if (!decode_api.SendPacket(packet_data, offsets, GetFrameDimensions())) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto push = [&](u64 luma, std::shared_ptr<FFmpeg::Frame> frame) {
|
||||
if (UsingDecodeOrder()) {
|
||||
host1x.frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame));
|
||||
host1x.frame_queue.PushDecodeOrder(id, luma, std::move(frame));
|
||||
} else {
|
||||
host1x.frame_queue.PushPresentOrder(id, luma_offset, std::move(frame));
|
||||
host1x.frame_queue.PushPresentOrder(id, luma, std::move(frame));
|
||||
}
|
||||
};
|
||||
|
||||
while (auto result = decode_api.ReceiveFrame()) {
|
||||
auto& [frame, o] = *result;
|
||||
if (o.hidden || !frame) {
|
||||
continue;
|
||||
}
|
||||
if (o.interlaced) {
|
||||
auto frame_copy = frame;
|
||||
push(o.luma, std::move(frame));
|
||||
push(o.luma_bottom, std::move(frame_copy));
|
||||
} else {
|
||||
push(o.luma, std::move(frame));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -45,6 +45,9 @@ protected:
|
||||
virtual std::tuple<u64, u64> GetProgressiveOffsets() = 0;
|
||||
virtual std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() = 0;
|
||||
virtual bool IsInterlaced() = 0;
|
||||
// Optional hook for codecs that can report their frame size; the result is
// forwarded to decode_api.SendPacket by Decode(). The base implementation
// reports no dimensions.
virtual std::optional<FFmpeg::FrameDimensions> GetFrameDimensions() {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
FFmpeg::DecodeApi decode_api;
|
||||
Host1x::Host1x& host1x;
|
||||
|
||||
@ -50,6 +50,16 @@ bool H264::IsInterlaced() {
|
||||
current_context.h264_parameter_set.luma_bot_offset.Address() != 0;
|
||||
}
|
||||
|
||||
std::optional<FFmpeg::FrameDimensions> H264::GetFrameDimensions() {
|
||||
const auto& params = current_context.h264_parameter_set;
|
||||
const s32 width = static_cast<s32>(params.pic_width_in_mbs) * 16;
|
||||
const s32 height = static_cast<s32>(params.frame_height_in_mbs) * 16;
|
||||
if (width <= 0 || height <= 0) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return FFmpeg::FrameDimensions{width, height};
|
||||
}
|
||||
|
||||
std::span<const u8> H264::ComposeFrame() {
|
||||
host1x.gmmu_manager.ReadBlock(regs.picture_info_offset.Address(), ¤t_context, sizeof(H264DecoderContext));
|
||||
const s64 frame_number = current_context.h264_parameter_set.frame_number.Value();
|
||||
|
||||
@ -79,6 +79,7 @@ public:
|
||||
std::tuple<u64, u64> GetProgressiveOffsets() override;
|
||||
std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() override;
|
||||
bool IsInterlaced() override;
|
||||
std::optional<FFmpeg::FrameDimensions> GetFrameDimensions() override;
|
||||
|
||||
std::string_view GetCurrentCodecName() const override {
|
||||
return "H264";
|
||||
|
||||
@ -4,6 +4,10 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <cstring>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging.h"
|
||||
#include "common/scope_exit.h"
|
||||
@ -83,6 +87,57 @@ std::string AVError(int errnum) {
|
||||
return errbuf;
|
||||
}
|
||||
|
||||
#ifdef ANDROID
|
||||
// Match a 3- or 4-byte annex-B start code at `i`. Returns its length, or 0.
|
||||
size_t MatchStartCode(std::span<const u8> data, size_t i) {
|
||||
const size_t n = data.size();
|
||||
if (i + 3 < n && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 0 && data[i + 3] == 1) {
|
||||
return 4;
|
||||
}
|
||||
if (i + 2 < n && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1) {
|
||||
return 3;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Pull SPS (NAL type 7) + PPS (NAL type 8) out of an annex-B frame into an
|
||||
// extradata buffer, each prefixed with a 4-byte start code. Eden synthesizes
|
||||
// these inline into the very first frame; h264_mediacodec wants them at open.
|
||||
std::vector<u8> ExtractH264AnnexBExtradata(std::span<const u8> packet) {
|
||||
std::vector<u8> extradata;
|
||||
const size_t size = packet.size();
|
||||
size_t i = 0;
|
||||
while (i < size) {
|
||||
const size_t sc = MatchStartCode(packet, i);
|
||||
if (sc == 0) {
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
const size_t nal_start = i + sc;
|
||||
if (nal_start >= size) {
|
||||
break;
|
||||
}
|
||||
const u8 nal_type = packet[nal_start] & 0x1F;
|
||||
|
||||
size_t j = nal_start + 1;
|
||||
while (j < size && MatchStartCode(packet, j) == 0) {
|
||||
++j;
|
||||
}
|
||||
|
||||
if (nal_type == 7 || nal_type == 8) {
|
||||
constexpr u8 start[4] = {0, 0, 0, 1};
|
||||
extradata.insert(extradata.end(), start, start + sizeof(start));
|
||||
extradata.insert(extradata.end(), packet.begin() + nal_start, packet.begin() + j);
|
||||
} else if (nal_type == 1 || nal_type == 5) {
|
||||
break;
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
return extradata;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
Packet::Packet(std::span<const u8> data) {
|
||||
@ -117,7 +172,26 @@ Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
|
||||
return AV_CODEC_ID_NONE;
|
||||
}
|
||||
}();
|
||||
m_codec = avcodec_find_decoder(av_codec);
|
||||
|
||||
#ifdef ANDROID
|
||||
// FFmpeg exposes MediaCodec via dedicated decoders rather than as a
|
||||
// hw_config on the regular ones.
|
||||
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
|
||||
const char* mc_name = nullptr;
|
||||
switch (av_codec) {
|
||||
case AV_CODEC_ID_H264: mc_name = "h264_mediacodec"; break;
|
||||
case AV_CODEC_ID_VP8: mc_name = "vp8_mediacodec"; break;
|
||||
case AV_CODEC_ID_VP9: mc_name = "vp9_mediacodec"; break;
|
||||
default: break;
|
||||
}
|
||||
if (mc_name) {
|
||||
m_codec = avcodec_find_decoder_by_name(mc_name);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (!m_codec) {
|
||||
m_codec = avcodec_find_decoder(av_codec);
|
||||
}
|
||||
}
|
||||
|
||||
bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const {
|
||||
@ -205,6 +279,9 @@ DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} {
|
||||
av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
|
||||
m_codec_context->thread_count = 0;
|
||||
m_codec_context->thread_type &= ~FF_THREAD_FRAME;
|
||||
// Forwarded into MediaCodec as KEY_LOW_LATENCY on Android.
|
||||
m_codec_context->flags |= AV_CODEC_FLAG_LOW_DELAY;
|
||||
m_codec_context->flags2 |= AV_CODEC_FLAG2_FAST;
|
||||
}
|
||||
|
||||
DecoderContext::~DecoderContext() {
|
||||
@ -218,7 +295,19 @@ void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, A
|
||||
m_codec_context->pix_fmt = hw_pix_fmt;
|
||||
}
|
||||
|
||||
bool DecoderContext::OpenContext(const Decoder& decoder) {
|
||||
bool DecoderContext::OpenContext(const Decoder& decoder, std::span<const u8> extradata) {
|
||||
if (!extradata.empty()) {
|
||||
av_freep(&m_codec_context->extradata);
|
||||
m_codec_context->extradata = static_cast<u8*>(
|
||||
av_mallocz(extradata.size() + AV_INPUT_BUFFER_PADDING_SIZE));
|
||||
if (!m_codec_context->extradata) {
|
||||
LOG_ERROR(HW_GPU, "Failed to allocate extradata");
|
||||
return false;
|
||||
}
|
||||
std::memcpy(m_codec_context->extradata, extradata.data(), extradata.size());
|
||||
m_codec_context->extradata_size = static_cast<int>(extradata.size());
|
||||
}
|
||||
|
||||
if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret));
|
||||
return false;
|
||||
@ -278,6 +367,12 @@ void DecodeApi::Reset() {
|
||||
m_hardware_context.reset();
|
||||
m_decoder_context.reset();
|
||||
m_decoder.reset();
|
||||
m_opened = false;
|
||||
m_needs_h264_extradata = false;
|
||||
m_next_pts = 0;
|
||||
while (!m_pending_offsets.empty()) {
|
||||
m_pending_offsets.pop();
|
||||
}
|
||||
}
|
||||
|
||||
bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
|
||||
@ -293,23 +388,69 @@ bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
|
||||
m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder);
|
||||
}
|
||||
|
||||
// Open the decoder context.
|
||||
#ifdef ANDROID
|
||||
// h264_mediacodec needs SPS/PPS in extradata at open. We pull them from
|
||||
// the first frame's bitstream in SendPacket.
|
||||
m_needs_h264_extradata = m_decoder->GetCodec() &&
|
||||
std::string_view(m_decoder->GetCodec()->name) == "h264_mediacodec";
|
||||
if (m_needs_h264_extradata) {
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!m_decoder_context->OpenContext(*m_decoder)) {
|
||||
this->Reset();
|
||||
return false;
|
||||
}
|
||||
m_opened = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DecodeApi::SendPacket(std::span<const u8> packet_data) {
|
||||
bool DecodeApi::SendPacket(std::span<const u8> packet_data, const FrameOffsets& offsets,
|
||||
std::optional<FrameDimensions> dimensions) {
|
||||
if (!m_opened) {
|
||||
std::vector<u8> extradata;
|
||||
#ifdef ANDROID
|
||||
if (m_needs_h264_extradata) {
|
||||
extradata = ExtractH264AnnexBExtradata(packet_data);
|
||||
if (extradata.empty()) {
|
||||
return true;
|
||||
}
|
||||
if (dimensions) {
|
||||
auto* ctx = m_decoder_context->GetCodecContext();
|
||||
ctx->width = dimensions->width;
|
||||
ctx->height = dimensions->height;
|
||||
ctx->coded_width = dimensions->width;
|
||||
ctx->coded_height = dimensions->height;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (!m_decoder_context->OpenContext(*m_decoder, extradata)) {
|
||||
this->Reset();
|
||||
return false;
|
||||
}
|
||||
m_opened = true;
|
||||
}
|
||||
m_pending_offsets.push(offsets);
|
||||
FFmpeg::Packet packet(packet_data);
|
||||
packet.GetPacket()->pts = m_next_pts;
|
||||
packet.GetPacket()->dts = m_next_pts;
|
||||
++m_next_pts;
|
||||
return m_decoder_context->SendPacket(packet);
|
||||
}
|
||||
|
||||
std::shared_ptr<Frame> DecodeApi::ReceiveFrame() {
|
||||
// Receive raw frame from decoder.
|
||||
return m_decoder_context->ReceiveFrame();
|
||||
std::optional<DecodeApi::DecodedFrame> DecodeApi::ReceiveFrame() {
|
||||
auto frame = m_decoder_context->ReceiveFrame();
|
||||
if (!frame) {
|
||||
return std::nullopt;
|
||||
}
|
||||
FrameOffsets offsets{};
|
||||
if (!m_pending_offsets.empty()) {
|
||||
offsets = m_pending_offsets.front();
|
||||
m_pending_offsets.pop();
|
||||
}
|
||||
return DecodedFrame{std::move(frame), offsets};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -179,7 +179,7 @@ public:
|
||||
~DecoderContext();
|
||||
|
||||
void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt);
|
||||
bool OpenContext(const Decoder& decoder);
|
||||
bool OpenContext(const Decoder& decoder, std::span<const u8> extradata = {});
|
||||
bool SendPacket(const Packet& packet);
|
||||
std::shared_ptr<Frame> ReceiveFrame();
|
||||
|
||||
@ -198,6 +198,20 @@ private:
|
||||
bool m_decode_order{};
|
||||
};
|
||||
|
||||
// Per-packet bookkeeping: passed to DecodeApi::SendPacket, queued there, and
// returned alongside the decoded frame by DecodeApi::ReceiveFrame.
struct FrameOffsets {
|
||||
bool interlaced{}; // Filled from IsInterlaced(); such frames are pushed under both luma and luma_bottom.
|
||||
bool hidden{}; // Filled from vp9_hidden_frame; hidden frames are not pushed to the frame queue.
|
||||
u64 luma{}; // Luma offset; used as the key when pushing the frame to the frame queue.
|
||||
u64 chroma{}; // Chroma offset from GetProgressiveOffsets()/GetInterlacedOffsets().
|
||||
u64 luma_bottom{}; // Second luma offset from GetInterlacedOffsets(); only filled for interlaced frames.
|
||||
u64 chroma_bottom{}; // Second chroma offset from GetInterlacedOffsets(); only filled for interlaced frames.
|
||||
};
|
||||
|
||||
// Frame size hint a codec can supply via GetFrameDimensions(); on Android,
// DecodeApi::SendPacket copies it into the codec context's width/height and
// coded_width/coded_height before the deferred h264_mediacodec open.
struct FrameDimensions {
|
||||
s32 width{}; // H264 computes this as pic_width_in_mbs * 16.
|
||||
s32 height{}; // H264 computes this as frame_height_in_mbs * 16.
|
||||
};
|
||||
|
||||
class DecodeApi {
|
||||
public:
|
||||
YUZU_NON_COPYABLE(DecodeApi);
|
||||
@ -213,13 +227,23 @@ public:
|
||||
return m_decoder_context->UsingDecodeOrder();
|
||||
}
|
||||
|
||||
bool SendPacket(std::span<const u8> packet_data);
|
||||
std::shared_ptr<Frame> ReceiveFrame();
|
||||
bool SendPacket(std::span<const u8> packet_data, const FrameOffsets& offsets,
|
||||
std::optional<FrameDimensions> dimensions = std::nullopt);
|
||||
|
||||
// Result of ReceiveFrame(): a decoded frame plus the offsets taken from the
// pending-offsets queue for it.
struct DecodedFrame {
|
||||
std::shared_ptr<Frame> frame; // Frame returned by the decoder context.
|
||||
FrameOffsets offsets; // Zero-initialized if no offsets were pending.
|
||||
};
|
||||
std::optional<DecodedFrame> ReceiveFrame();
|
||||
|
||||
private:
|
||||
std::optional<FFmpeg::Decoder> m_decoder;
|
||||
std::optional<FFmpeg::DecoderContext> m_decoder_context;
|
||||
std::optional<FFmpeg::HardwareContext> m_hardware_context;
|
||||
bool m_opened{};
|
||||
bool m_needs_h264_extradata{};
|
||||
s64 m_next_pts{};
|
||||
std::queue<FrameOffsets> m_pending_offsets;
|
||||
};
|
||||
|
||||
} // namespace FFmpeg
|
||||
|
||||
@ -31,6 +31,7 @@ Nvdec::Nvdec(Host1x& host1x_, s32 id_, u32 syncpt)
|
||||
|
||||
// Logs the teardown of this nvdec instance and calls Close(id) on the host1x
// frame queue for this decoder's id.
Nvdec::~Nvdec() {
|
||||
LOG_INFO(HW_GPU, "Destroying nvdec {}", id);
|
||||
host1x.frame_queue.Close(id);
|
||||
}
|
||||
|
||||
void Nvdec::ProcessMethod(u32 method, u32 argument) {
|
||||
|
||||
@ -137,7 +137,7 @@ void Vic::Execute() noexcept {
|
||||
break;
|
||||
}
|
||||
Blend(config, slot_config, config.output_surface_config.out_pixel_format);
|
||||
} else {
|
||||
} else if (nvdec_id != -1) {
|
||||
LOG_ERROR(HW_GPU, "Vic {} failed to get frame with offset {:#X}", id, luma_offset);
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user