diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt index 161579927c..4a84e405f0 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt @@ -269,7 +269,7 @@ class SettingsFragmentPresenter( // TODO(crueter): sub-submenus? private fun addGraphicsSettings(sl: ArrayList) { sl.apply { - // add(IntSetting.RENDERER_NVDEC_EMULATION.key) + add(IntSetting.RENDERER_NVDEC_EMULATION.key) add(IntSetting.RENDERER_RESOLUTION.key) add(IntSetting.RENDERER_VSYNC.key) diff --git a/src/video_core/host1x/codecs/decoder.cpp b/src/video_core/host1x/codecs/decoder.cpp index c75059db6f..13399632ff 100644 --- a/src/video_core/host1x/codecs/decoder.cpp +++ b/src/video_core/host1x/codecs/decoder.cpp @@ -26,52 +26,39 @@ void Decoder::Decode() { } const auto packet_data = ComposeFrame(); - // Send assembled bitstream to decoder. - if (!decode_api.SendPacket(packet_data)) { - return; - } - - // Only receive/store visible frames. - if (vp9_hidden_frame) { - return; - } - - // Receive output frames from decoder. - auto frame = decode_api.ReceiveFrame(); - - if (!frame) { - return; - } - - if (IsInterlaced()) { - auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = GetInterlacedOffsets(); - auto frame_copy = frame; - - if (!frame.get()) { - LOG_ERROR(HW_GPU, - "Nvdec {} failed to decode interlaced frame for top {:#X} bottom 0x{:X}", id, - luma_top, luma_bottom); - } - - if (UsingDecodeOrder()) { - host1x.frame_queue.PushDecodeOrder(id, luma_top, std::move(frame)); - host1x.frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy)); - } else { - host1x.frame_queue.PushPresentOrder(id, luma_top, std::move(frame)); - host1x.frame_queue.PushPresentOrder(id, luma_bottom, std::move(frame_copy)); - } + FFmpeg::FrameOffsets offsets{}; + offsets.hidden = vp9_hidden_frame; + offsets.interlaced = IsInterlaced(); + if (offsets.interlaced) { + std::tie(offsets.luma, offsets.luma_bottom, offsets.chroma, offsets.chroma_bottom) = + GetInterlacedOffsets(); } else { - auto [luma_offset, chroma_offset] = GetProgressiveOffsets(); + std::tie(offsets.luma, offsets.chroma) = GetProgressiveOffsets(); + } - if (!frame.get()) { - LOG_ERROR(HW_GPU, "Nvdec {} failed to decode progressive frame for luma {:#X}", id, - luma_offset); - } + if (!decode_api.SendPacket(packet_data, offsets, GetFrameDimensions())) { + return; + } + auto push = [&](u64 luma, std::shared_ptr frame) { if (UsingDecodeOrder()) { - host1x.frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame)); + host1x.frame_queue.PushDecodeOrder(id, luma, std::move(frame)); } else { - host1x.frame_queue.PushPresentOrder(id, luma_offset, std::move(frame)); + host1x.frame_queue.PushPresentOrder(id, luma, std::move(frame)); + } + }; + + while (auto result = decode_api.ReceiveFrame()) { + auto& [frame, o] = *result; + if (o.hidden || !frame) { + continue; + } + if (o.interlaced) { + auto frame_copy = frame; + push(o.luma, std::move(frame)); + push(o.luma_bottom, std::move(frame_copy)); + } else { + push(o.luma, std::move(frame)); } } } diff --git a/src/video_core/host1x/codecs/decoder.h b/src/video_core/host1x/codecs/decoder.h index 9fca89aa40..ec99b65cad 100644 --- a/src/video_core/host1x/codecs/decoder.h +++ b/src/video_core/host1x/codecs/decoder.h @@ -45,6 +45,9 @@ protected: virtual std::tuple GetProgressiveOffsets() = 0; virtual std::tuple GetInterlacedOffsets() = 0; virtual bool IsInterlaced() = 0; + virtual std::optional GetFrameDimensions() { + return std::nullopt; + } FFmpeg::DecodeApi decode_api; Host1x::Host1x& host1x; diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index f439ac3828..291359345a 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp @@ -50,6 +50,16 @@ bool H264::IsInterlaced() { current_context.h264_parameter_set.luma_bot_offset.Address() != 0; } +std::optional H264::GetFrameDimensions() { + const auto& params = current_context.h264_parameter_set; + const s32 width = static_cast(params.pic_width_in_mbs) * 16; + const s32 height = static_cast(params.frame_height_in_mbs) * 16; + if (width <= 0 || height <= 0) { + return std::nullopt; + } + return FFmpeg::FrameDimensions{width, height}; +} + std::span H264::ComposeFrame() { host1x.gmmu_manager.ReadBlock(regs.picture_info_offset.Address(), ¤t_context, sizeof(H264DecoderContext)); const s64 frame_number = current_context.h264_parameter_set.frame_number.Value(); diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h index 1e5576291c..afe238cb91 100644 --- a/src/video_core/host1x/codecs/h264.h +++ b/src/video_core/host1x/codecs/h264.h @@ -79,6 +79,7 @@ public: std::tuple GetProgressiveOffsets() override; std::tuple GetInterlacedOffsets() override; bool IsInterlaced() override; + std::optional GetFrameDimensions() override; std::string_view GetCurrentCodecName() const override { return "H264"; diff --git a/src/video_core/host1x/ffmpeg.cpp b/src/video_core/host1x/ffmpeg.cpp index 507e94f193..6cdb0f2edf 100644 --- a/src/video_core/host1x/ffmpeg.cpp +++ b/src/video_core/host1x/ffmpeg.cpp @@ -4,6 +4,10 @@ // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include + #include "common/assert.h" #include "common/logging.h" #include "common/scope_exit.h" @@ -83,6 +87,57 @@ std::string AVError(int errnum) { return errbuf; } +#ifdef ANDROID +// Match a 3- or 4-byte annex-B start code at `i`. Returns its length, or 0. +size_t MatchStartCode(std::span data, size_t i) { + const size_t n = data.size(); + if (i + 3 < n && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 0 && data[i + 3] == 1) { + return 4; + } + if (i + 2 < n && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1) { + return 3; + } + return 0; +} + +// Pull SPS (NAL type 7) + PPS (NAL type 8) out of an annex-B frame into an +// extradata buffer, each prefixed with a 4-byte start code. Eden synthesizes +// these inline into the very first frame; h264_mediacodec wants them at open. +std::vector ExtractH264AnnexBExtradata(std::span packet) { + std::vector extradata; + const size_t size = packet.size(); + size_t i = 0; + while (i < size) { + const size_t sc = MatchStartCode(packet, i); + if (sc == 0) { + ++i; + continue; + } + const size_t nal_start = i + sc; + if (nal_start >= size) { + break; + } + const u8 nal_type = packet[nal_start] & 0x1F; + + size_t j = nal_start + 1; + while (j < size && MatchStartCode(packet, j) == 0) { + ++j; + } + + if (nal_type == 7 || nal_type == 8) { + constexpr u8 start[4] = {0, 0, 0, 1}; + extradata.insert(extradata.end(), start, start + sizeof(start)); + extradata.insert(extradata.end(), packet.begin() + nal_start, packet.begin() + j); + } else if (nal_type == 1 || nal_type == 5) { + break; + } + i = j; + } + return extradata; +} + +#endif + } Packet::Packet(std::span data) { @@ -117,7 +172,26 @@ Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) { return AV_CODEC_ID_NONE; } }(); - m_codec = avcodec_find_decoder(av_codec); + +#ifdef ANDROID + // FFmpeg exposes MediaCodec via dedicated decoders rather than as a + // hw_config on the regular ones. + if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) { + const char* mc_name = nullptr; + switch (av_codec) { + case AV_CODEC_ID_H264: mc_name = "h264_mediacodec"; break; + case AV_CODEC_ID_VP8: mc_name = "vp8_mediacodec"; break; + case AV_CODEC_ID_VP9: mc_name = "vp9_mediacodec"; break; + default: break; + } + if (mc_name) { + m_codec = avcodec_find_decoder_by_name(mc_name); + } + } +#endif + if (!m_codec) { + m_codec = avcodec_find_decoder(av_codec); + } } bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const { @@ -205,6 +279,9 @@ DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} { av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0); m_codec_context->thread_count = 0; m_codec_context->thread_type &= ~FF_THREAD_FRAME; + // Forwarded into MediaCodec as KEY_LOW_LATENCY on Android. + m_codec_context->flags |= AV_CODEC_FLAG_LOW_DELAY; + m_codec_context->flags2 |= AV_CODEC_FLAG2_FAST; } DecoderContext::~DecoderContext() { @@ -218,7 +295,19 @@ void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, A m_codec_context->pix_fmt = hw_pix_fmt; } -bool DecoderContext::OpenContext(const Decoder& decoder) { +bool DecoderContext::OpenContext(const Decoder& decoder, std::span extradata) { + if (!extradata.empty()) { + av_freep(&m_codec_context->extradata); + m_codec_context->extradata = static_cast( + av_mallocz(extradata.size() + AV_INPUT_BUFFER_PADDING_SIZE)); + if (!m_codec_context->extradata) { + LOG_ERROR(HW_GPU, "Failed to allocate extradata"); + return false; + } + std::memcpy(m_codec_context->extradata, extradata.data(), extradata.size()); + m_codec_context->extradata_size = static_cast(extradata.size()); + } + if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) { LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret)); return false; @@ -278,6 +367,12 @@ void DecodeApi::Reset() { m_hardware_context.reset(); m_decoder_context.reset(); m_decoder.reset(); + m_opened = false; + m_needs_h264_extradata = false; + m_next_pts = 0; + while (!m_pending_offsets.empty()) { + m_pending_offsets.pop(); + } } bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) { @@ -293,23 +388,69 @@ bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) { m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder); } - // Open the decoder context. +#ifdef ANDROID + // h264_mediacodec needs SPS/PPS in extradata at open. We pull them from + // the first frame's bitstream in SendPacket. + m_needs_h264_extradata = m_decoder->GetCodec() && + std::string_view(m_decoder->GetCodec()->name) == "h264_mediacodec"; + if (m_needs_h264_extradata) { + return true; + } +#endif + if (!m_decoder_context->OpenContext(*m_decoder)) { this->Reset(); return false; } + m_opened = true; return true; } -bool DecodeApi::SendPacket(std::span packet_data) { +bool DecodeApi::SendPacket(std::span packet_data, const FrameOffsets& offsets, + std::optional dimensions) { + if (!m_opened) { + std::vector extradata; +#ifdef ANDROID + if (m_needs_h264_extradata) { + extradata = ExtractH264AnnexBExtradata(packet_data); + if (extradata.empty()) { + return true; + } + if (dimensions) { + auto* ctx = m_decoder_context->GetCodecContext(); + ctx->width = dimensions->width; + ctx->height = dimensions->height; + ctx->coded_width = dimensions->width; + ctx->coded_height = dimensions->height; + } + } +#endif + if (!m_decoder_context->OpenContext(*m_decoder, extradata)) { + this->Reset(); + return false; + } + m_opened = true; + } + m_pending_offsets.push(offsets); FFmpeg::Packet packet(packet_data); + packet.GetPacket()->pts = m_next_pts; + packet.GetPacket()->dts = m_next_pts; + ++m_next_pts; return m_decoder_context->SendPacket(packet); } -std::shared_ptr DecodeApi::ReceiveFrame() { - // Receive raw frame from decoder. - return m_decoder_context->ReceiveFrame(); +std::optional DecodeApi::ReceiveFrame() { + auto frame = m_decoder_context->ReceiveFrame(); + if (!frame) { + return std::nullopt; + } + FrameOffsets offsets{}; + if (!m_pending_offsets.empty()) { + offsets = m_pending_offsets.front(); + m_pending_offsets.pop(); + } + return DecodedFrame{std::move(frame), offsets}; } } diff --git a/src/video_core/host1x/ffmpeg.h b/src/video_core/host1x/ffmpeg.h index fdb6908bb6..a366cd2c68 100644 --- a/src/video_core/host1x/ffmpeg.h +++ b/src/video_core/host1x/ffmpeg.h @@ -179,7 +179,7 @@ public: ~DecoderContext(); void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt); - bool OpenContext(const Decoder& decoder); + bool OpenContext(const Decoder& decoder, std::span extradata = {}); bool SendPacket(const Packet& packet); std::shared_ptr ReceiveFrame(); @@ -198,6 +198,20 @@ private: bool m_decode_order{}; }; +struct FrameOffsets { + bool interlaced{}; + bool hidden{}; + u64 luma{}; + u64 chroma{}; + u64 luma_bottom{}; + u64 chroma_bottom{}; +}; + +struct FrameDimensions { + s32 width{}; + s32 height{}; +}; + class DecodeApi { public: YUZU_NON_COPYABLE(DecodeApi); @@ -213,13 +227,23 @@ public: return m_decoder_context->UsingDecodeOrder(); } - bool SendPacket(std::span packet_data); - std::shared_ptr ReceiveFrame(); + bool SendPacket(std::span packet_data, const FrameOffsets& offsets, + std::optional dimensions = std::nullopt); + + struct DecodedFrame { + std::shared_ptr frame; + FrameOffsets offsets; + }; + std::optional ReceiveFrame(); private: std::optional m_decoder; std::optional m_decoder_context; std::optional m_hardware_context; + bool m_opened{}; + bool m_needs_h264_extradata{}; + s64 m_next_pts{}; + std::queue m_pending_offsets; }; } // namespace FFmpeg diff --git a/src/video_core/host1x/nvdec.cpp b/src/video_core/host1x/nvdec.cpp index f2e5c358d8..5dbc6a417e 100644 --- a/src/video_core/host1x/nvdec.cpp +++ b/src/video_core/host1x/nvdec.cpp @@ -31,6 +31,7 @@ Nvdec::Nvdec(Host1x& host1x_, s32 id_, u32 syncpt) Nvdec::~Nvdec() { LOG_INFO(HW_GPU, "Destroying nvdec {}", id); + host1x.frame_queue.Close(id); } void Nvdec::ProcessMethod(u32 method, u32 argument) { diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 906714cc16..2d1ea5b8d3 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -137,7 +137,7 @@ void Vic::Execute() noexcept { break; } Blend(config, slot_config, config.output_surface_config.out_pixel_format); - } else { + } else if (nvdec_id != -1) { LOG_ERROR(HW_GPU, "Vic {} failed to get frame with offset {:#X}", id, luma_offset); } }