better inlining

This commit is contained in:
lizzie 2026-05-01 16:21:21 +00:00
parent ed48efec20
commit 00cb6d0748
8 changed files with 77 additions and 107 deletions

View File

@ -20,8 +20,7 @@
namespace Tegra {
CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id)
: host_processor(host1x_)
, host1x{host1x_}
: host1x{host1x_}
, current_class{ChClassId(id)}
{
thread = std::jthread([this](std::stop_token stop_token) {
@ -99,7 +98,7 @@ void CDmaPusher::ExecuteCommand(u32 method, u32 arg) {
switch (current_class) {
case ChClassId::Control:
LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", u32(current_class), method, arg);
host_processor.ProcessMethod(Host1x::Control::Method(method), arg);
host_processor.ProcessMethod(host1x, Host1x::Control::Method(method), arg);
break;
default:
thi_regs.reg_array[method] = arg;

View File

@ -45,7 +45,7 @@ struct GPU::Impl {
, use_nvdec{use_nvdec_}
, shader_notify()
, is_async{is_async_}
, gpu_thread{system_, is_async_}
, gpu_thread{system_}
{}
~Impl() = default;

View File

@ -19,7 +19,7 @@
namespace VideoCommon::GPUThread {
ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
ThreadManager::ThreadManager(Core::System& system_)
: system{system_}
{}
@ -70,7 +70,6 @@ void ThreadManager::FlushRegion(DAddr addr, u64 size, bool is_async) {
// Always flush with synchronous GPU mode
PushCommand(FlushRegionCommand(addr, size), false, is_async);
}
return;
}
void ThreadManager::TickGPU(bool is_async) {

View File

@ -104,7 +104,7 @@ struct SynchState final {
/// Class used to manage the GPU thread
class ThreadManager final {
public:
explicit ThreadManager(Core::System& system_, bool is_async_);
explicit ThreadManager(Core::System& system_);
~ThreadManager();
/// Creates and starts the GPU thread.

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
@ -10,26 +10,22 @@
namespace Tegra::Host1x {
Control::Control(Host1x& host1x_) : host1x(host1x_) {}
Control::~Control() = default;
void Control::ProcessMethod(Method method, u32 argument) {
void Control::ProcessMethod(Host1x& host1x, Method method, u32 argument) {
switch (method) {
case Method::LoadSyncptPayload32:
syncpoint_value = argument;
break;
case Method::WaitSyncpt:
case Method::WaitSyncpt32:
Execute(argument);
Execute(host1x, argument);
break;
default:
UNIMPLEMENTED_MSG("Control method {:#X}", static_cast<u32>(method));
UNIMPLEMENTED_MSG("Control method {:#X}", u32(method));
break;
}
}
void Control::Execute(u32 data) {
void Control::Execute(Host1x& host1x, u32 data) {
LOG_TRACE(Service_NVDRV, "Control wait syncpt {} value {}", data, syncpoint_value);
host1x.GetSyncpointManager().WaitHost(data, syncpoint_value);
}

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
@ -19,17 +22,11 @@ public:
WaitSyncpt32 = 0x50,
};
explicit Control(Host1x& host1x);
~Control();
/// Writes the method into the state, Invoke Execute() if encountered
void ProcessMethod(Method method, u32 argument);
private:
void ProcessMethod(Host1x& host1x, Method method, u32 argument);
/// For Host1x, execute is waiting on a syncpoint previously written into the state
void Execute(u32 data);
void Execute(Host1x& host1x, u32 data);
Host1x& host1x;
u32 syncpoint_value{};
};

View File

@ -27,22 +27,22 @@ void Host1x::StartDevice(s32 fd, ChannelType type, u32 syncpt) {
#ifdef YUZU_LEGACY
std::call_once(nvdec_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer
#endif
devices[fd] = std::make_unique<Tegra::Host1x::Nvdec>(*this, fd, syncpt);
devices[fd].emplace<Tegra::Host1x::Nvdec>(*this, fd, syncpt);
break;
case ChannelType::VIC:
#ifdef YUZU_LEGACY
std::call_once(vic_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer
#endif
devices[fd] = std::make_unique<Tegra::Host1x::Vic>(*this, fd, syncpt);
devices[fd].emplace<Tegra::Host1x::Vic>(*this, fd, syncpt);
break;
default:
LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", static_cast<u32>(type));
LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", u32(type));
break;
}
}
void Host1x::StopDevice(s32 fd, ChannelType type) {
devices.erase(fd);
devices[fd].emplace<std::monostate>();
}
} // namespace Tegra::Host1x

View File

@ -8,7 +8,7 @@
#include <ankerl/unordered_dense.h>
#include <unordered_map>
#include <queue>
#include <variant>
#include "common/common_types.h"
@ -17,6 +17,9 @@
#include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/host1x/syncpoint_manager.h"
#include "video_core/memory_manager.h"
// fd types?
#include "video_core/host1x/nvdec.h"
#include "video_core/host1x/vic.h"
namespace Core {
class System;
@ -31,118 +34,90 @@ class Nvdec;
class FrameQueue {
public:
struct FrameDevice {
std::deque<std::pair<u64, std::shared_ptr<FFmpeg::Frame>>> m_presentation_order;
std::unordered_map<u64, std::shared_ptr<FFmpeg::Frame>> m_decode_order;
};
void Open(s32 fd) {
std::scoped_lock l{m_mutex};
m_presentation_order.insert({fd, {}});
m_decode_order.insert({fd, {}});
m_frame_devices.insert_or_assign(fd, FrameDevice{});
}
void Close(s32 fd) {
std::scoped_lock l{m_mutex};
m_presentation_order.erase(fd);
m_decode_order.erase(fd);
m_frame_devices.erase(fd);
}
s32 VicFindNvdecFdFromOffset(u64 search_offset) {
std::scoped_lock l{m_mutex};
for (auto& map : m_presentation_order) {
for (auto& [offset, frame] : map.second) {
if (offset == search_offset) {
return map.first;
}
}
}
for (auto& map : m_decode_order) {
for (auto& [offset, frame] : map.second) {
if (offset == search_offset) {
return map.first;
}
}
for (auto const& [fd, dev] : m_frame_devices) {
for (auto const& [offset, frame] : dev.m_presentation_order)
if (offset == search_offset)
return fd;
for (auto const& [offset, frame] : dev.m_decode_order)
if (offset == search_offset)
return fd;
}
return -1;
}
void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
std::scoped_lock l{m_mutex};
auto map = m_presentation_order.find(fd);
if (map == m_presentation_order.end()) {
return;
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
if (it->second.m_presentation_order.size() >= MAX_PRESENT_QUEUE)
it->second.m_presentation_order.pop_front();
it->second.m_presentation_order.emplace_back(offset, std::move(frame));
}
if (map->second.size() >= MAX_PRESENT_QUEUE) {
map->second.pop_front();
}
map->second.emplace_back(offset, std::move(frame));
}
void PushDecodeOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
std::scoped_lock l{m_mutex};
auto map = m_decode_order.find(fd);
if (map == m_decode_order.end()) {
return;
}
map->second.insert_or_assign(offset, std::move(frame));
if (map->second.size() > MAX_DECODE_MAP) {
auto it = map->second.begin();
std::advance(it, map->second.size() - MAX_DECODE_MAP);
map->second.erase(map->second.begin(), it);
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
it->second.m_decode_order.insert_or_assign(offset, std::move(frame));
if (it->second.m_decode_order.size() > MAX_DECODE_MAP) {
auto it2 = it->second.m_decode_order.begin();
std::advance(it2, it->second.m_decode_order.size() - MAX_DECODE_MAP);
it->second.m_decode_order.erase(it->second.m_decode_order.begin(), it2);
}
}
}
std::shared_ptr<FFmpeg::Frame> GetFrame(s32 fd, u64 offset) {
if (fd == -1) {
return {};
if (fd != -1) {
std::scoped_lock l{m_mutex};
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
if (it->second.m_presentation_order.size() > 0)
return GetPresentOrderLocked(fd);
if (it->second.m_decode_order.size() > 0)
return GetDecodeOrderLocked(fd, offset);
}
}
std::scoped_lock l{m_mutex};
auto present_map = m_presentation_order.find(fd);
if (present_map != m_presentation_order.end() && !present_map->second.empty()) {
return GetPresentOrderLocked(fd);
}
auto decode_map = m_decode_order.find(fd);
if (decode_map != m_decode_order.end() && !decode_map->second.empty()) {
return GetDecodeOrderLocked(fd, offset);
}
return {};
}
private:
std::shared_ptr<FFmpeg::Frame> GetPresentOrderLocked(s32 fd) {
auto map = m_presentation_order.find(fd);
if (map == m_presentation_order.end() || map->second.empty()) {
return {};
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
auto frame = std::move(it->second.m_presentation_order.front().second);
it->second.m_presentation_order.pop_front();
return frame;
}
auto frame = std::move(map->second.front().second);
map->second.pop_front();
return frame;
return {};
}
std::shared_ptr<FFmpeg::Frame> GetDecodeOrderLocked(s32 fd, u64 offset) {
auto map = m_decode_order.find(fd);
if (map == m_decode_order.end() || map->second.empty()) {
return {};
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
if (auto const it2 = it->second.m_decode_order.find(offset); it2 != it->second.m_decode_order.end()) {
// TODO: this "mapped" prevents us from fully embracing ankerl
return std::move(it->second.m_decode_order.extract(it2).mapped());
}
}
auto it = map->second.find(offset);
if (it == map->second.end()) {
return {};
}
// TODO: this "mapped" prevents us from fully embracing ankerl
return std::move(map->second.extract(it).mapped());
return {};
}
using FramePtr = std::shared_ptr<FFmpeg::Frame>;
std::mutex m_mutex{};
ankerl::unordered_dense::map<s32, std::deque<std::pair<u64, FramePtr>>> m_presentation_order;
ankerl::unordered_dense::map<s32, std::unordered_map<u64, FramePtr>> m_decode_order;
ankerl::unordered_dense::map<s32, FrameDevice> m_frame_devices;
static constexpr size_t MAX_PRESENT_QUEUE = 100;
static constexpr size_t MAX_DECODE_MAP = 200;
@ -196,11 +171,11 @@ public:
void StopDevice(s32 fd, ChannelType type);
void PushEntries(s32 fd, ChCommandHeaderList&& entries) {
auto it = devices.find(fd);
if (it == devices.end()) {
return;
if (auto const nvdec = std::get_if<Tegra::Host1x::Nvdec>(&devices[fd])) {
nvdec->PushEntries(std::move(entries));
} else if (auto const vic = std::get_if<Tegra::Host1x::Vic>(&devices[fd])) {
vic->PushEntries(std::move(entries));
}
it->second->PushEntries(std::move(entries));
}
Core::System& system;
@ -209,7 +184,11 @@ public:
Tegra::MemoryManager gmmu_manager;
Common::FlatAllocator<u32, 0, 32> allocator;
FrameQueue frame_queue;
ankerl::unordered_dense::map<s32, std::unique_ptr<CDmaPusher>> devices;
std::array<std::variant<
std::monostate,
Tegra::Host1x::Nvdec,
Tegra::Host1x::Vic
>, 1024> devices;
#ifdef YUZU_LEGACY
std::once_flag nvdec_first_init;
std::once_flag vic_first_init;