From b76e1de6da94a3d20175731da6f320ddf16e63e0 Mon Sep 17 00:00:00 2001 From: jackun Date: Sat, 18 Sep 2021 02:43:36 +0300 Subject: [PATCH] Support displaying multiple gpus' stats --- src/amdgpu.cpp | 108 +++++++++-------- src/amdgpu.h | 2 - src/cpu.cpp | 17 +-- src/file_utils.cpp | 4 +- src/file_utils.h | 2 +- src/gpu.cpp | 108 +++++++++-------- src/gpu.h | 135 +++++++++++++++++++-- src/hud_elements.cpp | 157 +++++++++++++++--------- src/hud_elements.h | 3 +- src/nvapi.cpp | 32 +++-- src/nvctrl.cpp | 33 ++++-- src/nvidia_info.h | 10 +- src/nvml.cpp | 131 +++++++++++++++------ src/overlay.cpp | 275 +++++++++++++++++++++++++------------------ src/overlay_params.h | 1 + 15 files changed, 649 insertions(+), 369 deletions(-) diff --git a/src/amdgpu.cpp b/src/amdgpu.cpp index 4f060224..f223c7c2 100644 --- a/src/amdgpu.cpp +++ b/src/amdgpu.cpp @@ -9,7 +9,7 @@ #define METRICS_POLLING_PERIOD_MS 5 #define METRICS_SAMPLE_COUNT (METRICS_UPDATE_PERIOD_MS/METRICS_POLLING_PERIOD_MS) -std::string metrics_path = ""; +//std::string metrics_path = ""; /* This structure is used to communicate the latest values of the amdgpu metrics. * The direction of communication is amdgpu_polling_thread -> amdgpu_get_metrics(). @@ -74,66 +74,62 @@ bool amdgpu_check_metrics(const std::string& path) } #define MAX(x, y) (((x) > (y)) ? (x) : (y)) -void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics) { - FILE *f; +void AMDGPUInfo::get_instant_metrics(struct amdgpu_common_metrics& metrics) { void *buf[MAX(sizeof(struct gpu_metrics_v1_3), sizeof(struct gpu_metrics_v2_2))]; struct metrics_table_header* header = (metrics_table_header*)buf; - f = fopen(metrics_path.c_str(), "rb"); - if (!f) - return; - + rewind(file); + fflush(file); // Read the whole file - if (!fread(buf, sizeof(buf), 1, f) == 0) { + if (fread(buf, sizeof(buf), 1, file) == 0) { SPDLOG_DEBUG("Failed to read amdgpu metrics file '{}'", metrics_path.c_str()); - fclose(f); + fclose(file); return; } - fclose(f); int64_t indep_throttle_status = 0; if (header->format_revision == 1) { // Desktop GPUs cpuStats.cpu_type = "GPU"; struct gpu_metrics_v1_3 *amdgpu_metrics = (struct gpu_metrics_v1_3 *) buf; - metrics->gpu_load_percent = amdgpu_metrics->average_gfx_activity; + metrics.gpu_load_percent = amdgpu_metrics->average_gfx_activity; - metrics->average_gfx_power_w = amdgpu_metrics->average_socket_power; + metrics.average_gfx_power_w = amdgpu_metrics->average_socket_power; - metrics->current_gfxclk_mhz = amdgpu_metrics->average_gfxclk_frequency; - metrics->current_uclk_mhz = amdgpu_metrics->current_uclk; + metrics.current_gfxclk_mhz = amdgpu_metrics->average_gfxclk_frequency; + metrics.current_uclk_mhz = amdgpu_metrics->current_uclk; - metrics->gpu_temp_c = amdgpu_metrics->temperature_edge; + metrics.gpu_temp_c = amdgpu_metrics->temperature_edge; indep_throttle_status = amdgpu_metrics->indep_throttle_status; } else if (header->format_revision == 2) { // APUs cpuStats.cpu_type = "APU"; struct gpu_metrics_v2_2 *amdgpu_metrics = (struct gpu_metrics_v2_2 *) buf; - metrics->gpu_load_percent = amdgpu_metrics->average_gfx_activity; + metrics.gpu_load_percent = amdgpu_metrics->average_gfx_activity; - metrics->average_gfx_power_w = amdgpu_metrics->average_gfx_power / 1000.f; - metrics->average_cpu_power_w = amdgpu_metrics->average_cpu_power / 1000.f; + metrics.average_gfx_power_w = amdgpu_metrics->average_gfx_power / 1000.f; + metrics.average_cpu_power_w = amdgpu_metrics->average_cpu_power / 1000.f; - metrics->current_gfxclk_mhz = amdgpu_metrics->current_gfxclk; - metrics->current_uclk_mhz = amdgpu_metrics->current_uclk; + metrics.current_gfxclk_mhz = amdgpu_metrics->current_gfxclk; + metrics.current_uclk_mhz = amdgpu_metrics->current_uclk; - metrics->soc_temp_c = amdgpu_metrics->temperature_soc / 100; - metrics->gpu_temp_c = amdgpu_metrics->temperature_gfx / 100; + metrics.soc_temp_c = amdgpu_metrics->temperature_soc / 100; + metrics.gpu_temp_c = amdgpu_metrics->temperature_gfx / 100; int cpu_temp = 0; for (unsigned i = 0; i < cpuStats.GetCPUData().size() / 2; i++) cpu_temp = MAX(cpu_temp, amdgpu_metrics->temperature_core[i]); - metrics->apu_cpu_temp_c = cpu_temp / 100; + metrics.apu_cpu_temp_c = cpu_temp / 100; indep_throttle_status = amdgpu_metrics->indep_throttle_status; } /* Throttling: See https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h for the offsets */ - metrics->is_power_throttled = ((indep_throttle_status >> 0) & 0xFF) != 0; - metrics->is_current_throttled = ((indep_throttle_status >> 16) & 0xFF) != 0; - metrics->is_temp_throttled = ((indep_throttle_status >> 32) & 0xFFFF) != 0; - metrics->is_other_throttled = ((indep_throttle_status >> 56) & 0xFF) != 0; + metrics.is_power_throttled = ((indep_throttle_status >> 0) & 0xFF) != 0; + metrics.is_current_throttled = ((indep_throttle_status >> 16) & 0xFF) != 0; + metrics.is_temp_throttled = ((indep_throttle_status >> 32) & 0xFFFF) != 0; + metrics.is_other_throttled = ((indep_throttle_status >> 56) & 0xFF) != 0; } #define UPDATE_METRIC_AVERAGE(FIELD) do { int value_sum = 0; for (size_t s=0; s < METRICS_SAMPLE_COUNT; s++) { value_sum += metrics_buffer[s].FIELD; } amdgpu_common_metrics.FIELD = value_sum / METRICS_SAMPLE_COUNT; } while(0) @@ -141,24 +137,25 @@ void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics) { #define UPDATE_METRIC_MAX(FIELD) do { int cur_max = metrics_buffer[0].FIELD; for (size_t s=1; s < METRICS_SAMPLE_COUNT; s++) { cur_max = MAX(cur_max, metrics_buffer[s].FIELD); }; amdgpu_common_metrics.FIELD = cur_max; } while(0) #define UPDATE_METRIC_LAST(FIELD) do { amdgpu_common_metrics.FIELD = metrics_buffer[METRICS_SAMPLE_COUNT - 1].FIELD; } while(0) -void amdgpu_metrics_polling_thread() { +void AMDGPUInfo::metrics_polling_thread() { + SPDLOG_DEBUG("{}", __func__); struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT]; bool gpu_load_needs_dividing = false; //some GPUs report load as centipercent // Initial poll of the metrics, so that we have values to display as fast as possible - amdgpu_get_instant_metrics(&amdgpu_common_metrics); + get_instant_metrics(amdgpu_common_metrics); if (amdgpu_common_metrics.gpu_load_percent > 100){ gpu_load_needs_dividing = true; amdgpu_common_metrics.gpu_load_percent /= 100; } - + // Set all the fields to 0 by default. Only done once as we're just replacing previous values after memset(metrics_buffer, 0, sizeof(metrics_buffer)); - while (1) { + while (!quit) { // Get all the samples for (size_t cur_sample_id=0; cur_sample_id < METRICS_SAMPLE_COUNT; cur_sample_id++) { - amdgpu_get_instant_metrics(&metrics_buffer[cur_sample_id]); + get_instant_metrics(metrics_buffer[cur_sample_id]); // Detect and fix if the gpu load is reported in centipercent if (gpu_load_needs_dividing || metrics_buffer[cur_sample_id].gpu_load_percent > 100){ @@ -168,7 +165,7 @@ void amdgpu_metrics_polling_thread() { usleep(METRICS_POLLING_PERIOD_MS * 1000); } - + // Copy the results from the different metrics to amdgpu_common_metrics amdgpu_common_metrics_m.lock(); UPDATE_METRIC_AVERAGE(gpu_load_percent); @@ -189,29 +186,38 @@ void amdgpu_metrics_polling_thread() { } } -void amdgpu_get_metrics(){ - static bool init = false; - if (!init){ - std::thread(amdgpu_metrics_polling_thread).detach(); - init = true; - } +bool AMDGPUInfo::init(){ + if (file) + return true; + + if (!(file = fopen(metrics_path.c_str(), "rb"))) + return false; + + AMDGPUHWMonInfo::init(); + + // TODO start polling only when actually used + thread = std::thread(&AMDGPUInfo::metrics_polling_thread, this); + return true; +} +void AMDGPUInfo::update(const overlay_params& params){ amdgpu_common_metrics_m.lock(); - gpu_info.load = amdgpu_common_metrics.gpu_load_percent; + info.load = amdgpu_common_metrics.gpu_load_percent; - gpu_info.powerUsage = amdgpu_common_metrics.average_gfx_power_w; - gpu_info.CoreClock = amdgpu_common_metrics.current_gfxclk_mhz; - gpu_info.MemClock = amdgpu_common_metrics.current_uclk_mhz; + info.power_usage = amdgpu_common_metrics.average_gfx_power_w; + info.core_clock = amdgpu_common_metrics.current_gfxclk_mhz; + info.memory_clock = amdgpu_common_metrics.current_uclk_mhz; // Use hwmon instead, see gpu.cpp - // gpu_info.temp = amdgpu_common_metrics.gpu_temp_c; - gpu_info.apu_cpu_power = amdgpu_common_metrics.average_cpu_power_w; - gpu_info.apu_cpu_temp = amdgpu_common_metrics.apu_cpu_temp_c; - - gpu_info.is_power_throttled = amdgpu_common_metrics.is_power_throttled; - gpu_info.is_current_throttled = amdgpu_common_metrics.is_current_throttled; - gpu_info.is_temp_throttled = amdgpu_common_metrics.is_temp_throttled; - gpu_info.is_other_throttled = amdgpu_common_metrics.is_other_throttled; + // info.temp = amdgpu_common_metrics.gpu_temp_c; + info.apu_cpu_power = amdgpu_common_metrics.average_cpu_power_w; + info.apu_cpu_temp = amdgpu_common_metrics.apu_cpu_temp_c; + + info.is_power_throttled = amdgpu_common_metrics.is_power_throttled; + info.is_current_throttled = amdgpu_common_metrics.is_current_throttled; + info.is_temp_throttled = amdgpu_common_metrics.is_temp_throttled; + info.is_other_throttled = amdgpu_common_metrics.is_other_throttled; + getAmdGpuInfo(files, info, true); amdgpu_common_metrics_m.unlock(); } diff --git a/src/amdgpu.h b/src/amdgpu.h index e8867de6..d3573ef8 100644 --- a/src/amdgpu.h +++ b/src/amdgpu.h @@ -140,5 +140,3 @@ struct gpu_metrics_v2_2 { }; bool amdgpu_check_metrics(const std::string& path); -extern void amdgpu_get_metrics(); -extern std::string metrics_path; diff --git a/src/cpu.cpp b/src/cpu.cpp index 035fd1ad..70583025 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -248,7 +248,7 @@ bool CPUStats::UpdateCoreMhz() { bool CPUStats::UpdateCpuTemp() { if (cpu_type == "APU"){ - m_cpuDataTotal.temp = gpu_info.apu_cpu_temp; + m_cpuDataTotal.temp = g_active_gpu ? g_active_gpu->info.apu_cpu_temp : 0; return true; } else { if (!m_cpuTempFile) @@ -349,7 +349,8 @@ static bool get_cpu_power_rapl(CPUPowerData* cpuPowerData, float& power) { } static bool get_cpu_power_amdgpu(float& power) { - power = gpu_info.apu_cpu_power; + if (g_active_gpu) + power = g_active_gpu->info.apu_cpu_power; //FIXME more reliable way to get APU stats return true; } @@ -383,7 +384,7 @@ bool CPUStats::UpdateCpuPower() { static bool find_temp_input(const std::string path, std::string& input, const std::string& name) { - auto files = ls(path.c_str(), "temp", LS_FILES); + auto files = ls(path, "temp", LS_FILES); for (auto& file : files) { if (!ends_with(file, "_label")) continue; @@ -404,7 +405,7 @@ static bool find_temp_input(const std::string path, std::string& input, const st static bool find_fallback_temp_input(const std::string path, std::string& input) { - auto files = ls(path.c_str(), "temp", LS_FILES); + auto files = ls(path, "temp", LS_FILES); if (!files.size()) return false; @@ -426,7 +427,7 @@ bool CPUStats::GetCpuFile() { std::string name, path, input; std::string hwmon = "/sys/class/hwmon/"; - auto dirs = ls(hwmon.c_str()); + auto dirs = ls(hwmon); for (auto& dir : dirs) { path = hwmon + dir; name = read_line(path + "/name"); @@ -458,7 +459,7 @@ bool CPUStats::GetCpuFile() { static bool find_input(const std::string& path, const char* input_prefix, std::string& input, const std::string& name) { - auto files = ls(path.c_str(), input_prefix, LS_FILES); + auto files = ls(path, input_prefix, LS_FILES); for (auto& file : files) { if (!ends_with(file, "_label")) continue; @@ -539,7 +540,7 @@ bool CPUStats::InitCpuPowerData() { CPUPowerData* cpuPowerData = nullptr; - auto dirs = ls(hwmon.c_str()); + auto dirs = ls(hwmon); for (auto& dir : dirs) { path = hwmon + dir; name = read_line(path + "/name"); @@ -558,7 +559,7 @@ bool CPUStats::InitCpuPowerData() { if (!cpuPowerData && intel) { std::string powercap = "/sys/class/powercap/"; - auto powercap_dirs = ls(powercap.c_str()); + auto powercap_dirs = ls(powercap); for (auto& dir : powercap_dirs) { path = powercap + dir; name = read_line(path + "/name"); diff --git a/src/file_utils.cpp b/src/file_utils.cpp index 9e3606d8..1546ce50 100644 --- a/src/file_utils.cpp +++ b/src/file_utils.cpp @@ -33,12 +33,12 @@ std::string get_basename(const std::string&& path) } #ifdef __linux__ -std::vector ls(const char* root, const char* prefix, LS_FLAGS flags) +std::vector ls(const std::string& root, const char* prefix, LS_FLAGS flags) { std::vector list; struct dirent* dp; - DIR* dirp = opendir(root); + DIR* dirp = opendir(root.c_str()); if (!dirp) { SPDLOG_ERROR("Error opening directory '{}': {}", root, strerror(errno)); return list; diff --git a/src/file_utils.h b/src/file_utils.h index 03b3d8eb..542dd16e 100644 --- a/src/file_utils.h +++ b/src/file_utils.h @@ -12,7 +12,7 @@ enum LS_FLAGS }; std::string read_line(const std::string& filename); -std::vector ls(const char* root, const char* prefix = nullptr, LS_FLAGS flags = LS_DIRS); +std::vector ls(const std::string& root, const char* prefix = nullptr, LS_FLAGS flags = LS_DIRS); bool file_exists(const std::string& path); bool dir_exists(const std::string& path); std::string read_symlink(const char * link); diff --git a/src/gpu.cpp b/src/gpu.cpp index 549f6fdf..e50dce53 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -7,6 +7,7 @@ #include #include "nvctrl.h" #include "timing.hpp" +#include "file_utils.h" #ifdef HAVE_NVML #include "nvidia_info.h" #endif @@ -15,65 +16,39 @@ using namespace std::chrono_literals; -struct gpuInfo gpu_info {}; -amdgpu_files amdgpu {}; +std::shared_ptr g_active_gpu; +std::unordered_map> g_gpu_devices; -bool checkNvidia(const char *pci_dev){ - bool nvSuccess = false; -#ifdef HAVE_NVML - nvSuccess = checkNVML(pci_dev) && getNVMLInfo({}); -#endif +bool NVCtrlInfo::init() +{ #ifdef HAVE_XNVCTRL - if (!nvSuccess) - nvSuccess = checkXNVCtrl(); + // FIXME correct device index + return checkXNVCtrl(); +#else + return false; #endif -#ifdef _WIN32 - if (!nvSuccess) - nvSuccess = checkNVAPI(); -#endif - return nvSuccess; } -void getNvidiaGpuInfo(const struct overlay_params& params){ -#ifdef HAVE_NVML - if (nvmlSuccess){ - getNVMLInfo(params); - gpu_info.load = nvidiaUtilization.gpu; - gpu_info.temp = nvidiaTemp; - gpu_info.memoryUsed = nvidiaMemory.used / (1024.f * 1024.f * 1024.f); - gpu_info.CoreClock = nvidiaCoreClock; - gpu_info.MemClock = nvidiaMemClock; - gpu_info.powerUsage = nvidiaPowerUsage / 1000; - gpu_info.memoryTotal = nvidiaMemory.total / (1024.f * 1024.f * 1024.f); - if (params.enabled[OVERLAY_PARAM_ENABLED_throttling_status]){ - gpu_info.is_temp_throttled = (nvml_throttle_reasons & 0x0000000000000060LL) != 0; - gpu_info.is_power_throttled = (nvml_throttle_reasons & 0x000000000000008CLL) != 0; - gpu_info.is_other_throttled = (nvml_throttle_reasons & 0x0000000000000112LL) != 0; - } - return; - } -#endif +void NVCtrlInfo::update(const struct overlay_params& params) +{ #ifdef HAVE_XNVCTRL if (nvctrlSuccess) { getNvctrlInfo(); - gpu_info.load = nvctrl_info.load; - gpu_info.temp = nvctrl_info.temp; - gpu_info.memoryUsed = nvctrl_info.memoryUsed / (1024.f); - gpu_info.CoreClock = nvctrl_info.CoreClock; - gpu_info.MemClock = nvctrl_info.MemClock; - gpu_info.powerUsage = 0; - gpu_info.memoryTotal = nvctrl_info.memoryTotal; + info.load = nvctrl_info.load; + info.temp = nvctrl_info.temp; + info.memory_used = nvctrl_info.memoryUsed; + info.core_clock = nvctrl_info.CoreClock; + info.memory_clock = nvctrl_info.MemClock; + info.power_usage = 0; + info.memory_total = nvctrl_info.memoryTotal; return; } #endif -#ifdef _WIN32 -nvapi_util(); -#endif } -void getAmdGpuInfo(){ +void getAmdGpuInfo(amdgpu_files& amdgpu, gpu_info& gpu_info, bool has_metrics){ int64_t value = 0; - if (metrics_path.empty()){ + if (!has_metrics){ if (amdgpu.busy) { rewind(amdgpu.busy); fflush(amdgpu.busy); @@ -89,7 +64,7 @@ void getAmdGpuInfo(){ if (fscanf(amdgpu.core_clock, "%" PRId64, &value) != 1) value = 0; - gpu_info.CoreClock = value / 1000000; + gpu_info.core_clock = value / 1000000; } if (amdgpu.memory_clock) { @@ -98,7 +73,7 @@ void getAmdGpuInfo(){ if (fscanf(amdgpu.memory_clock, "%" PRId64, &value) != 1) value = 0; - gpu_info.MemClock = value / 1000000; + gpu_info.memory_clock = value / 1000000; } if (amdgpu.power_usage) { @@ -107,7 +82,7 @@ void getAmdGpuInfo(){ if (fscanf(amdgpu.power_usage, "%" PRId64, &value) != 1) value = 0; - gpu_info.powerUsage = value / 1000000; + gpu_info.power_usage = value / 1000000; } } @@ -116,7 +91,7 @@ void getAmdGpuInfo(){ fflush(amdgpu.vram_total); if (fscanf(amdgpu.vram_total, "%" PRId64, &value) != 1) value = 0; - gpu_info.memoryTotal = float(value) / (1024 * 1024 * 1024); + gpu_info.memory_total = value; } if (amdgpu.vram_used) { @@ -124,10 +99,10 @@ void getAmdGpuInfo(){ fflush(amdgpu.vram_used); if (fscanf(amdgpu.vram_used, "%" PRId64, &value) != 1) value = 0; - gpu_info.memoryUsed = float(value) / (1024 * 1024 * 1024); + gpu_info.memory_used = value; } // On some GPUs SMU can sometimes return the wrong temperature. - // As HWMON is way more visible than the SMU metrics, let's always trust it as it is the most likely to work + // As HWMON is way more visible than the SMU metrics, let's always trust it as it is the most likely to work if (amdgpu.temp){ rewind(amdgpu.temp); fflush(amdgpu.temp); @@ -142,6 +117,35 @@ void getAmdGpuInfo(){ fflush(amdgpu.gtt_used); if (fscanf(amdgpu.gtt_used, "%" PRId64, &value) != 1) value = 0; - gpu_info.gtt_used = float(value) / (1024 * 1024 * 1024); + gpu_info.gtt_used = value; + } +} + +bool AMDGPUHWMonInfo::init() +{ + const auto device_path = sysfs_path + "/device"; + const auto hwmon_path = device_path + "/hwmon/"; + files.busy = fopen((device_path + "/gpu_busy_percent").c_str(), "r"); + files.vram_total = fopen((device_path + "/mem_info_vram_total").c_str(), "r"); + files.vram_used = fopen((device_path + "/mem_info_vram_used").c_str(), "r"); + files.gtt_used = fopen((device_path + "/mem_info_gtt_used").c_str(), "r"); + + const auto dirs = ls(hwmon_path, "hwmon", LS_DIRS); + for (const auto& dir : dirs) { + if (!files.core_clock) + files.core_clock = fopen((hwmon_path + dir + "/freq1_input").c_str(), "r"); + if (!files.memory_clock) + files.memory_clock = fopen((hwmon_path + dir + "/freq2_input").c_str(), "r"); + if (!files.temp) + files.temp = fopen((hwmon_path + dir + "/temp1_input").c_str(), "r"); + if (!files.power_usage) + files.power_usage = fopen((hwmon_path + dir + "/power1_average").c_str(), "r"); } + + return files.busy && files.temp && files.vram_total && files.vram_used; +} + +void AMDGPUHWMonInfo::update(const struct overlay_params& params) +{ + getAmdGpuInfo(files, info, false); } diff --git a/src/gpu.h b/src/gpu.h index ebbfc2fe..d772e397 100644 --- a/src/gpu.h +++ b/src/gpu.h @@ -5,6 +5,10 @@ #include #include #include "overlay_params.h" +#include +#include +#include +#include struct amdgpu_files { @@ -19,29 +23,138 @@ struct amdgpu_files FILE *gtt_used; }; -extern amdgpu_files amdgpu; - -struct gpuInfo{ +struct gpu_info { int load; int temp; - float memoryUsed; - float memoryTotal; - int MemClock; - int CoreClock; - float powerUsage; + uint64_t memory_used; + uint64_t memory_total; + int memory_clock; + int core_clock; + float power_usage; float apu_cpu_power; int apu_cpu_temp; bool is_power_throttled; bool is_current_throttled; bool is_temp_throttled; bool is_other_throttled; - float gtt_used; + uint64_t gtt_used; +}; + +struct gpu_handles +{ + virtual ~gpu_handles() {}; +}; + +struct gpu_device +{ + gpu_device(const std::string& sysfs, const std::string& pci) + : sysfs_path(sysfs) + , pci_device(pci) + {} + virtual ~gpu_device() {} + virtual void update(const struct overlay_params& params) = 0; + virtual bool init() = 0; + + std::string sysfs_path; + std::string pci_device; + std::string dev_name; + gpu_info info {}; + uint32_t vendorID {}, deviceID {}; + gpu_handles* device {}; +}; + +struct DummyGpu : public gpu_device +{ + DummyGpu() : gpu_device({}, {}) + { + dev_name = "dummy"; + } + void update(const struct overlay_params& params) {} + bool init() { return true; } +}; + +struct NVMLInfo : public gpu_device +{ + NVMLInfo(const std::string& sysfs, const std::string& pci) : gpu_device(sysfs, pci) {} + void update(const struct overlay_params& params); + bool init(); +}; + +struct NVCtrlInfo : public gpu_device +{ + NVCtrlInfo(const std::string& sysfs, const std::string& pci) : gpu_device(sysfs, pci) {} + void update(const struct overlay_params& params); + bool init(); +}; + +struct NVAPIInfo : public gpu_device +{ + NVAPIInfo() : gpu_device({}, {}) {} + void update(const struct overlay_params& params); + bool init(); +}; + +struct AMDGPUHWMonInfo : public gpu_device +{ + AMDGPUHWMonInfo(const std::string& sysfs, const std::string& pci) : gpu_device(sysfs, pci) {} + virtual ~AMDGPUHWMonInfo() + { + delete device; + if (files.busy) + fclose(files.busy); + if (files.temp) + fclose(files.temp); + if (files.vram_total) + fclose(files.vram_total); + if (files.vram_used) + fclose(files.vram_used); + if (files.core_clock) + fclose(files.core_clock); + if (files.memory_clock) + fclose(files.memory_clock); + if (files.power_usage) + fclose(files.power_usage); + files = {}; + } + + virtual void update(const struct overlay_params& params); + virtual bool init(); + + amdgpu_files files {}; +}; + +struct AMDGPUInfo : public AMDGPUHWMonInfo +{ + AMDGPUInfo(const std::string& metrics_, const std::string& sysfs, const std::string& pci) + : AMDGPUHWMonInfo(sysfs, pci) + , metrics_path(metrics_) + {} + + ~AMDGPUInfo() + { + quit = true; + if (thread.joinable()) + thread.join(); + if (file) + fclose(file); + } + void update(const struct overlay_params& params); + bool init(); +private: + void metrics_polling_thread(); + void get_instant_metrics(struct amdgpu_common_metrics& metrics); + + bool quit {false}; + FILE *file {}; + std::string metrics_path; + std::thread thread; }; -extern struct gpuInfo gpu_info; +extern std::shared_ptr g_active_gpu; +extern std::unordered_map> g_gpu_devices; void getNvidiaGpuInfo(const struct overlay_params& params); -void getAmdGpuInfo(void); +void getAmdGpuInfo(amdgpu_files& amdgpu, gpu_info& gpu_info, bool has_metrics = false); bool checkNvidia(const char *pci_dev); extern void nvapi_util(); extern bool checkNVAPI(); diff --git a/src/hud_elements.cpp b/src/hud_elements.cpp index 1b7353aa..cac8cdec 100644 --- a/src/hud_elements.cpp +++ b/src/hud_elements.cpp @@ -156,15 +156,57 @@ void HudElements::version(){ } } -void HudElements::gpu_stats(){ - if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats]){ +static void per_gpu_vram(const gpu_info& gpu_info){ + if (!HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_vram]) + return; + + ImGui::TableNextRow(); ImGui::TableNextColumn(); + ImGui::TextColored(HUDElements.colors.vram, "VRAM"); + ImGui::TableNextColumn(); + // Add gtt_used to vram usage for APUs + if (cpuStats.cpu_type == "APU") + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu_info.memory_used + gpu_info.gtt_used); + else + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu_info.memory_used); + ImGui::SameLine(0,1.0f); + ImGui::PushFont(HUDElements.sw_stats->font1); + ImGui::Text("GiB"); + ImGui::PopFont(); +#ifndef MANGOAPP + if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock]){ + ImguiNextColumnOrNewRow(); + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu_info.memory_clock); + ImGui::SameLine(0, 1.0f); + ImGui::PushFont(HUDElements.sw_stats->font1); + ImGui::Text("MHz"); + ImGui::PopFont(); + } +#endif +} + +void HudElements::vram(){ + if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_show_all_gpus]) + return; + + if (g_active_gpu) + per_gpu_vram(g_active_gpu->info); +} + +static void per_gpu_stats(const gpu_device* gpu, bool single){ + if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats]){ ImGui::TableNextRow(); ImGui::TableNextColumn(); const char* gpu_text; - if (HUDElements.params->gpu_text.empty()) + if (!single) + gpu_text = gpu->dev_name.c_str(); + else if (HUDElements.params->gpu_text.empty()) gpu_text = "GPU"; else gpu_text = HUDElements.params->gpu_text.c_str(); ImGui::TextColored(HUDElements.colors.gpu, "%s", gpu_text); + if (!single) { + ImGui::TableNextRow(); + ImGui::TableNextColumn(); + } ImGui::TableNextColumn(); auto text_color = HUDElements.colors.text; if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_load_change]){ @@ -176,13 +218,13 @@ void HudElements::gpu_stats(){ HUDElements.params->gpu_load_value[1] }; - auto load_color = change_on_load_temp(gpu_data, gpu_info.load); - right_aligned_text(load_color, HUDElements.ralign_width, "%i", gpu_info.load); + auto load_color = change_on_load_temp(gpu_data, gpu->info.load); + right_aligned_text(load_color, HUDElements.ralign_width, "%i", gpu->info.load); ImGui::SameLine(0, 1.0f); ImGui::TextColored(load_color,"%%"); } else { - right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.load); + right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->info.load); ImGui::SameLine(0, 1.0f); ImGui::TextColored(text_color,"%%"); // ImGui::SameLine(150); @@ -190,7 +232,7 @@ void HudElements::gpu_stats(){ } if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_temp]){ ImguiNextColumnOrNewRow(); - right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.temp); + right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->info.temp); ImGui::SameLine(0, 1.0f); ImGui::Text("°C"); } @@ -199,7 +241,7 @@ void HudElements::gpu_stats(){ } if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock]){ ImguiNextColumnOrNewRow(); - right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.CoreClock); + right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->info.core_clock); ImGui::SameLine(0, 1.0f); ImGui::PushFont(HUDElements.sw_stats->font1); ImGui::Text("MHz"); @@ -208,9 +250,9 @@ void HudElements::gpu_stats(){ if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power]) { ImguiNextColumnOrNewRow(); #ifdef MANGOAPP - right_aligned_text(text_color, HUDElements.ralign_width, "%.1f", gpu_info.powerUsage); + right_aligned_text(text_color, HUDElements.ralign_width, "%.1f", gpu->info.power_usage); #else - right_aligned_text(text_color, HUDElements.ralign_width, "%.0f", gpu_info.powerUsage); + right_aligned_text(text_color, HUDElements.ralign_width, "%.0f", gpu->info.power_usage); #endif ImGui::SameLine(0, 1.0f); ImGui::PushFont(HUDElements.sw_stats->font1); @@ -218,6 +260,25 @@ void HudElements::gpu_stats(){ ImGui::PopFont(); } } + + if (!single) + per_gpu_vram(gpu->info); +} + +void HudElements::gpu_stats(){ + auto p = HUDElements.params; + if (!p->enabled[OVERLAY_PARAM_ENABLED_gpu_stats]) + return; + + if (p->enabled[OVERLAY_PARAM_ENABLED_show_all_gpus]) + { + for (const auto& g : g_gpu_devices) + per_gpu_stats(g.second.get(), false); + return; + } + + if (g_active_gpu) + per_gpu_stats(g_active_gpu.get(), true); } void HudElements::cpu_stats(){ @@ -359,33 +420,6 @@ void HudElements::io_stats(){ #endif } -void HudElements::vram(){ - if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_vram]){ - ImGui::TableNextRow(); ImGui::TableNextColumn(); - ImGui::TextColored(HUDElements.colors.vram, "VRAM"); - ImGui::TableNextColumn(); - // Add gtt_used to vram usage for APUs - if (cpuStats.cpu_type == "APU") - right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu_info.memoryUsed + gpu_info.gtt_used); - else - right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu_info.memoryUsed); - ImGui::SameLine(0,1.0f); - ImGui::PushFont(HUDElements.sw_stats->font1); - ImGui::Text("GiB"); - ImGui::PopFont(); -#ifndef MANGOAPP - if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock]){ - ImguiNextColumnOrNewRow(); - right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu_info.MemClock); - ImGui::SameLine(0, 1.0f); - ImGui::PushFont(HUDElements.sw_stats->font1); - ImGui::Text("MHz"); - ImGui::PopFont(); - } -#endif - } -} - void HudElements::ram(){ #ifdef __linux__ if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_ram]){ @@ -504,11 +538,11 @@ void HudElements::fps_only(){ } void HudElements::gpu_name(){ - if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_name] && !HUDElements.sw_stats->gpuName.empty()){ + if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_name] && g_active_gpu && !g_active_gpu->dev_name.empty()){ ImGui::TableNextRow(); ImGui::TableNextColumn(); ImGui::PushFont(HUDElements.sw_stats->font1); ImGui::TextColored(HUDElements.colors.engine, - "%s", HUDElements.sw_stats->gpuName.c_str()); + "%s", g_active_gpu->dev_name.c_str()); ImGui::PopFont(); } } @@ -944,23 +978,34 @@ void HudElements::fan(){ } } +static void gpu_throttling_status(const gpu_info& gpu_info, const std::string& name) +{ + if (gpu_info.is_power_throttled || gpu_info.is_current_throttled || gpu_info.is_temp_throttled || gpu_info.is_other_throttled){ + ImGui::TableNextRow(); ImGui::TableNextColumn(); + ImGui::TextColored(HUDElements.colors.engine, "%s %s", "Throttling", name.c_str()); + ImGui::TableNextColumn(); + ImGui::TableNextColumn(); + if (gpu_info.is_power_throttled) + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Power"); + if (gpu_info.is_current_throttled) + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Current"); + if (gpu_info.is_temp_throttled) + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Temp"); + if (gpu_info.is_other_throttled) + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Other"); + } +} + void HudElements::throttling_status(){ - if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_throttling_status]){ - if (gpu_info.is_power_throttled || gpu_info.is_current_throttled || gpu_info.is_temp_throttled || gpu_info.is_other_throttled){ - ImGui::TableNextRow(); ImGui::TableNextColumn(); - ImGui::TextColored(HUDElements.colors.engine, "%s", "Throttling"); - ImGui::TableNextColumn(); - ImGui::TableNextColumn(); - if (gpu_info.is_power_throttled) - right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Power"); - if (gpu_info.is_current_throttled) - right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Current"); - if (gpu_info.is_temp_throttled) - right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Temp"); - if (gpu_info.is_other_throttled) - right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Other"); - } + if (!HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_throttling_status]) + return; + + if (!HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_show_all_gpus]) { + for (const auto& it : g_gpu_devices) + gpu_throttling_status(it.second->info, it.second->dev_name); } + else if (g_active_gpu) + gpu_throttling_status(g_active_gpu->info, g_active_gpu->dev_name); } void HudElements::graphs(){ @@ -1037,12 +1082,12 @@ void HudElements::graphs(){ ImGui::TextColored(HUDElements.colors.engine, "%s", "GPU Mem Clock"); } - if (value == "vram"){ + if (value == "vram" && g_active_gpu){ for (auto& it : graph_data){ arr.push_back(float(it.gpu_vram_used)); } - HUDElements.max = gpu_info.memoryTotal; + HUDElements.max = g_active_gpu->info.memory_total; HUDElements.min = 0; ImGui::TextColored(HUDElements.colors.engine, "%s", "VRAM"); } diff --git a/src/hud_elements.h b/src/hud_elements.h index 93c6b8ce..43bfb5bd 100644 --- a/src/hud_elements.h +++ b/src/hud_elements.h @@ -24,7 +24,8 @@ class HudElements{ Clock::time_point last_exec; std::vector> options; std::vector> ordered_functions; - int min, max, gpu_core_max, gpu_mem_max, cpu_temp_max, gpu_temp_max; + float min, max; + int gpu_core_max, gpu_mem_max, cpu_temp_max, gpu_temp_max; const std::vector permitted_params = { "gpu_load", "cpu_load", "gpu_core_clock", "gpu_mem_clock", "vram", "ram", "cpu_temp", "gpu_temp" diff --git a/src/nvapi.cpp b/src/nvapi.cpp index 19e909e1..5bf22efe 100644 --- a/src/nvapi.cpp +++ b/src/nvapi.cpp @@ -40,28 +40,34 @@ bool checkNVAPI(){ NvAPI_Initialize = (NvAPI_Initialize_t) (*NvAPI_QueryInterface)(0x0150E828); NvAPI_EnumPhysicalGPUs = (NvAPI_EnumPhysicalGPUs_t) (*NvAPI_QueryInterface)(0xE5AC921F); NvAPI_GPU_GetUsages = (NvAPI_GPU_GetUsages_t) (*NvAPI_QueryInterface)(0x189A1FDF); - if (NvAPI_Initialize == NULL || NvAPI_EnumPhysicalGPUs == NULL || - NvAPI_EnumPhysicalGPUs == NULL || NvAPI_GPU_GetUsages == NULL) + + if (!NvAPI_Initialize || !NvAPI_EnumPhysicalGPUs || !NvAPI_EnumPhysicalGPUs || !NvAPI_GPU_GetUsages) { std::cerr << "Couldn't get functions in nvapi.dll" << std::endl; return 2; } - (*NvAPI_Initialize)(); - - int *gpuHandles[NVAPI_MAX_PHYSICAL_GPUS] = { NULL }; + NvAPI_Initialize(); + + NvAPI_EnumPhysicalGPUs(gpuHandles, &gpuCount); return true; } -void nvapi_util() -{ +bool NVAPIInfo::init() +{ + if (!init_nvapi_bool) + init_nvapi_bool = checkNVAPI(); + return init_nvapi_bool; +} + +void NVAPIInfo::update() +{ if (!init_nvapi_bool){ init_nvapi_bool = checkNVAPI(); } - - gpuUsages[0] = (NVAPI_MAX_USAGES_PER_GPU * 4) | 0x10000; - (*NvAPI_EnumPhysicalGPUs)(gpuHandles, &gpuCount); - (*NvAPI_GPU_GetUsages)(gpuHandles[0], gpuUsages); - gpu_info.load = gpuUsages[3]; -} \ No newline at end of file + gpuUsages[0] = (NVAPI_MAX_USAGES_PER_GPU * 4) | 0x10000; + NvAPI_GPU_GetUsages(gpuHandles[0], gpuUsages); + if (g_active_gpu) + g_active_gpu->info.load = gpuUsages[3]; +} diff --git a/src/nvctrl.cpp b/src/nvctrl.cpp index daedd80d..9d9bfd72 100644 --- a/src/nvctrl.cpp +++ b/src/nvctrl.cpp @@ -17,17 +17,22 @@ static std::unique_ptr> display; struct nvctrlInfo nvctrl_info; bool nvctrlSuccess = false; -static bool find_nv_x11(libnvctrl_loader& nvctrl, Display*& dpy) +static bool find_nv_x11(libnvctrl_loader& nvctrl, Display*& dpy, int& scr) { char buf[8] {}; for (int i = 0; i < 16; i++) { snprintf(buf, sizeof(buf), ":%d", i); Display *d = g_x11->XOpenDisplay(buf); if (d) { - if (nvctrl.XNVCTRLIsNvScreen(d, 0)) { - dpy = d; - SPDLOG_DEBUG("XNVCtrl is using display {}", buf); - return true; + int nscreens = ScreenCount(d); //FIXME yes, no, maybe? + for (int screen = 0; screen < nscreens; screen++) + { + if (nvctrl.XNVCTRLIsNvScreen(d, screen)) { + dpy = d; + scr = screen; + SPDLOG_DEBUG("XNVCtrl is using display {}", buf); + return true; + } } g_x11->XCloseDisplay(d); } @@ -46,20 +51,15 @@ bool checkXNVCtrl() return false; } - Display *dpy; - nvctrlSuccess = find_nv_x11(nvctrl, dpy); + Display *dpy = nullptr; + int screen = 0; + nvctrlSuccess = find_nv_x11(nvctrl, dpy, screen); if (!nvctrlSuccess) { SPDLOG_ERROR("XNVCtrl didn't find the correct display"); return false; } - auto local_x11 = g_x11; - display = { dpy, - [local_x11](Display *dpy) { - local_x11->XCloseDisplay(dpy); - } - }; // get device id at init int64_t pci_id; nvctrl.XNVCTRLQueryTargetAttribute64(display.get(), @@ -70,6 +70,13 @@ bool checkXNVCtrl() &pci_id); deviceID = (pci_id & 0xFFFF); + auto local_x11 = g_x11; + display = { dpy, + [local_x11](Display *dpy) { + local_x11->XCloseDisplay(dpy); + } + }; + return true; } diff --git a/src/nvidia_info.h b/src/nvidia_info.h index 518d81ac..bf6fd421 100644 --- a/src/nvidia_info.h +++ b/src/nvidia_info.h @@ -5,15 +5,9 @@ #include #include "overlay_params.h" -extern nvmlReturn_t result; -extern unsigned int nvidiaTemp, processSamplesCount, *vgpuInstanceSamplesCount, nvidiaCoreClock, nvidiaMemClock, nvidiaPowerUsage; -extern nvmlDevice_t nvidiaDevice; -extern struct nvmlUtilization_st nvidiaUtilization; -extern struct nvmlMemory_st nvidiaMemory; extern bool nvmlSuccess; -extern unsigned long long nvml_throttle_reasons; -bool checkNVML(const char* pciBusId); -bool getNVMLInfo(const struct overlay_params& params); +bool checkNVML(const char* pciBusId, nvmlDevice_t& device, uint32_t& device_id); +bool getNVMLInfo(nvmlDevice_t device, const struct overlay_params& params); #endif //MANGOHUD_NVIDIA_INFO_H diff --git a/src/nvml.cpp b/src/nvml.cpp index 2b682fcd..ff26d566 100644 --- a/src/nvml.cpp +++ b/src/nvml.cpp @@ -4,6 +4,7 @@ #include #include "overlay.h" #include "overlay_params.h" +#include "gpu.h" nvmlReturn_t result; nvmlDevice_t nvidiaDevice; @@ -14,50 +15,64 @@ unsigned long long nvml_throttle_reasons; struct nvmlUtilization_st nvidiaUtilization; struct nvmlMemory_st nvidiaMemory {}; -bool checkNVML(const char* pciBusId){ - auto& nvml = get_libnvml_loader(); - if (nvml.IsLoaded()){ - result = nvml.nvmlInit(); - if (NVML_SUCCESS != result) { - SPDLOG_ERROR("Nvidia module not loaded"); - } else { - nvmlReturn_t ret = NVML_ERROR_UNKNOWN; - if (pciBusId && ((ret = nvml.nvmlDeviceGetHandleByPciBusId(pciBusId, &nvidiaDevice)) != NVML_SUCCESS)) { - SPDLOG_ERROR("Getting device handle by PCI bus ID failed: {}", nvml.nvmlErrorString(ret)); - SPDLOG_ERROR("Using index 0."); - } +static std::unique_ptr> nvml_shutdown; - if (ret != NVML_SUCCESS) - ret = nvml.nvmlDeviceGetHandleByIndex(0, &nvidiaDevice); - - if (ret != NVML_SUCCESS) - SPDLOG_ERROR("Getting device handle failed: {}", nvml.nvmlErrorString(ret)); +bool checkNVML() +{ + auto& nvml = get_libnvml_loader(); + if (!nvml.IsLoaded()) + { + SPDLOG_ERROR("Failed to load NVML"); + return false; + } - nvmlSuccess = (ret == NVML_SUCCESS); - if (ret == NVML_SUCCESS) - nvml.nvmlDeviceGetPciInfo_v3(nvidiaDevice, &nvidiaPciInfo); + if (nvmlSuccess) + return nvmlSuccess; - return nvmlSuccess; - } - } else { - SPDLOG_ERROR("Failed to load NVML"); + result = nvml.nvmlInit(); + if (NVML_SUCCESS != result) + { + SPDLOG_ERROR("Nvidia module not loaded"); + return false; } - return false; + nvml_shutdown = { &nvml, + [](libnvml_loader *nvml) -> void { + nvml->nvmlShutdown(); + } + }; + nvmlSuccess = true; + return nvmlSuccess; } -bool getNVMLInfo(const struct overlay_params& params){ +bool getNVMLInfo(nvmlDevice_t device, gpu_info& gpu_info, const struct overlay_params& params){ nvmlReturn_t response; + unsigned long long nvml_throttle_reasons = 0; + unsigned int nvidiaTemp, nvidiaCoreClock, nvidiaMemClock, nvidiaPowerUsage; + struct nvmlUtilization_st nvidiaUtilization; + struct nvmlMemory_st nvidiaMemory; + auto& nvml = get_libnvml_loader(); - response = nvml.nvmlDeviceGetUtilizationRates(nvidiaDevice, &nvidiaUtilization); - nvml.nvmlDeviceGetTemperature(nvidiaDevice, NVML_TEMPERATURE_GPU, &nvidiaTemp); - nvml.nvmlDeviceGetMemoryInfo(nvidiaDevice, &nvidiaMemory); - nvml.nvmlDeviceGetClockInfo(nvidiaDevice, NVML_CLOCK_GRAPHICS, &nvidiaCoreClock); - nvml.nvmlDeviceGetClockInfo(nvidiaDevice, NVML_CLOCK_MEM, &nvidiaMemClock); - nvml.nvmlDeviceGetPowerUsage(nvidiaDevice, &nvidiaPowerUsage); - deviceID = nvidiaPciInfo.pciDeviceId >> 16; - if (params.enabled[OVERLAY_PARAM_ENABLED_throttling_status]) - nvml.nvmlDeviceGetCurrentClocksThrottleReasons(nvidiaDevice, &nvml_throttle_reasons); + response = nvml.nvmlDeviceGetUtilizationRates(device, &nvidiaUtilization); + nvml.nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &nvidiaTemp); + nvml.nvmlDeviceGetMemoryInfo(device, &nvidiaMemory); + nvml.nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &nvidiaCoreClock); + nvml.nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &nvidiaMemClock); + nvml.nvmlDeviceGetPowerUsage(device, &nvidiaPowerUsage); + if (params.enabled[OVERLAY_PARAM_ENABLED_throttling_status]){ + nvml.nvmlDeviceGetCurrentClocksThrottleReasons(device, &nvml_throttle_reasons); + gpu_info.is_temp_throttled = (nvml_throttle_reasons & 0x0000000000000060LL) != 0; + gpu_info.is_power_throttled = (nvml_throttle_reasons & 0x000000000000008CLL) != 0; + gpu_info.is_other_throttled = (nvml_throttle_reasons & 0x0000000000000112LL) != 0; + } + + gpu_info.load = nvidiaUtilization.gpu; + gpu_info.temp = nvidiaTemp; + gpu_info.memory_used = nvidiaMemory.used; + gpu_info.core_clock = nvidiaCoreClock; + gpu_info.memory_clock = nvidiaMemClock; + gpu_info.power_usage = nvidiaPowerUsage / 1000; + gpu_info.memory_total = nvidiaMemory.total; if (response == NVML_ERROR_NOT_SUPPORTED) { if (nvmlSuccess) @@ -66,3 +81,49 @@ bool getNVMLInfo(const struct overlay_params& params){ } return nvmlSuccess; } + +bool NVMLInfo::init() +{ + nvmlDevice_t nvml_dev; + if (!checkNVML()) + return false; + + auto& nvml = get_libnvml_loader(); + nvmlReturn_t ret = NVML_ERROR_UNKNOWN; + if ((ret = nvml.nvmlDeviceGetHandleByPciBusId(pci_device.c_str(), &nvml_dev)) != NVML_SUCCESS) + { + SPDLOG_ERROR("Getting device handle by PCI bus ID failed: {}", nvml.nvmlErrorString(ret)); + } + + if (ret != NVML_SUCCESS) + { + unsigned int deviceCount = 0; + ret = nvml.nvmlDeviceGetCount(&deviceCount); + + if (ret == NVML_SUCCESS) + { + for (unsigned i = 0; i < deviceCount; i++) + { + ret = nvml.nvmlDeviceGetHandleByIndex(0, &nvml_dev); + if (ret != NVML_SUCCESS) + SPDLOG_ERROR("Getting device {} handle failed: {}", i, nvml.nvmlErrorString(ret)); + else if (nvml.nvmlDeviceGetPciInfo_v3(nvml_dev, &nvidiaPciInfo) == NVML_SUCCESS) + { + if (this->deviceID == nvidiaPciInfo.pciDeviceId >> 16) + break; + } + } + } + } + + device = reinterpret_cast(nvml_dev); + return true; +} + +void NVMLInfo::update(const struct overlay_params& params) +{ + if (nvmlSuccess){ + getNVMLInfo(reinterpret_cast(device), info, params); + return; + } +} diff --git a/src/overlay.cpp b/src/overlay.cpp index 78e70545..d79a751b 100644 --- a/src/overlay.cpp +++ b/src/overlay.cpp @@ -48,7 +48,6 @@ std::deque graph_data; const char* engines[] = {"Unknown", "OpenGL", "VULKAN", "DXVK", "VKD3D", "DAMAVAND", "ZINK", "WINED3D", "Feral3D", "ToGL", "GAMESCOPE"}; overlay_params *_params {}; double min_frametime, max_frametime; -bool gpu_metrics_exists = false; bool steam_focused = false; vector frametime_data(200,0.f); int fan_speed; @@ -120,14 +119,8 @@ void update_hw_info(const struct overlay_params& params, uint32_t vendorID) #endif } if (params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] || logger->is_active()) { - if (vendorID == 0x1002) - getAmdGpuInfo(); - - if (gpu_metrics_exists) - amdgpu_get_metrics(); - - if (vendorID == 0x10de) - getNvidiaGpuInfo(params); + for (auto& gpu : g_gpu_devices) + gpu.second->update(params); } #ifdef __linux__ @@ -147,12 +140,15 @@ void update_hw_info(const struct overlay_params& params, uint32_t vendorID) getIoStats(g_io_stats); #endif - currentLogData.gpu_load = gpu_info.load; - currentLogData.gpu_temp = gpu_info.temp; - currentLogData.gpu_core_clock = gpu_info.CoreClock; - currentLogData.gpu_mem_clock = gpu_info.MemClock; - currentLogData.gpu_vram_used = gpu_info.memoryUsed; - currentLogData.gpu_power = gpu_info.powerUsage; + if (g_active_gpu) + { + currentLogData.gpu_load = g_active_gpu->info.load; + currentLogData.gpu_temp = g_active_gpu->info.temp; + currentLogData.gpu_core_clock = g_active_gpu->info.core_clock; + currentLogData.gpu_mem_clock = g_active_gpu->info.memory_clock; + currentLogData.gpu_vram_used = g_active_gpu->info.memory_used; + currentLogData.gpu_power = g_active_gpu->info.power_usage; + } #ifdef __linux__ currentLogData.ram_used = memused; #endif @@ -624,7 +620,98 @@ struct pci_bus { int func; }; -void init_gpu_stats(uint32_t& vendorID, uint32_t reported_deviceID, overlay_params& params) +static void enumerate_gpus(overlay_params& params) +{ +#ifdef WIN32 + auto gpu = std::make_shared(); + if (gpu->init()) + g_gpu_devices["nvapi_0"] = gpu; + return; +#endif + +#ifdef __gnu_linux__ + string path; + string drm = "/sys/class/drm/"; + + auto dirs = ls(drm, "card"); + for (auto& dir : dirs) { + path = drm + dir; + + // skip display outputs + if (!file_exists(path + "/device/vendor")) + continue; + + string vendor = read_line(path + "/device/vendor"); + uint32_t vendor_id = strtoul(vendor.c_str(), NULL, 16); + + string device = read_line(path + "/device/device"); + uint32_t device_id = strtoul(device.c_str(), NULL, 16); // OGL might fail so read from sysfs + + const std::string device_path = path + "/device"; + string pci_device = read_symlink(device_path.c_str()); + auto pos = pci_device.find_last_of('/'); + pci_device = pci_device.substr(pos != std::string::npos ? pos + 1 : 0); + + string module = get_basename(read_symlink(path + "/device/driver/module")); + SPDLOG_DEBUG("using device path: {}, module: {}, pci device: {}", path, module, pci_device); + + auto dev_name = get_device_name(vendor_id, device_id); + if (module == "amdgpu") + { + const std::string gpu_metrics_path = device_path + "/gpu_metrics"; + if (amdgpu_check_metrics(gpu_metrics_path)) { + SPDLOG_DEBUG("Using gpu_metrics of {}", gpu_metrics_path); + auto gpu = std::make_shared(gpu_metrics_path, path, pci_device); + if (gpu->init()) + { + gpu->vendorID = vendor_id; + gpu->deviceID = device_id; + gpu->dev_name = dev_name; + g_gpu_devices[pci_device] = gpu; + } + } + else { + + if (!file_exists(path + "/device/gpu_busy_percent")) + continue; + + auto gpu = std::make_shared(path, pci_device); + if (gpu->init()) + { + gpu->vendorID = vendor_id; + gpu->deviceID = device_id; + gpu->dev_name = dev_name; + g_gpu_devices[pci_device] = gpu; + } + } + } + else if (module == "nvidia") + { + auto gpu = std::make_shared(path, pci_device); + if (gpu->init()) + { + gpu->vendorID = vendor_id; + gpu->deviceID = device_id; + gpu->dev_name = dev_name; + g_gpu_devices[pci_device] = gpu; + } + else + { + auto gpu = std::make_shared(path, pci_device); + if (gpu->init()) + { + gpu->vendorID = vendor_id; + gpu->deviceID = device_id; + gpu->dev_name = dev_name; + g_gpu_devices[pci_device] = gpu; + } + } + } + } +#endif +} + +void init_gpu_stats(uint32_t& vendorID, uint32_t target_device_id, overlay_params& params) { //if (!params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats]) // return; @@ -658,111 +745,62 @@ void init_gpu_stats(uint32_t& vendorID, uint32_t reported_deviceID, overlay_para } } - // NVIDIA or Intel but maybe has Optimus - if (vendorID == 0x8086 - || vendorID == 0x10de) { + if (!g_gpu_devices.size()) + enumerate_gpus(params); - if(checkNvidia(pci_dev)) - vendorID = 0x10de; - else - params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false; + if (pci_bus_parsed && pci_dev && g_gpu_devices.find(params.pci_dev) != g_gpu_devices.end()) + { + g_active_gpu = g_gpu_devices[params.pci_dev]; } - -#ifdef __linux__ - if (vendorID == 0x8086 || vendorID == 0x1002 - || gpu.find("Radeon") != std::string::npos - || gpu.find("AMD") != std::string::npos) { - string path; - string drm = "/sys/class/drm/"; - - auto dirs = ls(drm.c_str(), "card"); - for (auto& dir : dirs) { - path = drm + dir; - - SPDLOG_DEBUG("amdgpu path check: {}", path); - if (pci_bus_parsed && pci_dev) { - string pci_device = read_symlink((path + "/device").c_str()); - SPDLOG_DEBUG("PCI device symlink: '{}'", pci_device); - if (!ends_with(pci_device, pci_dev)) { - SPDLOG_DEBUG("skipping GPU, no PCI ID match"); - continue; - } - } - - FILE *fp; - string device = path + "/device/device"; - if ((fp = fopen(device.c_str(), "r"))){ - uint32_t temp = 0; - if (fscanf(fp, "%x", &temp) == 1) { - if (reported_deviceID && temp != reported_deviceID){ - fclose(fp); - SPDLOG_DEBUG("DeviceID does not match vulkan report {}", reported_deviceID); - continue; - } - deviceID = temp; - } - fclose(fp); - } - - string vendor = path + "/device/vendor"; - if ((fp = fopen(vendor.c_str(), "r"))){ - uint32_t temp = 0; - if (fscanf(fp, "%x", &temp) != 1 || temp != 0x1002) { - fclose(fp); - continue; - } - fclose(fp); - } - - const std::string device_path = path + "/device"; - const std::string gpu_metrics_path = device_path + "/gpu_metrics"; - if (amdgpu_check_metrics(gpu_metrics_path)) { - gpu_metrics_exists = true; - metrics_path = gpu_metrics_path; - SPDLOG_DEBUG("Using gpu_metrics of {}", gpu_metrics_path); - } - - if (!amdgpu.vram_total) - amdgpu.vram_total = fopen((device_path + "/mem_info_vram_total").c_str(), "r"); - if (!amdgpu.vram_used) - amdgpu.vram_used = fopen((device_path + "/mem_info_vram_used").c_str(), "r"); - if (!amdgpu.gtt_used) - amdgpu.gtt_used = fopen((device_path + "/mem_info_gtt_used").c_str(), "r"); - - const std::string hwmon_path = device_path + "/hwmon/"; - const auto dirs = ls(hwmon_path.c_str(), "hwmon", LS_DIRS); - for (const auto& dir : dirs) - if (!amdgpu.temp) - amdgpu.temp = fopen((hwmon_path + dir + "/temp1_input").c_str(), "r"); - - if (!metrics_path.empty()) + else if (vendorID == 0x8086) // Maybe an "Optimus" setup, try to get a secondary gpu + { + for (auto& it : g_gpu_devices) + { + const auto& gpu = it.second; + if (gpu->vendorID != 0x8086) + { + g_active_gpu = gpu; break; - - // The card output nodes - cardX-output, will point to the card node - // As such the actual metrics nodes will be missing. - amdgpu.busy = fopen((device_path + "/gpu_busy_percent").c_str(), "r"); - if (!amdgpu.busy) - continue; - - SPDLOG_DEBUG("using amdgpu path: {}", device_path); - - for (const auto& dir : dirs) { - if (!amdgpu.core_clock) - amdgpu.core_clock = fopen((hwmon_path + dir + "/freq1_input").c_str(), "r"); - if (!amdgpu.memory_clock) - amdgpu.memory_clock = fopen((hwmon_path + dir + "/freq2_input").c_str(), "r"); - if (!amdgpu.power_usage) - amdgpu.power_usage = fopen((hwmon_path + dir + "/power1_average").c_str(), "r"); } - break; } - - // don't bother then - if (metrics_path.empty() && !amdgpu.busy) { - params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false; + } + else + { + for (auto& info : g_gpu_devices) + { + auto& gpu = info.second; + if (gpu->vendorID == vendorID && (gpu->deviceID == target_device_id || target_device_id == 0)) + { + g_active_gpu = gpu; + if (!target_device_id) + SPDLOG_WARN("No device id given, using first device found from vendor 0x{:04X}", vendorID); + break; + } } } + +#ifdef WIN32 + //TODO windows' gpu stats + if (g_gpu_devices.size()) + g_active_gpu = g_gpu_devices.begin()->second; + else + g_active_gpu = std::make_shared(); #endif + + // for compatibility + if (g_active_gpu) + { + vendorID = g_active_gpu->vendorID; + deviceID = g_active_gpu->deviceID; + } + + if (g_active_gpu) + SPDLOG_INFO("Selected GPU: {}, 0x{:04X}:0x{:04X} [{}]", g_active_gpu->sysfs_path, vendorID, deviceID, g_active_gpu->dev_name); + else { + g_active_gpu = std::make_shared(); + SPDLOG_WARN("Selected dummy GPU"); + } + if (!params.permit_upload) SPDLOG_INFO("Uploading is disabled (permit_upload = 0)"); } @@ -865,7 +903,7 @@ void init_system_info(){ std::string get_device_name(uint32_t vendorID, uint32_t deviceID) { - string desc; + string desc {}; #ifdef __linux__ if (pci_ids.find(vendorID) == pci_ids.end()) parse_pciids(); @@ -886,8 +924,8 @@ std::string get_device_name(uint32_t vendorID, uint32_t deviceID) void update_fan(){ // This just handles steam deck fan for now string hwmon_path; - string path = "/sys/class/hwmon/"; - auto dirs = ls(path.c_str(), "hwmon", LS_DIRS); + const string path = "/sys/class/hwmon/"; + auto dirs = ls(path, "hwmon", LS_DIRS); for (auto& dir : dirs) { string full_path = (path + dir + "/name").c_str(); if (read_line(full_path).find("jupiter") != string::npos){ @@ -901,3 +939,8 @@ void update_fan(){ else fan_speed = -1; } + +void get_device_name(int32_t vendorID, int32_t deviceID, struct swapchain_stats& sw_stats) +{ + gpu = sw_stats.gpuName = get_device_name(vendorID, deviceID); +} diff --git a/src/overlay_params.h b/src/overlay_params.h index 7977edfe..268bc163 100644 --- a/src/overlay_params.h +++ b/src/overlay_params.h @@ -33,6 +33,7 @@ typedef unsigned long KeySym; OVERLAY_PARAM_BOOL(gpu_temp) \ OVERLAY_PARAM_BOOL(cpu_stats) \ OVERLAY_PARAM_BOOL(gpu_stats) \ + OVERLAY_PARAM_BOOL(show_all_gpus) \ OVERLAY_PARAM_BOOL(ram) \ OVERLAY_PARAM_BOOL(swap) \ OVERLAY_PARAM_BOOL(vram) \