From b76e1de6da94a3d20175731da6f320ddf16e63e0 Mon Sep 17 00:00:00 2001
From: jackun <jack.un@gmail.com>
Date: Sat, 18 Sep 2021 02:43:36 +0300
Subject: [PATCH] Support displaying multiple gpus' stats

---
 src/amdgpu.cpp       | 108 +++++++++--------
 src/amdgpu.h         |   2 -
 src/cpu.cpp          |  17 +--
 src/file_utils.cpp   |   4 +-
 src/file_utils.h     |   2 +-
 src/gpu.cpp          | 108 +++++++++--------
 src/gpu.h            | 135 +++++++++++++++++++--
 src/hud_elements.cpp | 157 +++++++++++++++---------
 src/hud_elements.h   |   3 +-
 src/nvapi.cpp        |  32 +++--
 src/nvctrl.cpp       |  33 ++++--
 src/nvidia_info.h    |  10 +-
 src/nvml.cpp         | 131 +++++++++++++++------
 src/overlay.cpp      | 275 +++++++++++++++++++++++++------------------
 src/overlay_params.h |   1 +
 15 files changed, 649 insertions(+), 369 deletions(-)

diff --git a/src/amdgpu.cpp b/src/amdgpu.cpp
index 4f060224..f223c7c2 100644
--- a/src/amdgpu.cpp
+++ b/src/amdgpu.cpp
@@ -9,7 +9,7 @@
 #define METRICS_POLLING_PERIOD_MS 5
 #define METRICS_SAMPLE_COUNT (METRICS_UPDATE_PERIOD_MS/METRICS_POLLING_PERIOD_MS)
 
-std::string metrics_path = "";
+//std::string metrics_path = "";
 
 /* This structure is used to communicate the latest values of the amdgpu metrics.
  * The direction of communication is amdgpu_polling_thread -> amdgpu_get_metrics().
@@ -74,66 +74,69 @@ bool amdgpu_check_metrics(const std::string& path)
 }
 
 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
-void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics) {
-	FILE *f;
+// Read one snapshot of the metrics table from the already-open `file` and
+// copy the fields we display into `metrics`.
+// The stream is opened once in init() and closed by the destructor, so it
+// must stay open here even when a read fails: the polling thread calls this
+// again a few milliseconds later.
+void AMDGPUInfo::get_instant_metrics(struct amdgpu_common_metrics& metrics) {
 	void *buf[MAX(sizeof(struct gpu_metrics_v1_3), sizeof(struct gpu_metrics_v2_2))];
 	struct metrics_table_header* header = (metrics_table_header*)buf;
 
-	f = fopen(metrics_path.c_str(), "rb");
-	if (!f)
-		return;
-
+	rewind(file);
+	fflush(file);
 	// Read the whole file
-	if (!fread(buf, sizeof(buf), 1, f) == 0) {
+	// `buf` holds sizeof(void*) times more bytes than either table layout, so
+	// asking for one sizeof(buf) item reports 0 items for any shorter file.
+	// Count bytes instead and give up only when the header itself is missing.
+	if (fread(buf, 1, sizeof(buf), file) < sizeof(*header)) {
 		SPDLOG_DEBUG("Failed to read amdgpu metrics file '{}'", metrics_path.c_str());
-		fclose(f);
 		return;
 	}
-	fclose(f);
 
 	int64_t indep_throttle_status = 0;
 	if (header->format_revision == 1) {
 		// Desktop GPUs
 		cpuStats.cpu_type = "GPU";
 		struct gpu_metrics_v1_3 *amdgpu_metrics = (struct gpu_metrics_v1_3 *) buf;
-		metrics->gpu_load_percent = amdgpu_metrics->average_gfx_activity;
+		metrics.gpu_load_percent = amdgpu_metrics->average_gfx_activity;
 
-		metrics->average_gfx_power_w = amdgpu_metrics->average_socket_power;
+		metrics.average_gfx_power_w = amdgpu_metrics->average_socket_power;
 
-		metrics->current_gfxclk_mhz = amdgpu_metrics->average_gfxclk_frequency;
-		metrics->current_uclk_mhz = amdgpu_metrics->current_uclk;
+		metrics.current_gfxclk_mhz = amdgpu_metrics->average_gfxclk_frequency;
+		metrics.current_uclk_mhz = amdgpu_metrics->current_uclk;
 
-		metrics->gpu_temp_c = amdgpu_metrics->temperature_edge;
+		metrics.gpu_temp_c = amdgpu_metrics->temperature_edge;
 		indep_throttle_status = amdgpu_metrics->indep_throttle_status;
 	} else if (header->format_revision == 2) {
 		// APUs
 		cpuStats.cpu_type = "APU";
 		struct gpu_metrics_v2_2 *amdgpu_metrics = (struct gpu_metrics_v2_2 *) buf;
 
-		metrics->gpu_load_percent = amdgpu_metrics->average_gfx_activity;
+		metrics.gpu_load_percent = amdgpu_metrics->average_gfx_activity;
 
-		metrics->average_gfx_power_w = amdgpu_metrics->average_gfx_power / 1000.f;
-		metrics->average_cpu_power_w = amdgpu_metrics->average_cpu_power / 1000.f;
+		metrics.average_gfx_power_w = amdgpu_metrics->average_gfx_power / 1000.f;
+		metrics.average_cpu_power_w = amdgpu_metrics->average_cpu_power / 1000.f;
 
-		metrics->current_gfxclk_mhz = amdgpu_metrics->current_gfxclk;
-		metrics->current_uclk_mhz = amdgpu_metrics->current_uclk;
+		metrics.current_gfxclk_mhz = amdgpu_metrics->current_gfxclk;
+		metrics.current_uclk_mhz = amdgpu_metrics->current_uclk;
 
-		metrics->soc_temp_c = amdgpu_metrics->temperature_soc / 100;
-		metrics->gpu_temp_c = amdgpu_metrics->temperature_gfx / 100;
+		metrics.soc_temp_c = amdgpu_metrics->temperature_soc / 100;
+		metrics.gpu_temp_c = amdgpu_metrics->temperature_gfx / 100;
 		int cpu_temp = 0;
 		for (unsigned i = 0; i < cpuStats.GetCPUData().size() / 2; i++)
 			cpu_temp = MAX(cpu_temp, amdgpu_metrics->temperature_core[i]);
-		metrics->apu_cpu_temp_c = cpu_temp / 100;
+		metrics.apu_cpu_temp_c = cpu_temp / 100;
 		indep_throttle_status = amdgpu_metrics->indep_throttle_status;
 	}
 
 	/* Throttling: See
 	https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
 	for the offsets */
-	metrics->is_power_throttled = ((indep_throttle_status >> 0) & 0xFF) != 0;
-	metrics->is_current_throttled = ((indep_throttle_status >> 16) & 0xFF) != 0;
-	metrics->is_temp_throttled = ((indep_throttle_status >> 32) & 0xFFFF) != 0;
-	metrics->is_other_throttled = ((indep_throttle_status >> 56) & 0xFF) != 0;
+	metrics.is_power_throttled = ((indep_throttle_status >> 0) & 0xFF) != 0;
+	metrics.is_current_throttled = ((indep_throttle_status >> 16) & 0xFF) != 0;
+	metrics.is_temp_throttled = ((indep_throttle_status >> 32) & 0xFFFF) != 0;
+	metrics.is_other_throttled = ((indep_throttle_status >> 56) & 0xFF) != 0;
 }
 
 #define UPDATE_METRIC_AVERAGE(FIELD) do { int value_sum = 0; for (size_t s=0; s < METRICS_SAMPLE_COUNT; s++) { value_sum += metrics_buffer[s].FIELD; } amdgpu_common_metrics.FIELD = value_sum / METRICS_SAMPLE_COUNT; } while(0)
@@ -141,24 +137,25 @@ void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics) {
 #define UPDATE_METRIC_MAX(FIELD) do { int cur_max = metrics_buffer[0].FIELD; for (size_t s=1; s < METRICS_SAMPLE_COUNT; s++) { cur_max = MAX(cur_max, metrics_buffer[s].FIELD); }; amdgpu_common_metrics.FIELD = cur_max; } while(0)
 #define UPDATE_METRIC_LAST(FIELD) do { amdgpu_common_metrics.FIELD = metrics_buffer[METRICS_SAMPLE_COUNT - 1].FIELD; } while(0)
 
-void amdgpu_metrics_polling_thread() {
+void AMDGPUInfo::metrics_polling_thread() {
+	SPDLOG_DEBUG("{}", __func__);
 	struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT];
 	bool gpu_load_needs_dividing = false;  //some GPUs report load as centipercent
 
 	// Initial poll of the metrics, so that we have values to display as fast as possible
-	amdgpu_get_instant_metrics(&amdgpu_common_metrics);
+	get_instant_metrics(amdgpu_common_metrics);
 	if (amdgpu_common_metrics.gpu_load_percent > 100){
 		gpu_load_needs_dividing = true;
 		amdgpu_common_metrics.gpu_load_percent /= 100;
 	}
-	
+
 	// Set all the fields to 0 by default. Only done once as we're just replacing previous values after
 	memset(metrics_buffer, 0, sizeof(metrics_buffer));
 
-	while (1) {
+	while (!quit) {
 		// Get all the samples
 		for (size_t cur_sample_id=0; cur_sample_id < METRICS_SAMPLE_COUNT; cur_sample_id++) {
-			amdgpu_get_instant_metrics(&metrics_buffer[cur_sample_id]);
+			get_instant_metrics(metrics_buffer[cur_sample_id]);
 
 			// Detect and fix if the gpu load is reported in centipercent
 			if (gpu_load_needs_dividing || metrics_buffer[cur_sample_id].gpu_load_percent > 100){
@@ -168,7 +165,7 @@ void amdgpu_metrics_polling_thread() {
 
 			usleep(METRICS_POLLING_PERIOD_MS * 1000);
 		}
-		
+
 		// Copy the results from the different metrics to amdgpu_common_metrics
 		amdgpu_common_metrics_m.lock();
 		UPDATE_METRIC_AVERAGE(gpu_load_percent);
@@ -189,29 +186,38 @@ void amdgpu_metrics_polling_thread() {
 	}
 }
 
-void amdgpu_get_metrics(){
-	static bool init = false;
-	if (!init){
-		std::thread(amdgpu_metrics_polling_thread).detach();
-		init = true;
-	}
+bool AMDGPUInfo::init(){
+	if (file)
+		return true;
+
+	if (!(file = fopen(metrics_path.c_str(), "rb")))
+		return false;
+
+	AMDGPUHWMonInfo::init();
+
+	// TODO start polling only when actually used
+	thread = std::thread(&AMDGPUInfo::metrics_polling_thread, this);
+	return true;
+}
 
+void AMDGPUInfo::update(const overlay_params& params){
 	amdgpu_common_metrics_m.lock();
-	gpu_info.load = amdgpu_common_metrics.gpu_load_percent;
+	info.load = amdgpu_common_metrics.gpu_load_percent;
 
-	gpu_info.powerUsage = amdgpu_common_metrics.average_gfx_power_w;
-	gpu_info.CoreClock = amdgpu_common_metrics.current_gfxclk_mhz;
-	gpu_info.MemClock = amdgpu_common_metrics.current_uclk_mhz;
+	info.power_usage = amdgpu_common_metrics.average_gfx_power_w;
+	info.core_clock = amdgpu_common_metrics.current_gfxclk_mhz;
+	info.memory_clock = amdgpu_common_metrics.current_uclk_mhz;
 
 	// Use hwmon instead, see gpu.cpp
-	// gpu_info.temp = amdgpu_common_metrics.gpu_temp_c;
-	gpu_info.apu_cpu_power = amdgpu_common_metrics.average_cpu_power_w;
-	gpu_info.apu_cpu_temp = amdgpu_common_metrics.apu_cpu_temp_c;
-
-	gpu_info.is_power_throttled = amdgpu_common_metrics.is_power_throttled;
-	gpu_info.is_current_throttled = amdgpu_common_metrics.is_current_throttled;
-	gpu_info.is_temp_throttled = amdgpu_common_metrics.is_temp_throttled;
-	gpu_info.is_other_throttled = amdgpu_common_metrics.is_other_throttled;
+	// info.temp = amdgpu_common_metrics.gpu_temp_c;
+	info.apu_cpu_power = amdgpu_common_metrics.average_cpu_power_w;
+	info.apu_cpu_temp = amdgpu_common_metrics.apu_cpu_temp_c;
+
+	info.is_power_throttled = amdgpu_common_metrics.is_power_throttled;
+	info.is_current_throttled = amdgpu_common_metrics.is_current_throttled;
+	info.is_temp_throttled = amdgpu_common_metrics.is_temp_throttled;
+	info.is_other_throttled = amdgpu_common_metrics.is_other_throttled;
+	getAmdGpuInfo(files, info, true);
 
 	amdgpu_common_metrics_m.unlock();
 }
diff --git a/src/amdgpu.h b/src/amdgpu.h
index e8867de6..d3573ef8 100644
--- a/src/amdgpu.h
+++ b/src/amdgpu.h
@@ -140,5 +140,3 @@ struct gpu_metrics_v2_2 {
 };
 
 bool amdgpu_check_metrics(const std::string& path);
-extern void amdgpu_get_metrics();
-extern std::string metrics_path;
diff --git a/src/cpu.cpp b/src/cpu.cpp
index 035fd1ad..70583025 100644
--- a/src/cpu.cpp
+++ b/src/cpu.cpp
@@ -248,7 +248,7 @@ bool CPUStats::UpdateCoreMhz() {
 
 bool CPUStats::UpdateCpuTemp() {
     if (cpu_type == "APU"){
-        m_cpuDataTotal.temp = gpu_info.apu_cpu_temp;
+        m_cpuDataTotal.temp = g_active_gpu ? g_active_gpu->info.apu_cpu_temp : 0;
         return true;
     } else {
         if (!m_cpuTempFile)
@@ -349,7 +349,8 @@ static bool get_cpu_power_rapl(CPUPowerData* cpuPowerData, float& power) {
 }
 
 static bool get_cpu_power_amdgpu(float& power) {
-    power = gpu_info.apu_cpu_power;
+    // Report 0 when there is no active GPU so a `true` return never leaves `power` unwritten.
+    power = g_active_gpu ? g_active_gpu->info.apu_cpu_power : 0.f; //FIXME more reliable way to get APU stats
     return true;
 }
 
@@ -383,7 +384,7 @@ bool CPUStats::UpdateCpuPower() {
 
 static bool find_temp_input(const std::string path, std::string& input, const std::string& name)
 {
-    auto files = ls(path.c_str(), "temp", LS_FILES);
+    auto files = ls(path, "temp", LS_FILES);
     for (auto& file : files) {
         if (!ends_with(file, "_label"))
             continue;
@@ -404,7 +405,7 @@ static bool find_temp_input(const std::string path, std::string& input, const st
 
 static bool find_fallback_temp_input(const std::string path, std::string& input)
 {
-    auto files = ls(path.c_str(), "temp", LS_FILES);
+    auto files = ls(path, "temp", LS_FILES);
     if (!files.size())
         return false;
 
@@ -426,7 +427,7 @@ bool CPUStats::GetCpuFile() {
     std::string name, path, input;
     std::string hwmon = "/sys/class/hwmon/";
 
-    auto dirs = ls(hwmon.c_str());
+    auto dirs = ls(hwmon);
     for (auto& dir : dirs) {
         path = hwmon + dir;
         name = read_line(path + "/name");
@@ -458,7 +459,7 @@ bool CPUStats::GetCpuFile() {
 
 static bool find_input(const std::string& path, const char* input_prefix, std::string& input, const std::string& name)
 {
-    auto files = ls(path.c_str(), input_prefix, LS_FILES);
+    auto files = ls(path, input_prefix, LS_FILES);
     for (auto& file : files) {
         if (!ends_with(file, "_label"))
             continue;
@@ -539,7 +540,7 @@ bool CPUStats::InitCpuPowerData() {
 
     CPUPowerData* cpuPowerData = nullptr;
 
-    auto dirs = ls(hwmon.c_str());
+    auto dirs = ls(hwmon);
     for (auto& dir : dirs) {
         path = hwmon + dir;
         name = read_line(path + "/name");
@@ -558,7 +559,7 @@ bool CPUStats::InitCpuPowerData() {
 
     if (!cpuPowerData && intel) {
         std::string powercap = "/sys/class/powercap/";
-        auto powercap_dirs = ls(powercap.c_str());
+        auto powercap_dirs = ls(powercap);
         for (auto& dir : powercap_dirs) {
             path = powercap + dir;
             name = read_line(path + "/name");
diff --git a/src/file_utils.cpp b/src/file_utils.cpp
index 9e3606d8..1546ce50 100644
--- a/src/file_utils.cpp
+++ b/src/file_utils.cpp
@@ -33,12 +33,12 @@ std::string get_basename(const std::string&& path)
 }
 
 #ifdef __linux__
-std::vector<std::string> ls(const char* root, const char* prefix, LS_FLAGS flags)
+std::vector<std::string> ls(const std::string& root, const char* prefix, LS_FLAGS flags)
 {
     std::vector<std::string> list;
     struct dirent* dp;
 
-    DIR* dirp = opendir(root);
+    DIR* dirp = opendir(root.c_str());
     if (!dirp) {
         SPDLOG_ERROR("Error opening directory '{}': {}", root, strerror(errno));
         return list;
diff --git a/src/file_utils.h b/src/file_utils.h
index 03b3d8eb..542dd16e 100644
--- a/src/file_utils.h
+++ b/src/file_utils.h
@@ -12,7 +12,7 @@ enum LS_FLAGS
 };
 
 std::string read_line(const std::string& filename);
-std::vector<std::string> ls(const char* root, const char* prefix = nullptr, LS_FLAGS flags = LS_DIRS);
+std::vector<std::string> ls(const std::string& root, const char* prefix = nullptr, LS_FLAGS flags = LS_DIRS);
 bool file_exists(const std::string& path);
 bool dir_exists(const std::string& path);
 std::string read_symlink(const char * link);
diff --git a/src/gpu.cpp b/src/gpu.cpp
index 549f6fdf..e50dce53 100644
--- a/src/gpu.cpp
+++ b/src/gpu.cpp
@@ -7,6 +7,7 @@
 #include <spdlog/spdlog.h>
 #include "nvctrl.h"
 #include "timing.hpp"
+#include "file_utils.h"
 #ifdef HAVE_NVML
 #include "nvidia_info.h"
 #endif
@@ -15,65 +16,39 @@
 
 using namespace std::chrono_literals;
 
-struct gpuInfo gpu_info {};
-amdgpu_files amdgpu {};
+std::shared_ptr<gpu_device> g_active_gpu;
+std::unordered_map<std::string /*device*/, std::shared_ptr<gpu_device>> g_gpu_devices;
 
-bool checkNvidia(const char *pci_dev){
-    bool nvSuccess = false;
-#ifdef HAVE_NVML
-    nvSuccess = checkNVML(pci_dev) && getNVMLInfo({});
-#endif
+bool NVCtrlInfo::init()
+{
 #ifdef HAVE_XNVCTRL
-    if (!nvSuccess)
-        nvSuccess = checkXNVCtrl();
+    // FIXME correct device index
+    return checkXNVCtrl();
+#else
+    return false;
 #endif
-#ifdef _WIN32
-    if (!nvSuccess)
-        nvSuccess = checkNVAPI();
-#endif
-    return nvSuccess;
 }
 
-void getNvidiaGpuInfo(const struct overlay_params& params){
-#ifdef HAVE_NVML
-    if (nvmlSuccess){
-        getNVMLInfo(params);
-        gpu_info.load = nvidiaUtilization.gpu;
-        gpu_info.temp = nvidiaTemp;
-        gpu_info.memoryUsed = nvidiaMemory.used / (1024.f * 1024.f * 1024.f);
-        gpu_info.CoreClock = nvidiaCoreClock;
-        gpu_info.MemClock = nvidiaMemClock;
-        gpu_info.powerUsage = nvidiaPowerUsage / 1000;
-        gpu_info.memoryTotal = nvidiaMemory.total / (1024.f * 1024.f * 1024.f);
-        if (params.enabled[OVERLAY_PARAM_ENABLED_throttling_status]){
-            gpu_info.is_temp_throttled = (nvml_throttle_reasons & 0x0000000000000060LL) != 0;
-            gpu_info.is_power_throttled = (nvml_throttle_reasons & 0x000000000000008CLL) != 0;
-            gpu_info.is_other_throttled = (nvml_throttle_reasons & 0x0000000000000112LL) != 0;
-        }
-        return;
-    }
-#endif
+void NVCtrlInfo::update(const struct overlay_params& params)
+{
 #ifdef HAVE_XNVCTRL
     if (nvctrlSuccess) {
         getNvctrlInfo();
-        gpu_info.load = nvctrl_info.load;
-        gpu_info.temp = nvctrl_info.temp;
-        gpu_info.memoryUsed = nvctrl_info.memoryUsed / (1024.f);
-        gpu_info.CoreClock = nvctrl_info.CoreClock;
-        gpu_info.MemClock = nvctrl_info.MemClock;
-        gpu_info.powerUsage = 0;
-        gpu_info.memoryTotal = nvctrl_info.memoryTotal;
+        info.load = nvctrl_info.load;
+        info.temp = nvctrl_info.temp;
+        info.memory_used = nvctrl_info.memoryUsed;
+        info.core_clock = nvctrl_info.CoreClock;
+        info.memory_clock = nvctrl_info.MemClock;
+        info.power_usage = 0;
+        info.memory_total = nvctrl_info.memoryTotal;
         return;
     }
 #endif
-#ifdef _WIN32
-nvapi_util();
-#endif
 }
 
-void getAmdGpuInfo(){
+void getAmdGpuInfo(amdgpu_files& amdgpu, gpu_info& gpu_info, bool has_metrics){
     int64_t value = 0;
-    if (metrics_path.empty()){
+    if (!has_metrics){
         if (amdgpu.busy) {
             rewind(amdgpu.busy);
             fflush(amdgpu.busy);
@@ -89,7 +64,7 @@ void getAmdGpuInfo(){
             if (fscanf(amdgpu.core_clock, "%" PRId64, &value) != 1)
                 value = 0;
 
-            gpu_info.CoreClock = value / 1000000;
+            gpu_info.core_clock = value / 1000000;
         }
 
         if (amdgpu.memory_clock) {
@@ -98,7 +73,7 @@ void getAmdGpuInfo(){
             if (fscanf(amdgpu.memory_clock, "%" PRId64, &value) != 1)
                 value = 0;
 
-            gpu_info.MemClock = value / 1000000;
+            gpu_info.memory_clock = value / 1000000;
         }
 
         if (amdgpu.power_usage) {
@@ -107,7 +82,7 @@ void getAmdGpuInfo(){
             if (fscanf(amdgpu.power_usage, "%" PRId64, &value) != 1)
                 value = 0;
 
-            gpu_info.powerUsage = value / 1000000;
+            gpu_info.power_usage = value / 1000000;
         }
     }
 
@@ -116,7 +91,7 @@ void getAmdGpuInfo(){
         fflush(amdgpu.vram_total);
         if (fscanf(amdgpu.vram_total, "%" PRId64, &value) != 1)
             value = 0;
-        gpu_info.memoryTotal = float(value) / (1024 * 1024 * 1024);
+        gpu_info.memory_total = value;
     }
 
     if (amdgpu.vram_used) {
@@ -124,10 +99,10 @@ void getAmdGpuInfo(){
         fflush(amdgpu.vram_used);
         if (fscanf(amdgpu.vram_used, "%" PRId64, &value) != 1)
             value = 0;
-        gpu_info.memoryUsed = float(value) / (1024 * 1024 * 1024);
+        gpu_info.memory_used = value;
     }
     // On some GPUs SMU can sometimes return the wrong temperature.
-    // As HWMON is way more visible than the SMU metrics, let's always trust it as it is the most likely to work 
+    // As HWMON is way more visible than the SMU metrics, let's always trust it as it is the most likely to work
     if (amdgpu.temp){
         rewind(amdgpu.temp);
         fflush(amdgpu.temp);
@@ -142,6 +117,35 @@ void getAmdGpuInfo(){
         fflush(amdgpu.gtt_used);
         if (fscanf(amdgpu.gtt_used, "%" PRId64, &value) != 1)
             value = 0;
-        gpu_info.gtt_used = float(value) / (1024 * 1024 * 1024);
+        gpu_info.gtt_used = value;
+    }
+}
+
+// Opens this device's sysfs/hwmon stat files; true when busy, temp and both VRAM files opened.
+bool AMDGPUHWMonInfo::init()
+{
+    const auto open_ro = [](const std::string& path) { return fopen(path.c_str(), "r"); };
+    const std::string device_dir = sysfs_path + "/device";
+    const std::string hwmon_root = device_dir + "/hwmon/";
+
+    files.busy = open_ro(device_dir + "/gpu_busy_percent");
+    files.vram_total = open_ro(device_dir + "/mem_info_vram_total");
+    files.vram_used = open_ro(device_dir + "/mem_info_vram_used");
+    files.gtt_used = open_ro(device_dir + "/mem_info_gtt_used");
+
+    // Each sensor is taken from the first hwmon directory in which its file opens.
+    for (const auto& name : ls(hwmon_root, "hwmon", LS_DIRS)) {
+        const std::string base = hwmon_root + name;
+        if (!files.core_clock)   files.core_clock = open_ro(base + "/freq1_input");
+        if (!files.memory_clock) files.memory_clock = open_ro(base + "/freq2_input");
+        if (!files.temp)         files.temp = open_ro(base + "/temp1_input");
+        if (!files.power_usage)  files.power_usage = open_ro(base + "/power1_average");
     }
+
+    return files.busy && files.temp && files.vram_total && files.vram_used;
+}
+
+void AMDGPUHWMonInfo::update(const struct overlay_params& params)
+{
+    getAmdGpuInfo(files, info, false);
 }
diff --git a/src/gpu.h b/src/gpu.h
index ebbfc2fe..d772e397 100644
--- a/src/gpu.h
+++ b/src/gpu.h
@@ -5,6 +5,11 @@
 #include <cstdio>
 #include <cstdint>
 #include "overlay_params.h"
+#include <atomic>
+#include <memory>
+#include <string>
+#include <thread>
+#include <unordered_map>
 
 struct amdgpu_files
 {
@@ -19,29 +23,138 @@ struct amdgpu_files
     FILE *gtt_used;
 };
 
-extern amdgpu_files amdgpu;
-
-struct gpuInfo{
+struct gpu_info {
     int load;
     int temp;
-    float memoryUsed;
-    float memoryTotal;
-    int MemClock;
-    int CoreClock;
-    float powerUsage;
+    uint64_t memory_used;
+    uint64_t memory_total;
+    int memory_clock;
+    int core_clock;
+    float power_usage;
     float apu_cpu_power;
     int apu_cpu_temp;
     bool is_power_throttled;
     bool is_current_throttled;
     bool is_temp_throttled;
     bool is_other_throttled;
-    float gtt_used;
+    uint64_t gtt_used;
+};
+
+struct gpu_handles
+{
+    virtual ~gpu_handles() {};
+};
+
+struct gpu_device
+{
+    gpu_device(const std::string& sysfs, const std::string& pci)
+    : sysfs_path(sysfs)
+    , pci_device(pci)
+    {}
+    virtual ~gpu_device() {}
+    virtual void update(const struct overlay_params& params) = 0;
+    virtual bool init() = 0;
+
+    std::string sysfs_path;
+    std::string pci_device;
+    std::string dev_name;
+    gpu_info info {};
+    uint32_t vendorID {}, deviceID {};
+    gpu_handles* device {};
+};
+
+struct DummyGpu : public gpu_device
+{
+    DummyGpu() : gpu_device({}, {})
+    {
+        dev_name = "dummy";
+    }
+    void update(const struct overlay_params& params) {}
+    bool init() { return true; }
+};
+
+struct NVMLInfo : public gpu_device
+{
+    NVMLInfo(const std::string& sysfs, const std::string& pci) : gpu_device(sysfs, pci) {}
+    void update(const struct overlay_params& params);
+    bool init();
+};
+
+struct NVCtrlInfo : public gpu_device
+{
+    NVCtrlInfo(const std::string& sysfs, const std::string& pci) : gpu_device(sysfs, pci) {}
+    void update(const struct overlay_params& params);
+    bool init();
+};
+
+struct NVAPIInfo : public gpu_device
+{
+    NVAPIInfo() : gpu_device({}, {}) {}
+    void update(const struct overlay_params& params);
+    bool init();
+};
+
+// amdgpu backend that reads its stats from the sysfs/hwmon streams held in `files`.
+// The destructor closes every stream stored there and deletes `device`.
+struct AMDGPUHWMonInfo : public gpu_device
+{
+    AMDGPUHWMonInfo(const std::string& sysfs, const std::string& pci) : gpu_device(sysfs, pci) {}
+    virtual ~AMDGPUHWMonInfo()
+    {
+        delete device;
+        // gtt_used is opened by init() as well, so it has to be closed here too.
+        FILE* const streams[] = {
+            files.busy, files.temp, files.vram_total, files.vram_used,
+            files.core_clock, files.memory_clock, files.power_usage, files.gtt_used,
+        };
+        for (FILE* stream : streams) {
+            if (stream)
+                fclose(stream);
+        }
+        files = {};
+    }
+
+    // Refreshes `info` from the open streams.
+    virtual void update(const struct overlay_params& params);
+    // Opens the streams; see the definition for which ones are required.
+    virtual bool init();
+
+    // Open sysfs/hwmon streams; null where a file could not be opened.
+    amdgpu_files files {};
+};
+
+// AMDGPUHWMonInfo extended with a thread that polls the metrics file at `metrics_path`.
+struct AMDGPUInfo : public AMDGPUHWMonInfo
+{
+    AMDGPUInfo(const std::string& metrics_, const std::string& sysfs, const std::string& pci)
+    : AMDGPUHWMonInfo(sysfs, pci)
+    , metrics_path(metrics_)
+    {}
+    ~AMDGPUInfo()
+    {
+        quit = true;
+        if (thread.joinable())
+            thread.join();
+        if (file)
+            fclose(file);
+    }
+    void update(const struct overlay_params& params);
+    bool init();
+private:
+    void metrics_polling_thread();
+    void get_instant_metrics(struct amdgpu_common_metrics& metrics);
+    // Written by the destructor and read by the polling thread, hence atomic.
+    std::atomic<bool> quit {false};
+    FILE *file {};
+    std::string metrics_path;
+    std::thread thread;
 };
 
-extern struct gpuInfo gpu_info;
+extern std::shared_ptr<gpu_device> g_active_gpu;
+extern std::unordered_map<std::string /*device*/, std::shared_ptr<gpu_device>> g_gpu_devices;
 
 void getNvidiaGpuInfo(const struct overlay_params& params);
-void getAmdGpuInfo(void);
+void getAmdGpuInfo(amdgpu_files& amdgpu, gpu_info& gpu_info, bool has_metrics = false);
 bool checkNvidia(const char *pci_dev);
 extern void nvapi_util();
 extern bool checkNVAPI();
diff --git a/src/hud_elements.cpp b/src/hud_elements.cpp
index 1b7353aa..cac8cdec 100644
--- a/src/hud_elements.cpp
+++ b/src/hud_elements.cpp
@@ -156,15 +156,57 @@ void HudElements::version(){
     }
 }
 
-void HudElements::gpu_stats(){
-    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats]){
+// Draw the "VRAM" row for one GPU, plus the memory clock outside MANGOAPP builds.
+// NOTE(review): assumes every backend stores memory_used/gtt_used in bytes, as getAmdGpuInfo does - confirm.
+static void per_gpu_vram(const gpu_info& gpu_info){
+    if (!HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_vram])
+        return;
+
+    ImGui::TableNextRow(); ImGui::TableNextColumn();
+    ImGui::TextColored(HUDElements.colors.vram, "VRAM");
+    ImGui::TableNextColumn();
+    // The counters are uint64_t, so hand "%.1f" a floating-point GiB value. APUs also count GTT.
+    const uint64_t used_bytes = gpu_info.memory_used + (cpuStats.cpu_type == "APU" ? gpu_info.gtt_used : 0);
+    right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", used_bytes / (1024.f * 1024.f * 1024.f));
+    ImGui::SameLine(0,1.0f);
+    ImGui::PushFont(HUDElements.sw_stats->font1);
+    ImGui::Text("GiB");
+    ImGui::PopFont();
+#ifndef MANGOAPP
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock]){
+        ImguiNextColumnOrNewRow();
+        right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu_info.memory_clock);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::PushFont(HUDElements.sw_stats->font1);
+        ImGui::Text("MHz");
+        ImGui::PopFont();
+    }
+#endif
+}
+
+// HUD entry for the active GPU's VRAM row; does nothing when show_all_gpus is enabled.
+void HudElements::vram(){
+    const bool all_gpus = HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_show_all_gpus];
+    if (all_gpus || !g_active_gpu)
+        return;
+    per_gpu_vram(g_active_gpu->info);
+}
+
+static void per_gpu_stats(const gpu_device* gpu, bool single){
+   if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats]){
         ImGui::TableNextRow(); ImGui::TableNextColumn();
         const char* gpu_text;
-        if (HUDElements.params->gpu_text.empty())
+        if (!single)
+            gpu_text = gpu->dev_name.c_str();
+        else if (HUDElements.params->gpu_text.empty())
             gpu_text = "GPU";
         else
             gpu_text = HUDElements.params->gpu_text.c_str();
         ImGui::TextColored(HUDElements.colors.gpu, "%s", gpu_text);
+        if (!single) {
+            ImGui::TableNextRow();
+            ImGui::TableNextColumn();
+        }
         ImGui::TableNextColumn();
         auto text_color = HUDElements.colors.text;
         if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_load_change]){
@@ -176,13 +218,13 @@ void HudElements::gpu_stats(){
                 HUDElements.params->gpu_load_value[1]
             };
 
-            auto load_color = change_on_load_temp(gpu_data, gpu_info.load);
-            right_aligned_text(load_color, HUDElements.ralign_width, "%i", gpu_info.load);
+            auto load_color = change_on_load_temp(gpu_data, gpu->info.load);
+            right_aligned_text(load_color, HUDElements.ralign_width, "%i", gpu->info.load);
             ImGui::SameLine(0, 1.0f);
             ImGui::TextColored(load_color,"%%");
         }
         else {
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.load);
+            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->info.load);
             ImGui::SameLine(0, 1.0f);
             ImGui::TextColored(text_color,"%%");
             // ImGui::SameLine(150);
@@ -190,7 +232,7 @@ void HudElements::gpu_stats(){
         }
         if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_temp]){
             ImguiNextColumnOrNewRow();
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.temp);
+            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->info.temp);
             ImGui::SameLine(0, 1.0f);
             ImGui::Text("°C");
         }
@@ -199,7 +241,7 @@ void HudElements::gpu_stats(){
         }
         if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock]){
             ImguiNextColumnOrNewRow();
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.CoreClock);
+            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->info.core_clock);
             ImGui::SameLine(0, 1.0f);
             ImGui::PushFont(HUDElements.sw_stats->font1);
             ImGui::Text("MHz");
@@ -208,9 +250,9 @@ void HudElements::gpu_stats(){
         if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power]) {
             ImguiNextColumnOrNewRow();
 #ifdef MANGOAPP
-            right_aligned_text(text_color, HUDElements.ralign_width, "%.1f", gpu_info.powerUsage);
+            right_aligned_text(text_color, HUDElements.ralign_width, "%.1f", gpu->info.power_usage);
 #else
-            right_aligned_text(text_color, HUDElements.ralign_width, "%.0f", gpu_info.powerUsage);
+            right_aligned_text(text_color, HUDElements.ralign_width, "%.0f", gpu->info.power_usage);
 #endif
             ImGui::SameLine(0, 1.0f);
             ImGui::PushFont(HUDElements.sw_stats->font1);
@@ -218,6 +260,25 @@ void HudElements::gpu_stats(){
             ImGui::PopFont();
         }
     }
+
+    if (!single)
+        per_gpu_vram(gpu->info);
+}
+
+// HUD entry: every known GPU when show_all_gpus is on, otherwise only the active one.
+void HudElements::gpu_stats(){
+    const auto& enabled = HUDElements.params->enabled;
+    if (!enabled[OVERLAY_PARAM_ENABLED_gpu_stats])
+        return;
+
+    if (!enabled[OVERLAY_PARAM_ENABLED_show_all_gpus]) {
+        if (g_active_gpu)
+            per_gpu_stats(g_active_gpu.get(), true);
+        return;
+    }
+
+    for (const auto& entry : g_gpu_devices)
+        per_gpu_stats(entry.second.get(), false);
 }
 
 void HudElements::cpu_stats(){
@@ -359,33 +420,6 @@ void HudElements::io_stats(){
 #endif
 }
 
-void HudElements::vram(){
-    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_vram]){
-        ImGui::TableNextRow(); ImGui::TableNextColumn();
-        ImGui::TextColored(HUDElements.colors.vram, "VRAM");
-        ImGui::TableNextColumn();
-        // Add gtt_used to vram usage for APUs
-        if (cpuStats.cpu_type == "APU")
-            right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu_info.memoryUsed + gpu_info.gtt_used);
-        else
-            right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu_info.memoryUsed);
-        ImGui::SameLine(0,1.0f);
-        ImGui::PushFont(HUDElements.sw_stats->font1);
-        ImGui::Text("GiB");
-        ImGui::PopFont();
-#ifndef MANGOAPP
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock]){
-            ImguiNextColumnOrNewRow();
-            right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu_info.MemClock);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            ImGui::Text("MHz");
-            ImGui::PopFont();
-        }
-#endif
-    }
-}
-
 void HudElements::ram(){
 #ifdef __linux__
     if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_ram]){
@@ -504,11 +538,11 @@ void HudElements::fps_only(){
 }
 
 void HudElements::gpu_name(){
-    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_name] && !HUDElements.sw_stats->gpuName.empty()){
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_name] && g_active_gpu && !g_active_gpu->dev_name.empty()){
         ImGui::TableNextRow(); ImGui::TableNextColumn();
         ImGui::PushFont(HUDElements.sw_stats->font1);
         ImGui::TextColored(HUDElements.colors.engine,
-            "%s", HUDElements.sw_stats->gpuName.c_str());
+            "%s", g_active_gpu->dev_name.c_str());
         ImGui::PopFont();
     }
 }
@@ -944,23 +978,34 @@ void HudElements::fan(){
     }
 }
 
+static void gpu_throttling_status(const gpu_info& gpu_info, const std::string& name) // Draws one "Throttling <name>" row listing the active throttle reasons of a single GPU.
+{
+    const bool throttled = gpu_info.is_power_throttled || gpu_info.is_current_throttled || gpu_info.is_temp_throttled || gpu_info.is_other_throttled;
+    if (!throttled) return; // nothing is drawn while no throttle flag is set
+    ImGui::TableNextRow(); ImGui::TableNextColumn();
+    ImGui::TextColored(HUDElements.colors.engine, "%s %s", "Throttling", name.c_str());
+    ImGui::TableNextColumn();
+    ImGui::TableNextColumn();
+    if (gpu_info.is_power_throttled)
+        right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Power");
+    if (gpu_info.is_current_throttled)
+        right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Current");
+    if (gpu_info.is_temp_throttled)
+        right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Temp");
+    if (gpu_info.is_other_throttled)
+        right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Other");
+}
+
 void HudElements::throttling_status(){
-    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_throttling_status]){
-        if (gpu_info.is_power_throttled || gpu_info.is_current_throttled || gpu_info.is_temp_throttled || gpu_info.is_other_throttled){
-            ImGui::TableNextRow(); ImGui::TableNextColumn();
-            ImGui::TextColored(HUDElements.colors.engine, "%s", "Throttling");
-            ImGui::TableNextColumn();
-            ImGui::TableNextColumn();
-            if (gpu_info.is_power_throttled)
-                right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Power");
-            if (gpu_info.is_current_throttled)
-                right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Current");
-            if (gpu_info.is_temp_throttled)
-                right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Temp");
-            if (gpu_info.is_other_throttled)
-                right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Other");
-        }
+    if (!HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_throttling_status])
+        return;
+    // show_all_gpus on: one row per enumerated GPU; off: only the active GPU, if one is selected.
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_show_all_gpus]) {
+        for (const auto& it : g_gpu_devices)
+            gpu_throttling_status(it.second->info, it.second->dev_name);
     }
+    else if (g_active_gpu)
+        gpu_throttling_status(g_active_gpu->info, g_active_gpu->dev_name);
 }
 
 void HudElements::graphs(){
@@ -1037,12 +1082,12 @@ void HudElements::graphs(){
         ImGui::TextColored(HUDElements.colors.engine, "%s", "GPU Mem Clock");
     }
 
-    if (value == "vram"){
+    if (value == "vram" && g_active_gpu){
         for (auto& it : graph_data){
             arr.push_back(float(it.gpu_vram_used));
         }
 
-        HUDElements.max = gpu_info.memoryTotal;
+        HUDElements.max = g_active_gpu->info.memory_total;
         HUDElements.min = 0;
         ImGui::TextColored(HUDElements.colors.engine, "%s", "VRAM");
     }
diff --git a/src/hud_elements.h b/src/hud_elements.h
index 93c6b8ce..43bfb5bd 100644
--- a/src/hud_elements.h
+++ b/src/hud_elements.h
@@ -24,7 +24,8 @@ class HudElements{
         Clock::time_point last_exec;
         std::vector<std::pair<std::string, std::string>> options;
         std::vector<std::pair<void(*)(), std::string >> ordered_functions;
-        int min, max, gpu_core_max, gpu_mem_max, cpu_temp_max, gpu_temp_max;
+        float min, max;
+        int gpu_core_max, gpu_mem_max, cpu_temp_max, gpu_temp_max;
         const std::vector<std::string> permitted_params = {
             "gpu_load", "cpu_load", "gpu_core_clock", "gpu_mem_clock",
             "vram", "ram", "cpu_temp", "gpu_temp"
diff --git a/src/nvapi.cpp b/src/nvapi.cpp
index 19e909e1..5bf22efe 100644
--- a/src/nvapi.cpp
+++ b/src/nvapi.cpp
@@ -40,28 +40,34 @@ bool checkNVAPI(){
     NvAPI_Initialize = (NvAPI_Initialize_t) (*NvAPI_QueryInterface)(0x0150E828);
     NvAPI_EnumPhysicalGPUs = (NvAPI_EnumPhysicalGPUs_t) (*NvAPI_QueryInterface)(0xE5AC921F);
     NvAPI_GPU_GetUsages = (NvAPI_GPU_GetUsages_t) (*NvAPI_QueryInterface)(0x189A1FDF);
-    if (NvAPI_Initialize == NULL || NvAPI_EnumPhysicalGPUs == NULL ||
-        NvAPI_EnumPhysicalGPUs == NULL || NvAPI_GPU_GetUsages == NULL)
+
+    if (!NvAPI_Initialize || !NvAPI_EnumPhysicalGPUs || !NvAPI_EnumPhysicalGPUs || !NvAPI_GPU_GetUsages)
     {
         std::cerr << "Couldn't get functions in nvapi.dll" << std::endl;
         return 2;
     }
-    (*NvAPI_Initialize)();
-    
-    int         *gpuHandles[NVAPI_MAX_PHYSICAL_GPUS] = { NULL };
+    NvAPI_Initialize();
+
+    NvAPI_EnumPhysicalGPUs(gpuHandles, &gpuCount);
 
     return true;
 }
 
-void nvapi_util()
-{  
+bool NVAPIInfo::init()
+{
+    // Keep probing NVAPI until one probe succeeds; afterwards the cached flag is returned unchanged.
+    init_nvapi_bool = init_nvapi_bool || checkNVAPI();
+    return init_nvapi_bool;
+}
+
+void NVAPIInfo::update()
+{
     if (!init_nvapi_bool){
         init_nvapi_bool = checkNVAPI();
     }
-    
-    gpuUsages[0] = (NVAPI_MAX_USAGES_PER_GPU * 4) | 0x10000;
-    (*NvAPI_EnumPhysicalGPUs)(gpuHandles, &gpuCount);
-    (*NvAPI_GPU_GetUsages)(gpuHandles[0], gpuUsages);
-    gpu_info.load = gpuUsages[3];
 
-}
\ No newline at end of file
+    if (!init_nvapi_bool || !NvAPI_GPU_GetUsages || !g_active_gpu) return; // NVAPI unusable, entry point unresolved, or no GPU to store the load in
+    gpuUsages[0] = (NVAPI_MAX_USAGES_PER_GPU * 4) | 0x10000;
+    NvAPI_GPU_GetUsages(gpuHandles[0], gpuUsages);
+    g_active_gpu->info.load = gpuUsages[3];
+}
diff --git a/src/nvctrl.cpp b/src/nvctrl.cpp
index daedd80d..9d9bfd72 100644
--- a/src/nvctrl.cpp
+++ b/src/nvctrl.cpp
@@ -17,17 +17,22 @@ static std::unique_ptr<Display, std::function<void(Display*)>> display;
 struct nvctrlInfo nvctrl_info;
 bool nvctrlSuccess = false;
 
-static bool find_nv_x11(libnvctrl_loader& nvctrl, Display*& dpy)
+static bool find_nv_x11(libnvctrl_loader& nvctrl, Display*& dpy, int& scr)
 {
     char buf[8] {};
     for (int i = 0; i < 16; i++) {
         snprintf(buf, sizeof(buf), ":%d", i);
         Display *d = g_x11->XOpenDisplay(buf);
         if (d) {
-            if (nvctrl.XNVCTRLIsNvScreen(d, 0)) {
-                dpy = d;
-                SPDLOG_DEBUG("XNVCtrl is using display {}", buf);
-                return true;
+            int nscreens = ScreenCount(d); //FIXME yes, no, maybe?
+            for (int screen = 0; screen < nscreens; screen++)
+            {
+                if (nvctrl.XNVCTRLIsNvScreen(d, screen)) {
+                    dpy = d;
+                    scr = screen;
+                    SPDLOG_DEBUG("XNVCtrl is using display {}", buf);
+                    return true;
+                }
             }
             g_x11->XCloseDisplay(d);
         }
@@ -46,20 +51,15 @@ bool checkXNVCtrl()
         return false;
     }
 
-    Display *dpy;
-    nvctrlSuccess = find_nv_x11(nvctrl, dpy);
+    Display *dpy = nullptr;
+    int screen = 0;
+    nvctrlSuccess = find_nv_x11(nvctrl, dpy, screen);
 
     if (!nvctrlSuccess) {
         SPDLOG_ERROR("XNVCtrl didn't find the correct display");
         return false;
     }
 
-    auto local_x11 = g_x11;
-    display = { dpy,
-        [local_x11](Display *dpy) {
-            local_x11->XCloseDisplay(dpy);
-        }
-    };
     // get device id at init
     int64_t pci_id;
     nvctrl.XNVCTRLQueryTargetAttribute64(display.get(),
@@ -70,6 +70,13 @@ bool checkXNVCtrl()
                     &pci_id);
     deviceID = (pci_id & 0xFFFF);
 
+    auto local_x11 = g_x11;
+    display = { dpy,
+        [local_x11](Display *dpy) {
+            local_x11->XCloseDisplay(dpy);
+        }
+    };
+
     return true;
 }
 
diff --git a/src/nvidia_info.h b/src/nvidia_info.h
index 518d81ac..bf6fd421 100644
--- a/src/nvidia_info.h
+++ b/src/nvidia_info.h
@@ -5,15 +5,9 @@
 #include <nvml.h>
 #include "overlay_params.h"
 
-extern nvmlReturn_t result;
-extern unsigned int nvidiaTemp, processSamplesCount, *vgpuInstanceSamplesCount, nvidiaCoreClock, nvidiaMemClock, nvidiaPowerUsage;
-extern nvmlDevice_t nvidiaDevice;
-extern struct nvmlUtilization_st nvidiaUtilization;
-extern struct nvmlMemory_st nvidiaMemory;
 extern bool nvmlSuccess;
-extern unsigned long long nvml_throttle_reasons;
 
-bool checkNVML(const char* pciBusId);
-bool getNVMLInfo(const struct overlay_params& params);
+bool checkNVML(); // Prototype kept in sync with nvml.cpp: loads/initialises NVML, takes no arguments.
+bool getNVMLInfo(nvmlDevice_t device, struct gpu_info& gpu_info, const struct overlay_params& params); // Fills gpu_info from the given NVML device.
 
 #endif //MANGOHUD_NVIDIA_INFO_H
diff --git a/src/nvml.cpp b/src/nvml.cpp
index 2b682fcd..ff26d566 100644
--- a/src/nvml.cpp
+++ b/src/nvml.cpp
@@ -4,6 +4,7 @@
 #include <iostream>
 #include "overlay.h"
 #include "overlay_params.h"
+#include "gpu.h"
 
 nvmlReturn_t result;
 nvmlDevice_t nvidiaDevice;
@@ -14,50 +15,64 @@ unsigned long long nvml_throttle_reasons;
 struct nvmlUtilization_st nvidiaUtilization;
 struct nvmlMemory_st nvidiaMemory {};
 
-bool checkNVML(const char* pciBusId){
-    auto& nvml = get_libnvml_loader();
-    if (nvml.IsLoaded()){
-        result = nvml.nvmlInit();
-        if (NVML_SUCCESS != result) {
-            SPDLOG_ERROR("Nvidia module not loaded");
-        } else {
-            nvmlReturn_t ret = NVML_ERROR_UNKNOWN;
-            if (pciBusId && ((ret = nvml.nvmlDeviceGetHandleByPciBusId(pciBusId, &nvidiaDevice)) != NVML_SUCCESS)) {
-                SPDLOG_ERROR("Getting device handle by PCI bus ID failed: {}", nvml.nvmlErrorString(ret));
-                SPDLOG_ERROR("Using index 0.");
-            }
+static std::unique_ptr<libnvml_loader, std::function<void(libnvml_loader*)>> nvml_shutdown;
 
-            if (ret != NVML_SUCCESS)
-                ret = nvml.nvmlDeviceGetHandleByIndex(0, &nvidiaDevice);
-
-            if (ret != NVML_SUCCESS)
-                SPDLOG_ERROR("Getting device handle failed: {}", nvml.nvmlErrorString(ret));
+bool checkNVML() // Returns true once NVML is loaded and nvmlInit() has succeeded; the result is cached in nvmlSuccess.
+{
+    auto& nvml = get_libnvml_loader();
+    if (!nvml.IsLoaded()) // library could not be loaded: log and report failure
+    {
+        SPDLOG_ERROR("Failed to load NVML");
+        return false;
+    }
 
-            nvmlSuccess = (ret == NVML_SUCCESS);
-            if (ret == NVML_SUCCESS)
-                nvml.nvmlDeviceGetPciInfo_v3(nvidiaDevice, &nvidiaPciInfo);
+    if (nvmlSuccess)
+        return nvmlSuccess;
 
-            return nvmlSuccess;
-        }
-    } else {
-        SPDLOG_ERROR("Failed to load NVML");
+    result = nvml.nvmlInit();
+    if (NVML_SUCCESS != result)
+    {
+        SPDLOG_ERROR("Nvidia module not loaded");
+        return false;
     }
 
-    return false;
+    nvml_shutdown = { &nvml,
+        [](libnvml_loader *nvml) -> void {
+            nvml->nvmlShutdown();
+        }
+    };
+    nvmlSuccess = true;
+    return nvmlSuccess;
 }
 
-bool getNVMLInfo(const struct overlay_params& params){
+bool getNVMLInfo(nvmlDevice_t device, gpu_info& gpu_info, const struct overlay_params& params){
     nvmlReturn_t response;
+    unsigned long long nvml_throttle_reasons = 0;
+    unsigned int nvidiaTemp, nvidiaCoreClock, nvidiaMemClock, nvidiaPowerUsage;
+    struct nvmlUtilization_st nvidiaUtilization;
+    struct nvmlMemory_st nvidiaMemory;
+
     auto& nvml = get_libnvml_loader();
-    response = nvml.nvmlDeviceGetUtilizationRates(nvidiaDevice, &nvidiaUtilization);
-    nvml.nvmlDeviceGetTemperature(nvidiaDevice, NVML_TEMPERATURE_GPU, &nvidiaTemp);
-    nvml.nvmlDeviceGetMemoryInfo(nvidiaDevice, &nvidiaMemory);
-    nvml.nvmlDeviceGetClockInfo(nvidiaDevice, NVML_CLOCK_GRAPHICS, &nvidiaCoreClock);
-    nvml.nvmlDeviceGetClockInfo(nvidiaDevice, NVML_CLOCK_MEM, &nvidiaMemClock);
-    nvml.nvmlDeviceGetPowerUsage(nvidiaDevice, &nvidiaPowerUsage);
-    deviceID = nvidiaPciInfo.pciDeviceId >> 16;
-    if (params.enabled[OVERLAY_PARAM_ENABLED_throttling_status])
-        nvml.nvmlDeviceGetCurrentClocksThrottleReasons(nvidiaDevice, &nvml_throttle_reasons);
+    response = nvml.nvmlDeviceGetUtilizationRates(device, &nvidiaUtilization);
+    nvml.nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &nvidiaTemp);
+    nvml.nvmlDeviceGetMemoryInfo(device, &nvidiaMemory);
+    nvml.nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &nvidiaCoreClock);
+    nvml.nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &nvidiaMemClock);
+    nvml.nvmlDeviceGetPowerUsage(device, &nvidiaPowerUsage);
+    if (params.enabled[OVERLAY_PARAM_ENABLED_throttling_status]){
+        nvml.nvmlDeviceGetCurrentClocksThrottleReasons(device, &nvml_throttle_reasons);
+        gpu_info.is_temp_throttled = (nvml_throttle_reasons & 0x0000000000000060LL) != 0;
+        gpu_info.is_power_throttled = (nvml_throttle_reasons & 0x000000000000008CLL) != 0;
+        gpu_info.is_other_throttled = (nvml_throttle_reasons & 0x0000000000000112LL) != 0;
+    }
+
+    gpu_info.load = nvidiaUtilization.gpu;
+    gpu_info.temp = nvidiaTemp;
+    gpu_info.memory_used = nvidiaMemory.used;
+    gpu_info.core_clock = nvidiaCoreClock;
+    gpu_info.memory_clock = nvidiaMemClock;
+    gpu_info.power_usage = nvidiaPowerUsage / 1000;
+    gpu_info.memory_total = nvidiaMemory.total;
 
     if (response == NVML_ERROR_NOT_SUPPORTED) {
         if (nvmlSuccess)
@@ -66,3 +81,49 @@ bool getNVMLInfo(const struct overlay_params& params){
     }
     return nvmlSuccess;
 }
+
+bool NVMLInfo::init() // Resolves this GPU's NVML handle; returns false when NVML is unavailable or no device could be identified.
+{
+    nvmlDevice_t nvml_dev {};
+    if (!checkNVML())
+        return false;
+
+    auto& nvml = get_libnvml_loader();
+    nvmlReturn_t ret = NVML_ERROR_UNKNOWN;
+    if ((ret = nvml.nvmlDeviceGetHandleByPciBusId(pci_device.c_str(), &nvml_dev)) != NVML_SUCCESS)
+    {
+        SPDLOG_ERROR("Getting device handle by PCI bus ID failed: {}", nvml.nvmlErrorString(ret));
+    }
+    bool found = (ret == NVML_SUCCESS);
+    if (!found)
+    {
+        unsigned int deviceCount = 0;
+        ret = nvml.nvmlDeviceGetCount(&deviceCount);
+        // Fallback: walk every NVML device index and match on the PCI device ID.
+        if (ret == NVML_SUCCESS)
+        {
+            for (unsigned i = 0; i < deviceCount && !found; i++)
+            {
+                ret = nvml.nvmlDeviceGetHandleByIndex(i, &nvml_dev);
+                if (ret != NVML_SUCCESS)
+                    SPDLOG_ERROR("Getting device {} handle failed: {}", i, nvml.nvmlErrorString(ret));
+                else if (nvml.nvmlDeviceGetPciInfo_v3(nvml_dev, &nvidiaPciInfo) == NVML_SUCCESS)
+                {
+                    // NOTE(review): relies on deviceID being assigned before init() runs - confirm at the call site.
+                    found = (this->deviceID == nvidiaPciInfo.pciDeviceId >> 16);
+                }
+            }
+        }
+    }
+    if (!found) return false; // never publish an unmatched or uninitialised handle
+    device = reinterpret_cast<gpu_handles*>(nvml_dev);
+    return true;
+}
+
+void NVMLInfo::update(const struct overlay_params& params)
+{
+    // Refresh `info` from NVML; does nothing while nvmlSuccess is false.
+    if (!nvmlSuccess)
+        return;
+    getNVMLInfo(reinterpret_cast<nvmlDevice_t>(device), info, params);
+}
diff --git a/src/overlay.cpp b/src/overlay.cpp
index 78e70545..d79a751b 100644
--- a/src/overlay.cpp
+++ b/src/overlay.cpp
@@ -48,7 +48,6 @@ std::deque<logData> graph_data;
 const char* engines[] = {"Unknown", "OpenGL", "VULKAN", "DXVK", "VKD3D", "DAMAVAND", "ZINK", "WINED3D", "Feral3D", "ToGL", "GAMESCOPE"};
 overlay_params *_params {};
 double min_frametime, max_frametime;
-bool gpu_metrics_exists = false;
 bool steam_focused = false;
 vector<float> frametime_data(200,0.f);
 int fan_speed;
@@ -120,14 +119,8 @@ void update_hw_info(const struct overlay_params& params, uint32_t vendorID)
 #endif
    }
    if (params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] || logger->is_active()) {
-      if (vendorID == 0x1002)
-         getAmdGpuInfo();
-
-      if (gpu_metrics_exists)
-         amdgpu_get_metrics();
-
-      if (vendorID == 0x10de)
-         getNvidiaGpuInfo(params);
+      for (auto& gpu : g_gpu_devices)
+         gpu.second->update(params);
    }
 
 #ifdef __linux__
@@ -147,12 +140,15 @@ void update_hw_info(const struct overlay_params& params, uint32_t vendorID)
       getIoStats(g_io_stats);
 #endif
 
-   currentLogData.gpu_load = gpu_info.load;
-   currentLogData.gpu_temp = gpu_info.temp;
-   currentLogData.gpu_core_clock = gpu_info.CoreClock;
-   currentLogData.gpu_mem_clock = gpu_info.MemClock;
-   currentLogData.gpu_vram_used = gpu_info.memoryUsed;
-   currentLogData.gpu_power = gpu_info.powerUsage;
+   if (g_active_gpu)
+   {
+      currentLogData.gpu_load = g_active_gpu->info.load;
+      currentLogData.gpu_temp = g_active_gpu->info.temp;
+      currentLogData.gpu_core_clock = g_active_gpu->info.core_clock;
+      currentLogData.gpu_mem_clock = g_active_gpu->info.memory_clock;
+      currentLogData.gpu_vram_used = g_active_gpu->info.memory_used;
+      currentLogData.gpu_power = g_active_gpu->info.power_usage;
+   }
 #ifdef __linux__
    currentLogData.ram_used = memused;
 #endif
@@ -624,7 +620,98 @@ struct pci_bus {
    int func;
 };
 
-void init_gpu_stats(uint32_t& vendorID, uint32_t reported_deviceID, overlay_params& params)
+static void enumerate_gpus(overlay_params& params) // Fills g_gpu_devices: a single NVAPI entry on WIN32, a /sys/class/drm scan on GNU/Linux.
+{
+#ifdef WIN32
+   auto gpu = std::make_shared<NVAPIInfo>();
+   if (gpu->init())
+      g_gpu_devices["nvapi_0"] = gpu;
+   return;
+#endif
+
+#ifdef __gnu_linux__
+   string path;
+   string drm = "/sys/class/drm/";
+
+   auto dirs = ls(drm, "card");
+   for (auto& dir : dirs) {
+      path = drm + dir;
+
+      // skip display outputs
+      if (!file_exists(path + "/device/vendor"))
+         continue;
+
+      string vendor = read_line(path + "/device/vendor");
+      uint32_t vendor_id = strtoul(vendor.c_str(), NULL, 16);
+
+      string device = read_line(path + "/device/device");
+      uint32_t device_id = strtoul(device.c_str(), NULL, 16); // OGL might fail so read from sysfs
+
+      const std::string device_path = path + "/device";
+      string pci_device = read_symlink(device_path.c_str());
+      auto pos = pci_device.find_last_of('/');
+      pci_device = pci_device.substr(pos != std::string::npos ? pos + 1 : 0); // last path component of the symlink; used as the g_gpu_devices key
+
+      string module = get_basename(read_symlink(path + "/device/driver/module"));
+      SPDLOG_DEBUG("using device path: {}, module: {}, pci device: {}", path, module, pci_device);
+
+      auto dev_name = get_device_name(vendor_id, device_id);
+      if (module == "amdgpu")
+      {
+         const std::string gpu_metrics_path = device_path + "/gpu_metrics";
+         if (amdgpu_check_metrics(gpu_metrics_path)) {
+            SPDLOG_DEBUG("Using gpu_metrics of {}", gpu_metrics_path);
+            auto gpu = std::make_shared<AMDGPUInfo>(gpu_metrics_path, path, pci_device);
+            if (gpu->init())
+            {
+               gpu->vendorID = vendor_id;
+               gpu->deviceID = device_id;
+               gpu->dev_name = dev_name;
+               g_gpu_devices[pci_device] = gpu;
+            }
+         }
+         else {
+            // no usable gpu_metrics: fall back to AMDGPUHWMonInfo, which requires gpu_busy_percent
+            if (!file_exists(path + "/device/gpu_busy_percent"))
+               continue;
+
+            auto gpu = std::make_shared<AMDGPUHWMonInfo>(path, pci_device);
+            if (gpu->init())
+            {
+               gpu->vendorID = vendor_id;
+               gpu->deviceID = device_id;
+               gpu->dev_name = dev_name;
+               g_gpu_devices[pci_device] = gpu;
+            }
+         }
+      }
+      else if (module == "nvidia")
+      {
+         auto gpu = std::make_shared<NVMLInfo>(path, pci_device);
+         gpu->vendorID = vendor_id; // IDs are set before init(): NVMLInfo::init() matches devices on deviceID
+         gpu->deviceID = device_id;
+         gpu->dev_name = dev_name;
+         if (gpu->init())
+         {
+            g_gpu_devices[pci_device] = gpu;
+         }
+         else
+         {
+            auto gpu = std::make_shared<NVCtrlInfo>(path, pci_device);
+            if (gpu->init())
+            {
+               gpu->vendorID = vendor_id;
+               gpu->deviceID = device_id;
+               gpu->dev_name = dev_name;
+               g_gpu_devices[pci_device] = gpu;
+            }
+         }
+      }
+   }
+#endif
+}
+
+void init_gpu_stats(uint32_t& vendorID, uint32_t target_device_id, overlay_params& params)
 {
    //if (!params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats])
    //   return;
@@ -658,111 +745,62 @@ void init_gpu_stats(uint32_t& vendorID, uint32_t reported_deviceID, overlay_para
       }
    }
 
-   // NVIDIA or Intel but maybe has Optimus
-   if (vendorID == 0x8086
-      || vendorID == 0x10de) {
+   if (!g_gpu_devices.size())
+      enumerate_gpus(params);
 
-      if(checkNvidia(pci_dev))
-         vendorID = 0x10de;
-      else
-         params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
+   if (pci_bus_parsed && pci_dev && g_gpu_devices.find(params.pci_dev) != g_gpu_devices.end())
+   {
+      g_active_gpu = g_gpu_devices[params.pci_dev];
    }
-
-#ifdef __linux__
-   if (vendorID == 0x8086 || vendorID == 0x1002
-       || gpu.find("Radeon") != std::string::npos
-       || gpu.find("AMD") != std::string::npos) {
-      string path;
-      string drm = "/sys/class/drm/";
-
-      auto dirs = ls(drm.c_str(), "card");
-      for (auto& dir : dirs) {
-         path = drm + dir;
-
-         SPDLOG_DEBUG("amdgpu path check: {}", path);
-         if (pci_bus_parsed && pci_dev) {
-            string pci_device = read_symlink((path + "/device").c_str());
-            SPDLOG_DEBUG("PCI device symlink: '{}'", pci_device);
-            if (!ends_with(pci_device, pci_dev)) {
-               SPDLOG_DEBUG("skipping GPU, no PCI ID match");
-               continue;
-            }
-         }
-
-         FILE *fp;
-         string device = path + "/device/device";
-         if ((fp = fopen(device.c_str(), "r"))){
-            uint32_t temp = 0;
-            if (fscanf(fp, "%x", &temp) == 1) {
-               if (reported_deviceID && temp != reported_deviceID){
-                  fclose(fp);
-                  SPDLOG_DEBUG("DeviceID does not match vulkan report {}", reported_deviceID);
-                  continue;
-               }
-               deviceID = temp;
-            }
-            fclose(fp);
-         }
-
-         string vendor = path + "/device/vendor";
-         if ((fp = fopen(vendor.c_str(), "r"))){
-            uint32_t temp = 0;
-            if (fscanf(fp, "%x", &temp) != 1 || temp != 0x1002) {
-               fclose(fp);
-               continue;
-            }
-            fclose(fp);
-         }
-
-         const std::string device_path = path + "/device";
-         const std::string gpu_metrics_path = device_path + "/gpu_metrics";
-         if (amdgpu_check_metrics(gpu_metrics_path)) {
-            gpu_metrics_exists = true;
-            metrics_path = gpu_metrics_path;
-            SPDLOG_DEBUG("Using gpu_metrics of {}", gpu_metrics_path);
-         }
-
-         if (!amdgpu.vram_total)
-            amdgpu.vram_total = fopen((device_path + "/mem_info_vram_total").c_str(), "r");
-         if (!amdgpu.vram_used)
-            amdgpu.vram_used = fopen((device_path + "/mem_info_vram_used").c_str(), "r");
-         if (!amdgpu.gtt_used)
-            amdgpu.gtt_used = fopen((device_path + "/mem_info_gtt_used").c_str(), "r");
-
-         const std::string hwmon_path = device_path + "/hwmon/";
-         const auto dirs = ls(hwmon_path.c_str(), "hwmon", LS_DIRS);
-         for (const auto& dir : dirs)
-            if (!amdgpu.temp)
-               amdgpu.temp = fopen((hwmon_path + dir + "/temp1_input").c_str(), "r");
-
-         if (!metrics_path.empty())
+   else if (vendorID == 0x8086) // Maybe an "Optimus" setup, try to get a secondary gpu
+   {
+      for (auto& it : g_gpu_devices)
+      {
+         const auto& gpu = it.second;
+         if (gpu->vendorID != 0x8086)
+         {
+            g_active_gpu = gpu;
             break;
-
-         // The card output nodes - cardX-output, will point to the card node
-         // As such the actual metrics nodes will be missing.
-         amdgpu.busy = fopen((device_path + "/gpu_busy_percent").c_str(), "r");
-         if (!amdgpu.busy)
-            continue;
-
-         SPDLOG_DEBUG("using amdgpu path: {}", device_path);
-
-         for (const auto& dir : dirs) {
-            if (!amdgpu.core_clock)
-               amdgpu.core_clock = fopen((hwmon_path + dir + "/freq1_input").c_str(), "r");
-            if (!amdgpu.memory_clock)
-               amdgpu.memory_clock = fopen((hwmon_path + dir + "/freq2_input").c_str(), "r");
-            if (!amdgpu.power_usage)
-               amdgpu.power_usage = fopen((hwmon_path + dir + "/power1_average").c_str(), "r");
          }
-         break;
       }
-
-      // don't bother then
-      if (metrics_path.empty() && !amdgpu.busy) {
-         params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
+   }
+   else
+   {
+      for (auto& info : g_gpu_devices)
+      {
+         auto& gpu = info.second;
+         if (gpu->vendorID == vendorID && (gpu->deviceID == target_device_id || target_device_id == 0))
+         {
+            g_active_gpu = gpu;
+            if (!target_device_id)
+               SPDLOG_WARN("No device id given, using first device found from vendor 0x{:04X}", vendorID);
+            break;
+         }
       }
    }
+
+#ifdef WIN32
+   //TODO windows' gpu stats
+   if (g_gpu_devices.size())
+      g_active_gpu = g_gpu_devices.begin()->second;
+   else
+      g_active_gpu = std::make_shared<DummyGpu>();
 #endif
+
+   // for compatibility
+   if (g_active_gpu)
+   {
+      vendorID = g_active_gpu->vendorID;
+      deviceID = g_active_gpu->deviceID;
+   }
+
+   if (g_active_gpu)
+      SPDLOG_INFO("Selected GPU: {}, 0x{:04X}:0x{:04X} [{}]", g_active_gpu->sysfs_path, vendorID, deviceID, g_active_gpu->dev_name);
+   else {
+      g_active_gpu = std::make_shared<DummyGpu>();
+      SPDLOG_WARN("Selected dummy GPU");
+   }
+
    if (!params.permit_upload)
       SPDLOG_INFO("Uploading is disabled (permit_upload = 0)");
 }
@@ -865,7 +903,7 @@ void init_system_info(){
 
 std::string get_device_name(uint32_t vendorID, uint32_t deviceID)
 {
-   string desc;
+   string desc {};
 #ifdef __linux__
    if (pci_ids.find(vendorID) == pci_ids.end())
       parse_pciids();
@@ -886,8 +924,8 @@ std::string get_device_name(uint32_t vendorID, uint32_t deviceID)
 void update_fan(){
    // This just handles steam deck fan for now
    string hwmon_path;
-   string path = "/sys/class/hwmon/";
-   auto dirs = ls(path.c_str(), "hwmon", LS_DIRS);
+   const string path = "/sys/class/hwmon/";
+   auto dirs = ls(path, "hwmon", LS_DIRS);
    for (auto& dir : dirs) {
       string full_path = (path + dir + "/name").c_str();
       if (read_line(full_path).find("jupiter") != string::npos){
@@ -901,3 +939,8 @@ void update_fan(){
    else
       fan_speed = -1;
 }
+
+void get_device_name(int32_t vendorID, int32_t deviceID, struct swapchain_stats& sw_stats) // Convenience overload: looks the name up and stores it instead of returning it.
+{
+   gpu = sw_stats.gpuName = get_device_name(vendorID, deviceID); // same string goes to sw_stats.gpuName and to `gpu`
+}
diff --git a/src/overlay_params.h b/src/overlay_params.h
index 7977edfe..268bc163 100644
--- a/src/overlay_params.h
+++ b/src/overlay_params.h
@@ -33,6 +33,7 @@ typedef unsigned long KeySym;
    OVERLAY_PARAM_BOOL(gpu_temp)                      \
    OVERLAY_PARAM_BOOL(cpu_stats)                     \
    OVERLAY_PARAM_BOOL(gpu_stats)                     \
+   OVERLAY_PARAM_BOOL(show_all_gpus)                 \
    OVERLAY_PARAM_BOOL(ram)                           \
    OVERLAY_PARAM_BOOL(swap)                          \
    OVERLAY_PARAM_BOOL(vram)                          \