Initial work on adding NVidia GPU monitor

Adds support for NVIDIA cards via py3nvml.
Design is completely broken (TODO).

Signed-off-by: Lucas Zampieri <lzampier@redhat.com>
gpu
Lucas Zampieri 2021-08-27 17:29:22 -03:00
parent 776927e79e
commit 2b852042f2
1 changed file with 196 additions and 120 deletions

194
bpytop.py
View File

@ -19,6 +19,7 @@
import os, sys, io, threading, signal, re, subprocess, logging, logging.handlers, argparse import os, sys, io, threading, signal, re, subprocess, logging, logging.handlers, argparse
import urllib.request import urllib.request
import py3nvml.py3nvml as nvml
from time import time, sleep, strftime, tzset from time import time, sleep, strftime, tzset
from datetime import timedelta from datetime import timedelta
from _thread import interrupt_main from _thread import interrupt_main
@ -2458,7 +2459,7 @@ class ProcBox(Box):
else: else:
width_p = cls.width_p width_p = cls.width_p
if not "cpu" in cls.boxes: if not "cpu" in cls.boxes or "gpu" in cls.boxes:
height_p = 100 height_p = 100
else: else:
height_p = cls.height_p height_p = cls.height_p
@ -2878,6 +2879,7 @@ class ProcBox(Box):
Draw.buffer(cls.buffer, f'{out_misc}{out}{Term.fg}', only_save=Menu.active) Draw.buffer(cls.buffer, f'{out_misc}{out}{Term.fg}', only_save=Menu.active)
cls.redraw = cls.resized = cls.moved = False cls.redraw = cls.resized = cls.moved = False
class GpuBox(Box): class GpuBox(Box):
name = "gpu" name = "gpu"
height_p = 30 height_p = 30
@ -2894,15 +2896,20 @@ class GpuBox(Box):
@classmethod @classmethod
def _calc_size(cls): def _calc_size(cls):
width_p: int # if not "gpu" in cls.boxes:
if cls.stat_mode: # width_p = 0
width_p = 100 # Box._b_cpu_h = 0
else: # cls.width = Term.width
width_p = cls.width_p # return
# width_p: int
# if cls.stat_mode:
# width_p = 30
# else:
width_p = cls.width_p
cls.width = round(Term.width * width_p / 100) cls.width = round(Term.width * width_p / 100)
cls.height = Term.height - Box._b_proc_h - Box._b_cpu_h cls.height = Term.height - Box._b_cpu_h - Box._b_cpu_h - 4
cls.y = Box._b_cpu_h + 1 cls.y = Box._b_cpu_h + 24
cls.x = Term.width - cls.width + 1 cls.x = Term.width - cls.width + 1
cls.box_width = 27 if cls.width > 45 else 19 cls.box_width = 27 if cls.width > 45 else 19
cls.box_height = 9 if cls.height > 10 else cls.height - 2 cls.box_height = 9 if cls.height > 10 else cls.height - 2
@ -2912,33 +2919,34 @@ class GpuBox(Box):
@classmethod @classmethod
def _draw_bg(cls) -> str: def _draw_bg(cls) -> str:
if cls.proc_mode: return ""
return f'{create_box(box=cls, line_color=THEME.gpu_box)}' return f'{create_box(box=cls, line_color=THEME.gpu_box)}'
@classmethod @classmethod
def _draw_fg(cls): def _draw_fg(cls):
if cls.proc_mode: return
gpu = GpuCollector gpu = GpuCollector
if gpu.redraw: cls.redraw = True
if not gpu.gpu: return if gpu.redraw:
card, name = gpu.gpu[0], gpu.gpu[1][:20] cls.redraw = True
stat = gpu.stats[card] if not gpu.gpu:
stat_nums = gpu.stat_nums[card] return
card, name = gpu.gpu[0], gpu.name
stat = gpu.stats[gpu.uuid]
stat_nums = gpu.stat_nums[gpu.uuid]
out: str = "" out: str = ""
out_misc: str = "" out_misc: str = ""
x, y, w, h = cls.x + 1, cls.y + 1, cls.width - 2, cls.height - 2 x, y, w, h = cls.x + 1, cls.y + 1, cls.width - 2, cls.height - 2
reset: bool = bool(True) reset: bool = bool(True)
if cls.resized or cls.redraw: if cls.resized or cls.redraw:
Meters.gpu = {k:{} for k in cls.gpu_keys} Meters.gpu = {k: {} for k in cls.gpu_keys}
out_misc += cls._draw_bg() out_misc += cls._draw_bg()
out_misc += (f'{Mv.to(y-1, x+w - 23)}{THEME.gpu_box}{Symbol.h_line * (18 - len(name))}' out_misc += (f'{Mv.to(y-1, x+w - 30)}{THEME.gpu_box}{Symbol.h_line * (18 - len(name))}'
f'{Symbol.title_left}{Fx.b}{THEME.title(name)}{Fx.ub}{THEME.gpu_box(Symbol.title_right)}{Term.fg}') f'{Symbol.title_left}{Fx.b}{THEME.title(name)}{Fx.ub}{THEME.gpu_box(Symbol.title_right)}{Term.fg}')
Meters.gpu["load"][card+"gpu"] = Meter(int(stat["load"]["gpu"]), (w // 2) - 4, "cpu") Meters.gpu["load"]["gpu"] = Meter(int(stat["load"]["gpu"]), (w // 2) - 4, "cpu")
Meters.gpu["load"][card+"mem"] = Meter(int(stat["load"]["mem"]), (w // 2) - 4, "cpu") Meters.gpu["load"]["mem"] = Meter(int(stat["load"]["mem"]), (w // 2) - 4, "cpu")
Meters.gpu["vitals"][card+"vram"] = Meter(int(stat["vitals"]["vram"]), (w // 2) - 4, "cpu") # Meters.gpu["vitals"]["vram"] = Meter(int(stat["vitals"]["vram"]), (w // 2) - 4, "cpu")
Draw.buffer("gpu_misc", out_misc, only_save=True) Draw.buffer("gpu_misc", out_misc, only_save=True)
@ -2947,42 +2955,43 @@ class GpuBox(Box):
fixedEnding = lambda s, n : f'{s}{(n-(int(log10(s))+1)) * " "}{Mv.r(n-(int(log10(s))+1))}' fixedEnding = lambda s, n : f'{s}{(n-(int(log10(s))+1)) * " "}{Mv.r(n-(int(log10(s))+1))}'
clock = lambda s: fixedEnding(s, 4) + 'Mhz' clock = lambda s: fixedEnding(s, 4) + 'Mhz'
voltage = lambda s: fixedEnding(s, 4) + 'mV' # voltage = lambda s: fixedEnding(s, 4) + 'mV'
watts = lambda s: f'{s}{(6-(len(str(s))+1)) * " "}{Mv.r(6-(len(str(s))+1))}W' watts = lambda s: f'{s}{(6-(len(str(s))+1)) * " "}{Mv.r(6-(len(str(s))+1))}W'
# load percent # load percent
out += f'{Mv.to(y, Term.width - round(w / 2) - 1)}{THEME.graph_text("GPU ")}{Meters.gpu["load"][card+"gpu"](None if cls.resized else stat["load"]["gpu"])}' out += f'{Mv.to(y, Term.width - round(w / 2) - 1)}{THEME.graph_text("GPU ")}{Meters.gpu["load"]["gpu"](None if cls.resized else stat["load"]["gpu"])}'
out += f'{Mv.to(y+1, Term.width - round(w / 2) - 1)}{THEME.graph_text("Mem ")}{Meters.gpu["load"][card+"mem"](None if cls.resized else stat["load"]["mem"])}' out += f'{Mv.to(y+1, Term.width - round(w / 2) - 1)}{THEME.graph_text("Mem ")}{Meters.gpu["load"]["mem"](None if cls.resized else stat["load"]["mem"])}'
# vram # vram
out += f'{Mv.to(y+2, Term.width - round(w / 2) - 2)}{THEME.graph_text("VRAM ")}{Meters.gpu["vitals"][card+"vram"](None if cls.resized else stat["vitals"]["vram"])}' if "AMD" == True:
out += f'{Mv.to(y+2, Term.width - round(w / 2) - 2)}{THEME.graph_text("VRAM ")}{Meters.gpu["vitals"]["vram"](None if cls.resized else stat["vitals"]["vram"])}'
# clocks # clocks
for f_i in range(len(stat_nums["freqs"])): for f_i in range(len(stat_nums["freqs"])):
(f, name) = stat_nums["freqs"][f_i] (f, name) = stat_nums["freqs"][f_i]
out += f'{Mv.to(y+f_i, x)}{THEME.graph_text(name+": ")}{clock(stat["freqs"][f"freq{f}"][0])}' out += f'{Mv.to(y+f_i, x)}{THEME.graph_text(name+": ")}{clock(stat["freqs"][f"freq{f_i}"][0])}'
# voltage # voltage
if "AMD" == True:
out += f'{Mv.to(y+len(stat_nums["freqs"])-1, x)}' out += f'{Mv.to(y+len(stat_nums["freqs"])-1, x)}'
for f_i in range(len(stat_nums["volts"])): for f_i in range(len(stat_nums["volts"])):
(n, name) = stat_nums["volts"][f_i] (n, name) = stat_nums["volts"][f_i]
out += f'{Mv.d(1)}{THEME.graph_text(name+": ")}{voltage(stat["volts"][f"volt{n}"][0])}' out += f'{Mv.d(1)}{THEME.graph_text(name+": ")}{voltage(stat["volts"][f"volt{n}"][0])}'
# power # power
out += f'{Mv.to(y+len(stat_nums["freqs"])+len(stat_nums["volts"])-1, x)}' out += f'{Mv.to(y+len(stat_nums["freqs"])+len(stat_nums["volts"])-2, x)}'
for f_i in range(len(stat_nums["power"])): for f_i in range(len(stat_nums["power"])):
n = stat_nums["power"][f_i] n = stat_nums["power"][f_i]
out += f'{Mv.d(1)}{THEME.graph_text("Draw: ")}{watts(stat["power"][f"power{n}"])}' out += f'{Mv.d(1)}{THEME.graph_text("Draw: ")}{stat["power"][f"power{n}"]} W'
# temps # temps
out += f'{Mv.to(y+3, Term.width - 5)}{stat["vitals"]["temp1"][0]}°C' out += f'{Mv.to(y+2, Term.width - 5)}{stat["vitals"]["temp1"]}°C'
# fans # fans
out += f'{Mv.l(4)}'#len of temps out += f'{Mv.l(4)}'#len of temps
for f_i in range(len(stat_nums["fans"])): for f_i in range(len(stat_nums["fans"])):
(n, rpm) = stat_nums["freqs"][f_i] rpm = stat_nums["fans"][f_i]
out += f'{Mv.l(8 * (f_i + 1))}{stat["fans"][f"fan{n}"][0]}RPM' out += f'{Mv.l(12 * (f_i + 1))}Fans: {rpm}% '
Draw.buffer(cls.buffer, f'{out_misc}{out}{Term.fg}', only_save=Menu.active) Draw.buffer(cls.buffer, f'{out_misc}{out}{Term.fg}', only_save=Menu.active)
cls.redraw = cls.resized = False cls.redraw = cls.resized = False
@ -4084,25 +4093,33 @@ class ProcCollector(Collector):
def _draw(cls): def _draw(cls):
ProcBox._draw_fg() ProcBox._draw_fg()
class GpuCollector(Collector): class GpuCollector(Collector):
'''Collects GPU stats''' '''Collects GPU stats'''
buffer: str = GpuBox.buffer buffer: str = GpuBox.buffer
gpus: List[Tuple[str, str]] = []
gpu_i: int = 0
gpu: str = ""
name: str = ""
gpu_error: bool = False
reset: bool = False
if "AMD" == True:
dir: str = "/sys/class/drm/" dir: str = "/sys/class/drm/"
hwmon: str = "/device/hwmon/hwmon0/" hwmon: str = "/device/hwmon/hwmon0/"
pci_id_locations = ["/usr/share/hwdata/", "/usr/share/misc/"] pci_id_locations = ["/usr/share/hwdata/", "/usr/share/misc/"]
gpus: List[Tuple[str, str]] = [] else:
gpu_i: int = 0 nvml.nvmlInit()
gpu: str = "" handle = nvml.nvmlDeviceGetHandleByIndex(0)
gpu_error: bool = False uuid = nvml.nvmlDeviceGetUUID(handle)
reset: bool = False
#* stats structure = stats[cardN] # * stats structure = stats[cardN]
#[fans, freqs, vitals, power, load] # [fans, freqs, vitals, power, load]
#[fanN_rpm, fanN_max, freqN, tempN, vram_used, vram_total, vddgfx, avg_draw, gpu_load_p, mem_load_p] # [fanN_rpm, fanN_max, freqN, tempN, vram_used, vram_total, vddgfx, avg_draw, gpu_load_p, mem_load_p]
stats: Dict[str, Dict[str, Dict[str, Any]]] = {} stats: Dict[str, Dict[str, Dict[str, Any]]] = {}
#* stat_nums structure = stats[cardN][fans, freqs, temps, power, volts][n, ..., m] # * stat_nums structure = stats[cardN][fans, freqs, temps, power, volts][n, ..., m]
stat_nums: Dict[str, Dict[str, List[Tuple[str, str]]]] = {} stat_nums: Dict[str, Dict[str, List[Tuple[str, str]]]] = {}
populated: bool = False populated: bool = False
@ -4160,13 +4177,15 @@ class GpuCollector(Collector):
def _get_stat_nums(cls, card): def _get_stat_nums(cls, card):
stat_keys = ["fans", "freqs", "temps", "power", "volts"] stat_keys = ["fans", "freqs", "temps", "power", "volts"]
sys_keys = ["fan", "freq", "temp", "power", "in"] sys_keys = ["fan", "freq", "temp", "power", "in"]
cls.stat_nums[card] = { cls.stat_nums[cls.uuid] = {
"fans": [], "fans": [],
"freqs": [], "freqs": [],
"temps": [], "temps": [],
"power": [], "power": [],
"volts": [], "volts": [],
} }
if "AMD" == True:
get_num = lambda s, f: re.match(f"{s}\d", f) get_num = lambda s, f: re.match(f"{s}\d", f)
with os.scandir(cls._get_hwmon(card)) as files: with os.scandir(cls._get_hwmon(card)) as files:
for file in files: for file in files:
@ -4192,10 +4211,19 @@ class GpuCollector(Collector):
fan = cls.stat_nums[card]["fans"][f] fan = cls.stat_nums[card]["fans"][f]
f_max = cls._read(cls._get_hwmon(card) + f"fan{fan}_max") f_max = cls._read(cls._get_hwmon(card) + f"fan{fan}_max")
cls.stat_nums[card]["fans"][f] = (fan, f_max) cls.stat_nums[card]["fans"][f] = (fan, f_max)
else:
cls.stat_nums[cls.uuid]["fans"] = [nvml.nvmlDeviceGetFanSpeed(card)]
cls.stat_nums[cls.uuid]["freqs"] = [(str(nvml.nvmlDeviceGetClockInfo(card, nvml.NVML_CLOCK_GRAPHICS)), "GPU"),
(str(nvml.nvmlDeviceGetClockInfo(card, nvml.NVML_CLOCK_MEM)), "MEM")]
cls.stat_nums[cls.uuid]["temps"] = [nvml.nvmlDeviceGetTemperature(card, nvml.NVML_TEMPERATURE_GPU)]
cls.stat_nums[cls.uuid]["volts"] = [nvml.nvmlDeviceGetPowerUsage(card)]
cls.stat_nums[cls.uuid]["power"] = [nvml.nvmlDeviceGetPowerUsage(card)]
@classmethod @classmethod
def _get_gpus(cls): def _get_gpus(cls):
'''Get a list of all GPUs with names''' '''Get a list of all GPUs with names'''
if "AMD" == True:
cls.gpu_i = 0 cls.gpu_i = 0
cls.gpu = "" cls.gpu = ""
id, keys = ["", "", "", ""], ["vendor", "device", "subsystem_vendor", "subsystem_device"] id, keys = ["", "", "", ""], ["vendor", "device", "subsystem_vendor", "subsystem_device"]
@ -4207,15 +4235,24 @@ class GpuCollector(Collector):
id[2] = f"{id[2]} {id[3]}" id[2] = f"{id[2]} {id[3]}"
cls.gpus.append((card.name, tuple(id[:3]))) cls.gpus.append((card.name, tuple(id[:3])))
cls.gpus = cls._get_gpu_names(cls.gpus); cls.gpus = cls._get_gpu_names(cls.gpus)
for gpu in cls.gpus: for gpu in cls.gpus:
cls._get_stat_nums(gpu[0]) cls._get_stat_nums(gpu[0])
if not cls.gpus: cls.gpus = [""] if not cls.gpus: cls.gpus = [""]
cls.gpu = cls.gpus[cls.gpu_i] cls.gpu = cls.gpus[cls.gpu_i]
cls.populated = True cls.populated = True
else:
if nvml.nvmlDeviceGetCount() == 1:
cls.gpu = [cls.handle]
cls.name = nvml.nvmlDeviceGetName(cls.handle)
cls._get_stat_nums(cls.gpu[0])
else:
print("multi gpu not yet supported")
@classmethod @classmethod
def _get_vitals(cls, card): def _get_vitals(cls, card):
stat = {}
if "AMD" == True:
temp = lambda n: str(round(int(cls._read(cls._get_hwmon(card) + f"temp{n}_input")) / 1000)) #°C temp = lambda n: str(round(int(cls._read(cls._get_hwmon(card) + f"temp{n}_input")) / 1000)) #°C
stat = { stat = {
"vram_used": int(cls._read(cls._get_device(card) + "mem_info_vram_used")), #kB "vram_used": int(cls._read(cls._get_device(card) + "mem_info_vram_used")), #kB
@ -4224,51 +4261,93 @@ class GpuCollector(Collector):
stat["vram"] = (stat["vram_used"] / stat["vram_total"]) * 100 #percent stat["vram"] = (stat["vram_used"] / stat["vram_total"]) * 100 #percent
for (i, name) in cls.stat_nums[card]["temps"]: for (i, name) in cls.stat_nums[card]["temps"]:
stat[f"temp{i}"] = (temp(i), name) stat[f"temp{i}"] = (temp(i), name)
else:
temp = nvml.nvmlDeviceGetTemperature(card, nvml.NVML_TEMPERATURE_GPU)
stat[f"temp1"] = temp
return stat return stat
@classmethod @classmethod
def _get_power(cls, card): def _get_power(cls, card):
stat = {}
if "amd" == True:
power = lambda n: str(round(int(cls._read(cls._get_hwmon(card) + f"power{n}_average")) / 1000000, 2)) # Watts power = lambda n: str(round(int(cls._read(cls._get_hwmon(card) + f"power{n}_average")) / 1000000, 2)) # Watts
volt = lambda n: cls._read(cls._get_hwmon(card) + f"in{n}_input") # mV volt = lambda n: cls._read(cls._get_hwmon(card) + f"in{n}_input") # mV
stat = {} else:
for i in cls.stat_nums[card]["power"]: power = str(round(int(nvml.nvmlDeviceGetPowerUsage(cls.handle) / 1000)))
stat[f"power{i}"] = power(i)
for i in cls.stat_nums[cls.uuid]["power"]:
stat[f"power{i}"] = power
return stat return stat
@classmethod @classmethod
def _get_volts(cls, card): def _get_volts(cls, card):
if "amd" == True:
volt = lambda n: int(cls._read(cls._get_hwmon(card) + f"in{n}_input")) # mV volt = lambda n: int(cls._read(cls._get_hwmon(card) + f"in{n}_input")) # mV
else:
# nvidia only allow voltage data to be returned for their S-class products, see nvmlReturn_t nvmlUnitGetPsuInfo()
return ""
stat = {} stat = {}
for (i, name) in cls.stat_nums[card]["volts"]: for (i, name) in cls.stat_nums[card]["volts"]:
stat[f"volt{i}"] = (volt(i), name) stat[f"volt{i}"] = (volt(i), name)
return stat return stat
@classmethod @classmethod
def _get_freqs(cls, card): def _get_freqs(cls, card):
stat = {}
if "AMD" == True:
freq = lambda n: str(round(int(cls._read(cls._get_hwmon(card) + f"freq{n}_input")) / 1000000)) # MHz freq = lambda n: str(round(int(cls._read(cls._get_hwmon(card) + f"freq{n}_input")) / 1000000)) # MHz
freq = lambda n: round(int(cls._read(cls._get_hwmon(card) + f"freq{n}_input")) / 1000000) # MHz
stat = {}
for (i, name) in cls.stat_nums[card]["freqs"]: for (i, name) in cls.stat_nums[card]["freqs"]:
stat[f"freq{i}"] = (freq(i), name) stat[f"freq{i}"] = (freq(i), name)
else:
stat["freq0"] = (nvml.nvmlDeviceGetClockInfo(card, nvml.NVML_CLOCK_GRAPHICS), "GPU")
stat["freq1"] = (nvml.nvmlDeviceGetClockInfo(card, nvml.NVML_CLOCK_MEM), "MEM")
return stat return stat
@classmethod @classmethod
def _get_fans(cls, card): def _get_fans(cls, card):
stat = {}
if "amd" == True:
fan = lambda n: int(cls._read(cls._get_hwmon(card) + f"fan{n}_input")) # RPM fan = lambda n: int(cls._read(cls._get_hwmon(card) + f"fan{n}_input")) # RPM
stat = {}
for (i, f_max) in cls.stat_nums[card]["fans"]: for (i, f_max) in cls.stat_nums[card]["fans"]:
stat[f"fan{i}"] = (fan(i), f_max) stat[f"fan{i}"] = (fan(i), f_max)
else:
fan = nvml.nvmlDeviceGetFanSpeed(card)
stat[f"fans"] = fan
return stat
@classmethod
def _get_load(cls, card):
stat = {}
if "AMD" == True:
stat = {
"mem": int(cls._read(cls._get_device(card) + "mem_busy_percent")),
"gpu": int(cls._read(cls._get_device(card) + "gpu_busy_percent")),
}
else:
gpu_stats = nvml.nvmlDeviceGetUtilizationRates(card)
stat["mem"] = int(gpu_stats.gpu)
stat["gpu"] = int(gpu_stats.memory)
return stat return stat
@classmethod @classmethod
def _collect(cls): def _collect(cls):
if not cls.populated: cls._get_gpus() if not cls.populated:
if not cls.gpu: return cls._get_gpus()
if not cls.gpu:
return
stat: Dict[str, Dict[str, Any]] = {} stat: Dict[str, Dict[str, Any]] = {}
card = cls.gpu[0] card = cls.gpu[0]
@ -4277,19 +4356,16 @@ class GpuCollector(Collector):
stat["power"] = cls._get_power(card) stat["power"] = cls._get_power(card)
stat["volts"] = cls._get_volts(card) stat["volts"] = cls._get_volts(card)
stat["vitals"] = cls._get_vitals(card) stat["vitals"] = cls._get_vitals(card)
stat["load"] = { stat["load"] = cls._get_load(card)
"mem": int(cls._read(cls._get_device(card) + "mem_busy_percent")),
"gpu": int(cls._read(cls._get_device(card) + "gpu_busy_percent")),
}
cls.stats[card] = stat cls.stats[cls.uuid] = stat
cls.timestamp = time() cls.timestamp = time()
@classmethod @classmethod
def _draw(cls): def _draw(cls):
GpuBox._draw_fg() GpuBox._draw_fg()
class Menu: class Menu:
'''Holds all menus''' '''Holds all menus'''
active: bool = False active: bool = False
@ -5676,7 +5752,7 @@ def temperature(value: int, scale: str = "celsius") -> Tuple[int, str]:
def process_keys(): def process_keys():
mouse_pos: Tuple[int, int] = (0, 0) mouse_pos: Tuple[int, int] = (0, 0)
filtered: bool = False filtered: bool = False
box_keys = {"1" : "cpu", "2" : "mem", "3" : "net", "4" : "proc"} box_keys = {"1": "cpu", "2": "mem", "3": "net", "4": "proc", "5": "gpu"}
while Key.has_key(): while Key.has_key():
key = Key.get() key = Key.get()
found: bool = True found: bool = True