Coverage for /home/antoine/projects/xpra-git/dist/python3/lib64/python/xpra/codecs/cuda_common/cuda_context.py : 36%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# This file is part of Xpra.
3# Copyright (C) 2013-2019 Antoine Martin <antoine@xpra.org>
4# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
5# later version. See the file COPYING for details.
7#@PydevCodeAnalysisIgnore
8#pylint: disable=no-member
10import os
11import pycuda #@UnresolvedImport
12from pycuda import driver #@UnresolvedImport
14from xpra.util import engs, print_nested_dict, envint, csv, first_time
15from xpra.os_util import monotonic_time, load_binary_file
16from xpra.log import Logger
#logger for all CUDA related messages:
log = Logger("cuda")

#minimum free memory (as a percentage of total) a device must have
#to be selected by select_device / select_best_free_memory:
MIN_FREE_MEMORY = envint("XPRA_CUDA_MIN_FREE_MEMORY", 10)
#per-device health record: device_id -> True (last use succeeded)
#or False (last use failed); absent means "never used":
DEVICE_STATE = {}

def record_device_failure(device_id):
    """Mark the given device as having failed."""
    DEVICE_STATE[device_id] = False

def record_device_success(device_id):
    """Mark the given device as working."""
    DEVICE_STATE[device_id] = True
def device_info(d):
    """Human readable summary of a device: "<name> @ <pci-bus-id>", or "None"."""
    if d:
        return "%s @ %s" % (d.name(), d.pci_bus_id())
    return "None"
def pci_bus_id(d):
    """PCI bus id of the device, or the string "None"."""
    return d.pci_bus_id() if d else "None"
def device_name(d):
    """Name of the device, or the string "None"."""
    return d.name() if d else "None"
def compute_capability(d):
    """Pack the device's (major, minor) compute capability into a single int:
    major in the high nibble, minor in the low nibble (ie: 6.1 -> 0x61)."""
    major, minor = d.compute_capability()
    return (major << 4) + minor
def get_pycuda_version():
    """Returns the pycuda version tuple, ie: (2019, 1, 2)."""
    return pycuda.VERSION
def get_pycuda_info():
    """Version information about pycuda, as a nested dict.
    Also triggers device initialization as a side effect."""
    init_all_devices()
    info = {
        "version": {
            "": pycuda.VERSION,
            "text": pycuda.VERSION_TEXT,
        },
    }
    if pycuda.VERSION_STATUS:
        info["version.status"] = pycuda.VERSION_STATUS
    return info
def get_cuda_info():
    """CUDA driver version information, as a nested dict.
    Also triggers device initialization as a side effect."""
    init_all_devices()
    version = driver.get_version()
    driver_version = driver.get_driver_version()
    return {
        "driver": {
            "version": version,
            "driver_version": driver_version,
        },
    }
#cache of device id -> info string, populated by init_all_devices:
DEVICE_INFO = {}

def get_device_info(i):
    """The cached info string for device index i, or None if unknown."""
    return DEVICE_INFO.get(i)
#cache of device id -> device name, populated by init_all_devices:
DEVICE_NAME = {}

def get_device_name(i):
    """The cached name for device index i, or None if unknown."""
    return DEVICE_NAME.get(i)
#lazily loaded preferences, parsed from the cuda.conf files:
PREFS = None

def get_prefs():
    """
    Load the CUDA preferences from every "cuda.conf" found in the
    default, system and user configuration directories.
    Later files override earlier ones.  The result is cached in PREFS.
    """
    global PREFS
    if PREFS is not None:
        return PREFS
    PREFS = {}
    from xpra.platform.paths import get_default_conf_dirs, get_system_conf_dirs, get_user_conf_dirs
    dirs = get_default_conf_dirs() + get_system_conf_dirs() + get_user_conf_dirs()
    log("get_prefs() will try to load cuda.conf from: %s", dirs)
    for conf_dir in dirs:
        conf_file = os.path.join(os.path.expanduser(conf_dir), "cuda.conf")
        if not os.path.exists(conf_file):
            log("get_prefs() '%s' does not exist!", conf_file)
            continue
        if not os.path.isfile(conf_file):
            log("get_prefs() '%s' is not a file!", conf_file)
            continue
        c_prefs = {}
        try:
            with open(conf_file, "rb") as f:
                for raw_line in f:
                    #files are read as binary, decode as latin1 after stripping:
                    sline = raw_line.strip().rstrip(b'\r\n').strip().decode("latin1")
                    parts = sline.split("=", 1)
                    if len(parts) != 2:
                        continue
                    name = parts[0].strip()
                    value = parts[1].strip()
                    if name in ("enabled-devices", "disabled-devices"):
                        #list values, comma separated:
                        for item in value.split(","):
                            c_prefs.setdefault(name, []).append(item.strip())
                    elif name in ("device-id", "device-name", "load-balancing"):
                        c_prefs[name] = value
        except Exception as e:
            log.error("Error: cannot read cuda configuration file '%s':", conf_file)
            log.error(" %s", e)
        #whatever was parsed before an error still applies:
        log("get_prefs() '%s' : %s", conf_file, c_prefs)
        PREFS.update(c_prefs)
    return PREFS
def get_pref(name):
    """
    Lookup a single CUDA preference value.
    Environment variables take precedence over the configuration files,
    ie: "device-id" is overridden by "XPRA_CUDA_DEVICE_ID".
    List preferences are returned as lists, others as strings (or None).
    """
    assert name in ("device-id", "device-name", "enabled-devices", "disabled-devices", "load-balancing")
    env_name = "XPRA_CUDA_%s" % str(name).upper().replace("-", "_")
    env_value = os.environ.get(env_name)
    if env_value is None:
        return get_prefs().get(name)
    if name in ("enabled-devices", "disabled-devices"):
        return env_value.split(",")
    return env_value
def get_gpu_list(list_type):
    """
    Parse the "enabled-devices" or "disabled-devices" preference.
    Returns:
    * None when the preference is unset or invalid,
    * True when it contains "all",
    * [] when it contains "none",
    * otherwise a list of device ids (ints) and / or name strings.
    """
    pref = get_pref(list_type)
    log("get_gpu_list(%s) pref=%s", list_type, pref)
    if not pref:
        return None
    if "all" in pref:
        return True
    if "none" in pref:
        return []
    def parse(item):
        #numeric entries are device ids, anything else is a name / pci id:
        try:
            return int(item)
        except ValueError:
            return item.strip()
    try:
        return [parse(item) for item in pref]
    except ValueError:
        log("get_gpu_list(%s)", list_type, exc_info=True)
        log.error("Error: invalid value for '%s' CUDA preference", list_type)
        return None
#None: not attempted yet, True: initialized, False: failed:
driver_init_done = None

def driver_init():
    """
    Initialize the CUDA driver, once.
    Returns True on success, False on failure (the result is cached).
    """
    global driver_init_done
    if driver_init_done is not None:
        return driver_init_done
    log.info("CUDA initialization (this may take a few seconds)")
    try:
        driver.init()
        driver_init_done = True
        log("CUDA driver version=%s", driver.get_driver_version())
        if driver.Device.count() == 0:
            log.info("CUDA %s / PyCUDA %s, no devices found",
                     ".".join(str(x) for x in driver.get_version()), pycuda.VERSION_TEXT)
    except Exception as e:
        log.error("Error: cannot initialize CUDA")
        log.error(" %s", e)
        driver_init_done = False
    return driver_init_done
#list of validated device ids, None until init_all_devices has run:
DEVICES = None

def init_all_devices():
    """
    Initialize the driver and probe every CUDA device,
    honouring the enabled / disabled device preferences.
    Returns the (cached) list of usable device ids.
    """
    global DEVICES, DEVICE_INFO, DEVICE_NAME
    if DEVICES is not None:
        return DEVICES
    DEVICES = []
    DEVICE_INFO = {}
    enabled = get_gpu_list("enabled-devices")
    disabled = get_gpu_list("disabled-devices")
    if disabled is True or enabled == []:
        log("all devices are disabled!")
        return DEVICES
    log("init_all_devices() enabled: %s, disabled: %s", csv(enabled), csv(disabled))
    if not driver_init():
        return DEVICES
    ngpus = driver.Device.count()
    log("init_all_devices() ngpus=%s", ngpus)
    if ngpus == 0:
        return DEVICES
    for gpu_index in range(ngpus):
        #shortcut if this GPU number is disabled:
        if disabled is not None and gpu_index in disabled:
            log("device %i is in the list of disabled gpus, skipped", gpu_index)
            continue
        device = None
        devinfo = "gpu %i" % gpu_index
        try:
            device = driver.Device(gpu_index)
            devinfo = device_info(device)
            log(" + testing device %s: %s", gpu_index, devinfo)
            DEVICE_NAME[gpu_index] = device_name(device)
            DEVICE_INFO[gpu_index] = devinfo
            if check_device(gpu_index, device):
                DEVICES.append(gpu_index)
        except Exception as e:
            log.error("error on device %s: %s", devinfo, e)
    return DEVICES
def check_device(i, device, min_compute=0):
    """
    Verify that the given device can be used:
    it must be able to map host memory, meet the minimum compute capability,
    pass the enabled / disabled device preference filters,
    and be recent enough (compute major version >= 2).
    A temporary context is created (and always released) to query
    memory and runtime attributes, which are logged at debug level.
    Returns True if the device is usable.
    """
    ngpus = driver.Device.count()
    da = driver.device_attribute
    devinfo = device_info(device)
    devname = device_name(device)
    pci = pci_bus_id(device)
    host_mem = device.get_attribute(da.CAN_MAP_HOST_MEMORY)
    if not host_mem:
        log.warn("skipping device %s (cannot map host memory)", devinfo)
        return False
    compute = compute_capability(device)
    if compute<min_compute:
        log("ignoring device %s: compute capability %#x (minimum %#x required)",
            device_info(device), compute, min_compute)
        return False
    enabled_gpus = get_gpu_list("enabled-devices")
    disabled_gpus = get_gpu_list("disabled-devices")
    if enabled_gpus not in (None, True) and \
        i not in enabled_gpus and devname not in enabled_gpus and pci not in enabled_gpus:
        log("device %i '%s' / '%s' is not in the list of enabled gpus, skipped", i, devname, pci)
        return False
    if disabled_gpus is not None and (devname in disabled_gpus or pci in disabled_gpus):
        #fixed: the format string was missing the '%i' placeholder for the device index,
        #so this call supplied 3 arguments for only 2 placeholders:
        log("device %i '%s' / '%s' is in the list of disabled gpus, skipped", i, devname, pci)
        return False
    #create a context so we can query the runtime attributes:
    cf = driver.ctx_flags
    context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
    try:
        log(" created context=%s", context)
        log(" api version=%s", context.get_api_version())
        free, total = driver.mem_get_info()
        log(" memory: free=%sMB, total=%sMB", int(free//1024//1024), int(total//1024//1024))
        log(" multi-processors: %s, clock rate: %s",
            device.get_attribute(da.MULTIPROCESSOR_COUNT), device.get_attribute(da.CLOCK_RATE))
        log(" max block sizes: (%s, %s, %s)",
            device.get_attribute(da.MAX_BLOCK_DIM_X),
            device.get_attribute(da.MAX_BLOCK_DIM_Y),
            device.get_attribute(da.MAX_BLOCK_DIM_Z),
            )
        log(" max grid sizes: (%s, %s, %s)",
            device.get_attribute(da.MAX_GRID_DIM_X),
            device.get_attribute(da.MAX_GRID_DIM_Y),
            device.get_attribute(da.MAX_GRID_DIM_Z),
            )
        max_width = device.get_attribute(da.MAXIMUM_TEXTURE2D_WIDTH)
        max_height = device.get_attribute(da.MAXIMUM_TEXTURE2D_HEIGHT)
        log(" maximum texture size: %sx%s", max_width, max_height)
        log(" max pitch: %s", device.get_attribute(da.MAX_PITCH))
        SMmajor, SMminor = device.compute_capability()
        compute = (SMmajor<<4) + SMminor
        log(" compute capability: %#x (%s.%s)", compute, SMmajor, SMminor)
        if i==0:
            #we print the list info "header" from inside the loop
            #so that the log output is bunched up together
            log.info("CUDA %s / PyCUDA %s, found %s device%s:",
                     ".".join([str(x) for x in driver.get_version()]), pycuda.VERSION_TEXT, ngpus, engs(ngpus))
        log.info(" + %s (memory: %s%% free, compute: %s.%s)",
                 device_info(device), 100*free//total, SMmajor, SMminor)
        if SMmajor<2:
            log.info(" this device is too old!")
            return False
        return True
    finally:
        #always release the probe context:
        context.pop()
def get_devices():
    """The list of validated device ids, or None before init_all_devices has run."""
    return DEVICES
def check_devices():
    """Initialize all devices, raising an AssertionError if none are usable."""
    found = init_all_devices()
    assert found, "no valid CUDA devices found!"
def reset_state():
    """Forget the recorded per-device failure / success state."""
    global DEVICE_STATE
    log("cuda_context.reset_state()")
    DEVICE_STATE = {}
def select_device(preferred_device_id=-1, min_compute=0):
    """
    Select a CUDA device, honouring first the preferred_device_id argument,
    then the "device-id" preference, and finally falling back to load
    balancing ("round-robin" or best free memory).
    Returns (device_id, device) - the device is returned without a context.
    """
    log("select_device(%s, %s)", preferred_device_id, min_compute)
    for device_id in (preferred_device_id, get_pref("device-id")):
        if device_id is None:
            continue
        try:
            #the "device-id" preference value is a string:
            device_id = int(device_id)
        except (TypeError, ValueError):
            log.warn("Warning: invalid device id '%s'", device_id)
            continue
        if device_id < 0:
            continue
        #try to honour the device specified:
        context = None
        try:
            device, context, tpct = load_device(device_id)
        finally:
            #fixed: load_device may fail before a context exists,
            #in which case 'context' would be unbound / None:
            if context:
                context.pop()
                context.detach()
        if min_compute > 0:
            compute = compute_capability(device)
            if compute < min_compute:
                log.warn("Warning: GPU device %i only supports compute %#x", device_id, compute)
        if tpct < MIN_FREE_MEMORY:
            log.warn("Warning: GPU device %i is low on memory: %i%%", device_id, tpct)
        return device_id, device
    load_balancing = get_pref("load-balancing")
    log("load-balancing=%s", load_balancing)
    if load_balancing == "round-robin":
        return select_round_robin(min_compute)
    if load_balancing != "memory" and first_time("cuda-load-balancing"):
        log.warn("Warning: invalid load balancing value '%s'", load_balancing)
    return select_best_free_memory(min_compute)
#index of the last device selected by select_round_robin:
rr = 0

def select_round_robin(min_compute):
    """
    Pick the next usable device after the last one selected.
    Devices that fail check_device() are dropped from the candidate list.
    Returns (device_id, device), or (-1, None) when no device is usable.
    """
    global rr
    if not driver_init():
        return -1, None
    enabled_gpus = get_gpu_list("enabled-devices")
    disabled_gpus = get_gpu_list("disabled-devices")
    if disabled_gpus is True or enabled_gpus==[]:
        log("all devices are disabled!")
        return -1, None
    ngpus = driver.Device.count()
    if ngpus==0:
        return -1, None
    devices = list(range(ngpus))
    while devices:
        n = len(devices)
        i = (rr+1) % n
        device_id = devices[i]
        device = driver.Device(device_id)
        if check_device(device_id, device, min_compute):
            rr = i
            return device_id, device
        devices.remove(device_id)
    #fixed: previously, when every candidate failed check_device,
    #the last (failed) device was returned instead of (-1, None):
    return -1, None
def select_best_free_memory(min_compute=0):
    """
    Select the device with the most free memory,
    preferring devices that have not failed before,
    and preferring a device matching the "device-name" preference.
    Returns (device_id, device), or (-1, None) when no suitable device is found.
    """
    #load preferences:
    preferred_device_name = get_pref("device-name")
    devices = init_all_devices()
    global DEVICE_STATE
    free_pct = 0
    #split device list according to device state:
    #(devices with no recorded state count as OK)
    ok_devices = [device_id for device_id in devices if DEVICE_STATE.get(device_id, True) is True]
    nok_devices = [device_id for device_id in devices if DEVICE_STATE.get(device_id, True) is not True]
    #try the healthy devices first, only then the ones that failed before:
    for list_name, device_list in {"OK" : ok_devices, "failing" : nok_devices}.items():
        selected_device_id = -1
        selected_device = None
        log("will test %s device%s from %s list: %s", len(device_list), engs(device_list), list_name, device_list)
        for device_id in device_list:
            context = None
            try:
                #load_device creates a context, released in the finally block below:
                device, context, tpct = load_device(device_id)
                compute = compute_capability(device)
                if compute<min_compute:
                    log("ignoring device %s: compute capability %#x (minimum %#x required)",
                        device_info(device), compute, min_compute)
                elif preferred_device_name and device_info(device).find(preferred_device_name)>=0:
                    #a name match short-circuits the free-memory comparison:
                    log("device matches preferred device name: %s", preferred_device_name)
                    return device_id, device
                elif tpct>=MIN_FREE_MEMORY and tpct>free_pct:
                    log("device has enough free memory: %i (min=%i, current best device=%i)",
                        tpct, MIN_FREE_MEMORY, free_pct)
                    selected_device = device
                    selected_device_id = device_id
                    free_pct = tpct
            finally:
                if context:
                    context.pop()
                    context.detach()
        if selected_device_id>=0 and selected_device:
            l = log
            if len(devices)>1:
                #only worth a log.info if there was an actual choice to make:
                l = log.info
            l("selected device %s: %s", selected_device_id, device_info(selected_device))
            return selected_device_id, selected_device
    return -1, None
def load_device(device_id):
    """
    Create a context for the given device id.
    Returns (device, context, free_memory_percent);
    the caller is responsible for releasing the context.
    """
    log("load_device(%i)", device_id)
    device = driver.Device(device_id)
    log("select_device: testing device %s: %s", device_id, device_info(device))
    flags = driver.ctx_flags
    context = device.make_context(flags=flags.SCHED_YIELD | flags.MAP_HOST)
    log("created context=%s", context)
    free, total = driver.mem_get_info()
    log("memory: free=%sMB, total=%sMB", int(free/1024/1024), int(total/1024/1024))
    free_pct = 100*free//total
    return device, context, free_pct
#maps CUresult error codes to their symbolic names:
CUDA_ERRORS_INFO = {
    #this list is taken from the CUDA 7.0 SDK header file,
    #so we don't have to build against CUDA (lacks pkgconfig anyway)
    #and so we don't have to worry about which version of the SDK we link against either
    0   : "SUCCESS",
    1   : "INVALID_VALUE",
    2   : "OUT_OF_MEMORY",
    3   : "NOT_INITIALIZED",
    4   : "DEINITIALIZED",
    5   : "PROFILER_DISABLED",
    6   : "PROFILER_NOT_INITIALIZED",
    7   : "PROFILER_ALREADY_STARTED",
    8   : "PROFILER_ALREADY_STOPPED",
    100 : "NO_DEVICE",
    101 : "INVALID_DEVICE",
    200 : "INVALID_IMAGE",
    201 : "INVALID_CONTEXT",
    202 : "CONTEXT_ALREADY_CURRENT",
    205 : "MAP_FAILED",
    206 : "UNMAP_FAILED",
    207 : "ARRAY_IS_MAPPED",
    208 : "ALREADY_MAPPED",
    209 : "NO_BINARY_FOR_GPU",
    210 : "ALREADY_ACQUIRED",
    211 : "NOT_MAPPED",
    212 : "NOT_MAPPED_AS_ARRAY",
    213 : "NOT_MAPPED_AS_POINTER",
    214 : "ECC_UNCORRECTABLE",
    215 : "UNSUPPORTED_LIMIT",
    216 : "CONTEXT_ALREADY_IN_USE",
    217 : "PEER_ACCESS_UNSUPPORTED",
    218 : "INVALID_PTX",
    219 : "INVALID_GRAPHICS_CONTEXT",
    300 : "INVALID_SOURCE",
    301 : "FILE_NOT_FOUND",
    302 : "SHARED_OBJECT_SYMBOL_NOT_FOUND",
    303 : "SHARED_OBJECT_INIT_FAILED",
    304 : "OPERATING_SYSTEM",
    400 : "INVALID_HANDLE",
    500 : "NOT_FOUND",
    600 : "NOT_READY",
    700 : "ILLEGAL_ADDRESS",
    701 : "LAUNCH_OUT_OF_RESOURCES",
    702 : "LAUNCH_TIMEOUT",
    703 : "LAUNCH_INCOMPATIBLE_TEXTURING",
    704 : "PEER_ACCESS_ALREADY_ENABLED",
    705 : "PEER_ACCESS_NOT_ENABLED",
    708 : "PRIMARY_CONTEXT_ACTIVE",
    709 : "CONTEXT_IS_DESTROYED",
    710 : "ASSERT",
    711 : "TOO_MANY_PEERS",
    712 : "HOST_MEMORY_ALREADY_REGISTERED",
    713 : "HOST_MEMORY_NOT_REGISTERED",
    714 : "HARDWARE_STACK_ERROR",
    715 : "ILLEGAL_INSTRUCTION",
    716 : "MISALIGNED_ADDRESS",
    717 : "INVALID_ADDRESS_SPACE",
    718 : "INVALID_PC",
    719 : "LAUNCH_FAILED",
    800 : "NOT_PERMITTED",
    801 : "NOT_SUPPORTED",
    999 : "UNKNOWN",
    }
#cache kernel fatbin files: function name -> file contents
KERNELS = {}

def get_CUDA_function(device_id, function_name):
    """
    Returns the compiled kernel for the given device and kernel key,
    or None if the fatbin file cannot be loaded.
    The fatbin file contents are cached in KERNELS,
    but the module is re-created from the buffer on every call.
    """
    data = KERNELS.get(function_name)
    if data is None:
        from xpra.platform.paths import get_resources_dir
        cubin_file = os.path.join(get_resources_dir(), "cuda", "%s.fatbin" % function_name)
        log("get_CUDA_function(%s, %s) cubin file=%s", device_id, function_name, cubin_file)
        data = load_binary_file(cubin_file)
        if not data:
            log.error("Error: failed to load CUDA bin file '%s'", cubin_file)
            return None
        log(" loaded %s bytes", len(data))
        KERNELS[function_name] = data
    #now load from cubin:
    start = monotonic_time()
    try:
        mod = driver.module_from_buffer(data)
    except Exception as e:
        log("module_from_buffer(%s)", data, exc_info=True)
        log.error("Error: failed to load module from buffer for '%s'", function_name)
        log.error(" %s", e)
        return None
    log("get_CUDA_function(%s, %s) module=%s", device_id, function_name, mod)
    try:
        CUDA_function = mod.get_function(function_name)
    except driver.LogicError as e:
        raise Exception("failed to load '%s' from %s: %s" % (function_name, mod, e)) from None
    end = monotonic_time()
    log("loading function %s from pre-compiled cubin took %.1fms", function_name, 1000.0*(end-start))
    return CUDA_function
def main():
    """Command line entry point: print CUDA / PyCUDA information."""
    import sys
    if "-v" in sys.argv or "--verbose" in sys.argv:
        log.enable_debug()

    from xpra.platform import program_context
    with program_context("CUDA-Info", "CUDA Info"):
        #query first so the device probing output appears before the header:
        info = get_pycuda_info()
        log.info("pycuda_info")
        print_nested_dict(info, print_fn=log.info)
        log.info("cuda_info")
        print_nested_dict(get_cuda_info(), print_fn=log.info)
        log.info("preferences:")
        print_nested_dict(get_prefs(), print_fn=log.info)
        log.info("device automatically selected:")
        log.info(" %s", device_info(select_device()[1]))

if __name__ == "__main__":
    main()