Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2# This file is part of Xpra. 

3# Copyright (C) 2013-2019 Antoine Martin <antoine@xpra.org> 

4# Xpra is released under the terms of the GNU GPL v2, or, at your option, any 

5# later version. See the file COPYING for details. 

6 

7#@PydevCodeAnalysisIgnore 

8#pylint: disable=no-member 

9 

10import os 

11import pycuda #@UnresolvedImport 

12from pycuda import driver #@UnresolvedImport 

13 

14from xpra.util import engs, print_nested_dict, envint, csv, first_time 

15from xpra.os_util import monotonic_time, load_binary_file 

16from xpra.log import Logger 

17 

18log = Logger("cuda") 

19 

20MIN_FREE_MEMORY = envint("XPRA_CUDA_MIN_FREE_MEMORY", 10) 

21 

#track when devices succeed or fail, keyed by device id:
DEVICE_STATE = {}

def record_device_failure(device_id):
    """Remember that this device has failed."""
    DEVICE_STATE[device_id] = False

def record_device_success(device_id):
    """Remember that this device works."""
    DEVICE_STATE[device_id] = True

32 

33 

def device_info(d):
    """Describe a pycuda device as "<name> @ <pci-bus-id>", or "None"."""
    if d:
        return "%s @ %s" % (d.name(), d.pci_bus_id())
    return "None"

38 

def pci_bus_id(d):
    """Return the device's PCI bus id string, or "None"."""
    if d:
        return d.pci_bus_id()
    return "None"

43 

def device_name(d):
    """Return the device's name, or "None"."""
    if d:
        return d.name()
    return "None"

48 

def compute_capability(d):
    """Pack the device's (major, minor) compute capability into a single
    integer: the major version in the high nibble, ie: (6, 1) -> 0x61."""
    major, minor = d.compute_capability()
    return (major << 4) + minor

52 

53 

def get_pycuda_version():
    #expose the pycuda version tuple, ie: (2019, 1, 2)
    return pycuda.VERSION

56 

57 

def get_pycuda_info():
    """Return pycuda version information as a nested dict
    (also triggers device initialization)."""
    init_all_devices()
    info = {
        "version": {
            "": pycuda.VERSION,
            "text": pycuda.VERSION_TEXT,
        },
    }
    if pycuda.VERSION_STATUS:
        info["version.status"] = pycuda.VERSION_STATUS
    return info

69 

def get_cuda_info():
    """Return CUDA driver version information as a nested dict
    (also triggers device initialization)."""
    init_all_devices()
    driver_versions = {
        "version": driver.get_version(),
        "driver_version": driver.get_driver_version(),
    }
    return {"driver": driver_versions}

78 

79 

#cache of device info strings, keyed by device id:
DEVICE_INFO = {}
def get_device_info(i):
    """Return the cached info string for device *i*, or None."""
    return DEVICE_INFO.get(i)

#cache of device names, keyed by device id:
DEVICE_NAME = {}
def get_device_name(i):
    """Return the cached name of device *i*, or None."""
    return DEVICE_NAME.get(i)

88 

89 

#cached parsed preferences (None until first loaded):
PREFS = None
def get_prefs():
    """Load and cache the CUDA preferences from "cuda.conf" files.

    The default, system and user configuration directories are all
    checked, in that order, so later files override earlier ones.
    Returns a dict which may contain:
    "enabled-devices" and "disabled-devices" (lists of strings),
    "device-id", "device-name" and "load-balancing" (strings).
    """
    global PREFS
    if PREFS is None:
        PREFS = {}
        from xpra.platform.paths import get_default_conf_dirs, get_system_conf_dirs, get_user_conf_dirs
        dirs = get_default_conf_dirs() + get_system_conf_dirs() + get_user_conf_dirs()
        log("get_prefs() will try to load cuda.conf from: %s", dirs)
        for d in dirs:
            conf_file = os.path.join(os.path.expanduser(d), "cuda.conf")
            if not os.path.exists(conf_file):
                log("get_prefs() '%s' does not exist!", conf_file)
                continue
            if not os.path.isfile(conf_file):
                log("get_prefs() '%s' is not a file!", conf_file)
                continue
            try:
                c_prefs = {}
                with open(conf_file, "rb") as f:
                    for line in f:
                        #latin1 decoding cannot fail on arbitrary bytes:
                        sline = line.strip().rstrip(b'\r\n').strip().decode("latin1")
                        props = sline.split("=", 1)
                        if len(props)!=2:
                            #not a "name=value" line, skip it:
                            continue
                        name = props[0].strip()
                        value = props[1].strip()
                        if name in ("enabled-devices", "disabled-devices"):
                            #comma separated list values:
                            for v in value.split(","):
                                c_prefs.setdefault(name, []).append(v.strip())
                        elif name in ("device-id", "device-name", "load-balancing"):
                            c_prefs[name] = value
            except Exception as e:
                #best effort: log the problem,
                #anything parsed before the error is still used
                log.error("Error: cannot read cuda configuration file '%s':", conf_file)
                log.error(" %s", e)
            log("get_prefs() '%s' : %s", conf_file, c_prefs)
            PREFS.update(c_prefs)
    return PREFS

127 

def get_pref(name):
    """Look up a single CUDA preference value.

    An environment variable (ie: XPRA_CUDA_DEVICE_ID for "device-id")
    takes precedence over the cuda.conf file settings.
    """
    assert name in ("device-id", "device-name", "enabled-devices", "disabled-devices", "load-balancing")
    #ie: "device-id" -> "XPRA_CUDA_DEVICE_ID"
    env_value = os.environ.get("XPRA_CUDA_%s" % str(name).upper().replace("-", "_"))
    if env_value is None:
        return get_prefs().get(name)
    if name in ("enabled-devices", "disabled-devices"):
        #list valued preferences:
        return env_value.split(",")
    return env_value

138 

def get_gpu_list(list_type):
    """Parse the "enabled-devices" or "disabled-devices" preference.

    Returns None if unset or invalid, True if "all" is specified,
    an empty list for "none", or a list of device numbers
    and device name / pci-id strings.
    """
    v = get_pref(list_type)
    log("get_gpu_list(%s) pref=%s", list_type, v)
    if not v:
        return None
    if "all" in v:
        return True
    if "none" in v:
        return []
    def parse(item):
        #numeric entries identify devices by index,
        #anything else matches the device name or pci bus id:
        try:
            return int(item)
        except ValueError:
            return item.strip()
    try:
        return [parse(item) for item in v]
    except ValueError:
        log("get_gpu_list(%s)", list_type, exc_info=True)
        log.error("Error: invalid value for '%s' CUDA preference", list_type)
        return None

159 

#None: not attempted yet, True: initialized, False: failed
driver_init_done = None
def driver_init():
    """Initialize the CUDA driver, only once.
    Returns True if the driver is usable, False if initialization failed.
    """
    global driver_init_done
    if driver_init_done is not None:
        #already attempted:
        return driver_init_done
    log.info("CUDA initialization (this may take a few seconds)")
    try:
        driver.init()
        driver_init_done = True
        log("CUDA driver version=%s", driver.get_driver_version())
        if driver.Device.count()==0:
            log.info("CUDA %s / PyCUDA %s, no devices found",
                     ".".join(str(x) for x in driver.get_version()), pycuda.VERSION_TEXT)
        driver_init_done = True
    except Exception as e:
        log.error("Error: cannot initialize CUDA")
        log.error(" %s", e)
        driver_init_done = False
    return driver_init_done

179 

180 

#list of usable device ids (None until first probed):
DEVICES = None
def init_all_devices():
    """Probe all CUDA devices once and cache the list of usable device ids.

    Subsequent calls return the cached list.
    Devices can be excluded via the "enabled-devices" / "disabled-devices"
    preferences (see get_gpu_list), and each candidate is validated
    with check_device().
    """
    global DEVICES, DEVICE_INFO, DEVICE_NAME
    if DEVICES is not None:
        return DEVICES
    DEVICES = []
    DEVICE_INFO = {}
    #fix: also reset the name cache,
    #it is repopulated below just like DEVICE_INFO:
    DEVICE_NAME = {}
    enabled_gpus = get_gpu_list("enabled-devices")
    disabled_gpus = get_gpu_list("disabled-devices")
    if disabled_gpus is True or enabled_gpus==[]:
        log("all devices are disabled!")
        return DEVICES
    log("init_all_devices() enabled: %s, disabled: %s", csv(enabled_gpus), csv(disabled_gpus))
    if not driver_init():
        return DEVICES
    ngpus = driver.Device.count()
    log("init_all_devices() ngpus=%s", ngpus)
    if ngpus==0:
        return DEVICES
    for i in range(ngpus):
        #shortcut if this GPU number is disabled:
        if disabled_gpus is not None and i in disabled_gpus:
            log("device %i is in the list of disabled gpus, skipped", i)
            continue
        #fallback description if driver.Device(i) fails below:
        devinfo = "gpu %i" % i
        try:
            device = driver.Device(i)
            devinfo = device_info(device)
            log(" + testing device %s: %s", i, devinfo)
            DEVICE_NAME[i] = device_name(device)
            DEVICE_INFO[i] = devinfo
            if check_device(i, device):
                DEVICES.append(i)
        except Exception as e:
            log.error("error on device %s: %s", devinfo, e)
    return DEVICES

218 

def check_device(i, device, min_compute=0):
    """Verify that device *i* is usable:
    it must be able to map host memory,
    meet the minimum compute capability,
    and not be excluded by the enabled/disabled device preferences.
    A context is created to query and log the device details.
    Returns True if the device passed all the checks.
    """
    ngpus = driver.Device.count()
    da = driver.device_attribute
    devinfo = device_info(device)
    devname = device_name(device)
    pci = pci_bus_id(device)
    host_mem = device.get_attribute(da.CAN_MAP_HOST_MEMORY)
    if not host_mem:
        log.warn("skipping device %s (cannot map host memory)", devinfo)
        return False
    compute = compute_capability(device)
    if compute<min_compute:
        log("ignoring device %s: compute capability %#x (minimum %#x required)",
            device_info(device), compute, min_compute)
        return False
    enabled_gpus = get_gpu_list("enabled-devices")
    disabled_gpus = get_gpu_list("disabled-devices")
    if enabled_gpus not in (None, True) and \
        i not in enabled_gpus and devname not in enabled_gpus and pci not in enabled_gpus:
        log("device %i '%s' / '%s' is not in the list of enabled gpus, skipped", i, devname, pci)
        return False
    if disabled_gpus is not None and (devname in disabled_gpus or pci in disabled_gpus):
        #fix: the format string had only 2 placeholders for 3 arguments (i, devname, pci):
        log("device %i '%s' / '%s' is in the list of disabled gpus, skipped", i, devname, pci)
        return False
    cf = driver.ctx_flags
    context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
    try:
        log(" created context=%s", context)
        log(" api version=%s", context.get_api_version())
        free, total = driver.mem_get_info()
        log(" memory: free=%sMB, total=%sMB", int(free//1024//1024), int(total//1024//1024))
        log(" multi-processors: %s, clock rate: %s",
            device.get_attribute(da.MULTIPROCESSOR_COUNT), device.get_attribute(da.CLOCK_RATE))
        log(" max block sizes: (%s, %s, %s)",
            device.get_attribute(da.MAX_BLOCK_DIM_X),
            device.get_attribute(da.MAX_BLOCK_DIM_Y),
            device.get_attribute(da.MAX_BLOCK_DIM_Z),
            )
        log(" max grid sizes: (%s, %s, %s)",
            device.get_attribute(da.MAX_GRID_DIM_X),
            device.get_attribute(da.MAX_GRID_DIM_Y),
            device.get_attribute(da.MAX_GRID_DIM_Z),
            )
        max_width = device.get_attribute(da.MAXIMUM_TEXTURE2D_WIDTH)
        max_height = device.get_attribute(da.MAXIMUM_TEXTURE2D_HEIGHT)
        log(" maximum texture size: %sx%s", max_width, max_height)
        log(" max pitch: %s", device.get_attribute(da.MAX_PITCH))
        SMmajor, SMminor = device.compute_capability()
        compute = (SMmajor<<4) + SMminor
        log(" compute capability: %#x (%s.%s)", compute, SMmajor, SMminor)
        if i==0:
            #we print the list info "header" from inside the loop
            #so that the log output is bunched up together
            log.info("CUDA %s / PyCUDA %s, found %s device%s:",
                     ".".join([str(x) for x in driver.get_version()]), pycuda.VERSION_TEXT, ngpus, engs(ngpus))
        log.info(" + %s (memory: %s%% free, compute: %s.%s)",
                 device_info(device), 100*free//total, SMmajor, SMminor)
        if SMmajor<2:
            log.info(" this device is too old!")
            return False
        return True
    finally:
        context.pop()

282 

283 

def get_devices():
    """Return the cached list of usable device ids,
    or None if init_all_devices() has not been called yet."""
    return DEVICES

287 

def check_devices():
    """Raise an AssertionError if no valid CUDA device is found."""
    assert init_all_devices(), "no valid CUDA devices found!"

291 

292 

def reset_state():
    #forget all the recorded device failures / successes,
    #so every device will be considered again:
    log("cuda_context.reset_state()")
    global DEVICE_STATE
    DEVICE_STATE = {}

297 

298 

def select_device(preferred_device_id=-1, min_compute=0):
    """Choose a CUDA device.

    The caller-supplied *preferred_device_id* is tried first,
    then the "device-id" preference, and finally a device is picked
    by the configured load-balancing strategy
    ("round-robin", or best free memory by default).
    Returns (device_id, device), or (-1, None) if nothing is available.
    """
    log("select_device(%s, %s)", preferred_device_id, min_compute)
    for device_id in (preferred_device_id, get_pref("device-id")):
        #fix: the "device-id" preference is a string (or None),
        #coerce before comparing with 0:
        try:
            device_id = int(device_id)
        except (TypeError, ValueError):
            continue
        if device_id>=0:
            #try to honour the device specified:
            context = None
            try:
                device, context, tpct = load_device(device_id)
            finally:
                #fix: 'context' was unbound here when load_device raised,
                #turning the real error into a NameError:
                if context:
                    context.pop()
                    context.detach()
            if min_compute>0:
                compute = compute_capability(device)
                if compute<min_compute:
                    log.warn("Warning: GPU device %i only supports compute %#x", device_id, compute)
            if tpct<MIN_FREE_MEMORY:
                log.warn("Warning: GPU device %i is low on memory: %i%%", device_id, tpct)
            return device_id, device
    load_balancing = get_pref("load-balancing")
    log("load-balancing=%s", load_balancing)
    if load_balancing=="round-robin":
        return select_round_robin(min_compute)
    if load_balancing!="memory" and first_time("cuda-load-balancing"):
        log.warn("Warning: invalid load balancing value '%s'", load_balancing)
    return select_best_free_memory(min_compute)

323 

#index of the last round-robin selection:
rr = 0
def select_round_robin(min_compute):
    """Pick the next usable device in round-robin order.

    Devices failing check_device() are removed from the candidate list.
    Returns (device_id, device), or (-1, None) if no usable device is found.
    """
    if not driver_init():
        return -1, None
    enabled_gpus = get_gpu_list("enabled-devices")
    disabled_gpus = get_gpu_list("disabled-devices")
    if disabled_gpus is True or enabled_gpus==[]:
        log("all devices are disabled!")
        return -1, None
    ngpus = driver.Device.count()
    if ngpus==0:
        return -1, None
    devices = list(range(ngpus))
    global rr
    i = rr
    while devices:
        n = len(devices)
        i = (rr+1) % n
        device_id = devices[i]
        device = driver.Device(device_id)
        if check_device(device_id, device, min_compute):
            rr = i
            return device_id, device
        devices.remove(device_id)
    #fix: previously fell through and returned the last *failing* device
    #as if it were usable:
    rr = i
    return -1, None

349 

350 

def select_best_free_memory(min_compute=0):
    """Pick the device with the most free memory.

    Devices recorded as working (see DEVICE_STATE) are tried first,
    then the failing ones.  A device matching the "device-name"
    preference is returned immediately.
    Returns (device_id, device), or (-1, None) if nothing qualifies.
    NOTE(review): the context created by load_device() is popped and
    detached before returning, so the returned device has no active
    context - callers presumably create their own; confirm at call sites.
    """
    #load preferences:
    preferred_device_name = get_pref("device-name")
    devices = init_all_devices()
    global DEVICE_STATE
    free_pct = 0
    #split device list according to device state:
    ok_devices = [device_id for device_id in devices if DEVICE_STATE.get(device_id, True) is True]
    nok_devices = [device_id for device_id in devices if DEVICE_STATE.get(device_id, True) is not True]
    for list_name, device_list in {"OK" : ok_devices, "failing" : nok_devices}.items():
        selected_device_id = -1
        selected_device = None
        log("will test %s device%s from %s list: %s", len(device_list), engs(device_list), list_name, device_list)
        for device_id in device_list:
            context = None
            try:
                device, context, tpct = load_device(device_id)
                compute = compute_capability(device)
                if compute<min_compute:
                    log("ignoring device %s: compute capability %#x (minimum %#x required)",
                        device_info(device), compute, min_compute)
                elif preferred_device_name and device_info(device).find(preferred_device_name)>=0:
                    log("device matches preferred device name: %s", preferred_device_name)
                    return device_id, device
                elif tpct>=MIN_FREE_MEMORY and tpct>free_pct:
                    log("device has enough free memory: %i (min=%i, current best device=%i)",
                        tpct, MIN_FREE_MEMORY, free_pct)
                    selected_device = device
                    selected_device_id = device_id
                    free_pct = tpct
            finally:
                #always release the probe context:
                if context:
                    context.pop()
                    context.detach()
        if selected_device_id>=0 and selected_device:
            l = log
            if len(devices)>1:
                #only worth logging at info level if there was an actual choice:
                l = log.info
            l("selected device %s: %s", selected_device_id, device_info(selected_device))
            return selected_device_id, selected_device
    return -1, None

392 

def load_device(device_id):
    """Create a context on the given device.

    Returns (device, context, free_memory_percent);
    the caller owns the context and must pop and detach it.
    """
    log("load_device(%i)", device_id)
    device = driver.Device(device_id)
    log("select_device: testing device %s: %s", device_id, device_info(device))
    cf = driver.ctx_flags
    context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
    log("created context=%s", context)
    free, total = driver.mem_get_info()
    free_mb = int(free/1024/1024)
    total_mb = int(total/1024/1024)
    log("memory: free=%sMB, total=%sMB", free_mb, total_mb)
    return device, context, 100*free//total

404 

405 

406 

#maps CUresult numeric error codes to their CUDA_ERROR_* symbolic suffix:
CUDA_ERRORS_INFO = {
    #this list is taken from the CUDA 7.0 SDK header file,
    #so we don't have to build against CUDA (lacks pkgconfig anyway)
    #and so we don't have to worry about which version of the SDK we link against either
    0    : "SUCCESS",
    1    : "INVALID_VALUE",
    2    : "OUT_OF_MEMORY",
    3    : "NOT_INITIALIZED",
    4    : "DEINITIALIZED",
    5    : "PROFILER_DISABLED",
    6    : "PROFILER_NOT_INITIALIZED",
    7    : "PROFILER_ALREADY_STARTED",
    8    : "PROFILER_ALREADY_STOPPED",
    100  : "NO_DEVICE",
    101  : "INVALID_DEVICE",
    200  : "INVALID_IMAGE",
    201  : "INVALID_CONTEXT",
    202  : "CONTEXT_ALREADY_CURRENT",
    205  : "MAP_FAILED",
    206  : "UNMAP_FAILED",
    207  : "ARRAY_IS_MAPPED",
    208  : "ALREADY_MAPPED",
    209  : "NO_BINARY_FOR_GPU",
    210  : "ALREADY_ACQUIRED",
    211  : "NOT_MAPPED",
    212  : "NOT_MAPPED_AS_ARRAY",
    213  : "NOT_MAPPED_AS_POINTER",
    214  : "ECC_UNCORRECTABLE",
    215  : "UNSUPPORTED_LIMIT",
    216  : "CONTEXT_ALREADY_IN_USE",
    217  : "PEER_ACCESS_UNSUPPORTED",
    218  : "INVALID_PTX",
    219  : "INVALID_GRAPHICS_CONTEXT",
    300  : "INVALID_SOURCE",
    301  : "FILE_NOT_FOUND",
    302  : "SHARED_OBJECT_SYMBOL_NOT_FOUND",
    303  : "SHARED_OBJECT_INIT_FAILED",
    304  : "OPERATING_SYSTEM",
    400  : "INVALID_HANDLE",
    500  : "NOT_FOUND",
    600  : "NOT_READY",
    700  : "ILLEGAL_ADDRESS",
    701  : "LAUNCH_OUT_OF_RESOURCES",
    702  : "LAUNCH_TIMEOUT",
    703  : "LAUNCH_INCOMPATIBLE_TEXTURING",
    704  : "PEER_ACCESS_ALREADY_ENABLED",
    705  : "PEER_ACCESS_NOT_ENABLED",
    708  : "PRIMARY_CONTEXT_ACTIVE",
    709  : "CONTEXT_IS_DESTROYED",
    710  : "ASSERT",
    711  : "TOO_MANY_PEERS",
    712  : "HOST_MEMORY_ALREADY_REGISTERED",
    713  : "HOST_MEMORY_NOT_REGISTERED",
    714  : "HARDWARE_STACK_ERROR",
    715  : "ILLEGAL_INSTRUCTION",
    716  : "MISALIGNED_ADDRESS",
    717  : "INVALID_ADDRESS_SPACE",
    718  : "INVALID_PC",
    719  : "LAUNCH_FAILED",
    800  : "NOT_PERMITTED",
    801  : "NOT_SUPPORTED",
    999  : "UNKNOWN",
    }

470 

471 

#cache kernel fatbin files:
KERNELS = {}
def get_CUDA_function(device_id, function_name):
    """
    Returns the compiled kernel for the given device
    and kernel key.
    Returns None if the fatbin file or the module cannot be loaded.
    """
    global KERNELS
    fatbin = KERNELS.get(function_name)
    if fatbin is None:
        #first use: locate the pre-compiled fatbin file and cache its contents
        from xpra.platform.paths import get_resources_dir
        cubin_file = os.path.join(get_resources_dir(), "cuda", "%s.fatbin" % function_name)
        log("get_CUDA_function(%s, %s) cubin file=%s", device_id, function_name, cubin_file)
        fatbin = load_binary_file(cubin_file)
        if not fatbin:
            log.error("Error: failed to load CUDA bin file '%s'", cubin_file)
            return None
        log(" loaded %s bytes", len(fatbin))
        KERNELS[function_name] = fatbin
    #now load from cubin:
    start = monotonic_time()
    try:
        module = driver.module_from_buffer(fatbin)
    except Exception as e:
        log("module_from_buffer(%s)", fatbin, exc_info=True)
        log.error("Error: failed to load module from buffer for '%s'", function_name)
        log.error(" %s", e)
        return None
    log("get_CUDA_function(%s, %s) module=%s", device_id, function_name, module)
    try:
        kernel = module.get_function(function_name)
    except driver.LogicError as e:
        raise Exception("failed to load '%s' from %s: %s" % (function_name, module, e)) from None
    end = monotonic_time()
    log("loading function %s from pre-compiled cubin took %.1fms", function_name, 1000.0*(end-start))
    return kernel

509 

510 

def main():
    """Standalone entry point: print CUDA / pycuda diagnostics."""
    import sys
    verbose = "-v" in sys.argv or "--verbose" in sys.argv
    if verbose:
        log.enable_debug()

    from xpra.platform import program_context
    with program_context("CUDA-Info", "CUDA Info"):
        pycuda_info = get_pycuda_info()
        log.info("pycuda_info")
        print_nested_dict(pycuda_info, print_fn=log.info)
        log.info("cuda_info")
        print_nested_dict(get_cuda_info(), print_fn=log.info)
        log.info("preferences:")
        print_nested_dict(get_prefs(), print_fn=log.info)
        log.info("device automatically selected:")
        selected = select_device()[1]
        log.info(" %s", device_info(selected))

if __name__ == "__main__":
    main()