tinygrad 0.9.0__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. tinygrad/codegen/__init__.py +0 -0
  2. tinygrad/codegen/kernel.py +78 -90
  3. tinygrad/codegen/linearizer.py +237 -169
  4. tinygrad/codegen/uops.py +278 -242
  5. tinygrad/device.py +147 -10
  6. tinygrad/dtype.py +7 -7
  7. tinygrad/engine/graph.py +16 -16
  8. tinygrad/engine/jit.py +39 -36
  9. tinygrad/engine/realize.py +6 -5
  10. tinygrad/engine/schedule.py +15 -7
  11. tinygrad/engine/search.py +6 -3
  12. tinygrad/function.py +17 -23
  13. tinygrad/helpers.py +77 -8
  14. tinygrad/lazy.py +26 -26
  15. tinygrad/multi.py +13 -9
  16. tinygrad/nn/__init__.py +1 -1
  17. tinygrad/nn/datasets.py +2 -1
  18. tinygrad/nn/state.py +3 -4
  19. tinygrad/ops.py +49 -16
  20. tinygrad/renderer/__init__.py +8 -4
  21. tinygrad/renderer/assembly.py +93 -100
  22. tinygrad/renderer/cstyle.py +47 -42
  23. tinygrad/renderer/llvmir.py +30 -30
  24. tinygrad/runtime/__init__.py +0 -0
  25. tinygrad/runtime/autogen/amd_gpu.py +11504 -1
  26. tinygrad/runtime/autogen/comgr.py +36 -10
  27. tinygrad/runtime/autogen/hsa.py +146 -14
  28. tinygrad/runtime/autogen/io_uring.py +1486 -0
  29. tinygrad/runtime/autogen/nv_gpu.py +269 -0
  30. tinygrad/runtime/driver/__init__.py +0 -0
  31. tinygrad/runtime/driver/hip_comgr.py +20 -11
  32. tinygrad/runtime/graph/__init__.py +0 -0
  33. tinygrad/runtime/graph/clang.py +3 -2
  34. tinygrad/runtime/graph/cuda.py +2 -2
  35. tinygrad/runtime/graph/hcq.py +122 -78
  36. tinygrad/runtime/ops_amd.py +302 -316
  37. tinygrad/runtime/ops_cuda.py +3 -3
  38. tinygrad/runtime/ops_disk.py +70 -5
  39. tinygrad/runtime/ops_gpu.py +2 -2
  40. tinygrad/runtime/ops_metal.py +5 -6
  41. tinygrad/runtime/ops_npy.py +1 -1
  42. tinygrad/runtime/ops_nv.py +161 -166
  43. tinygrad/runtime/ops_python.py +20 -16
  44. tinygrad/shape/__init__.py +0 -0
  45. tinygrad/shape/shapetracker.py +5 -2
  46. tinygrad/shape/symbolic.py +1 -3
  47. tinygrad/shape/view.py +34 -19
  48. tinygrad/tensor.py +219 -135
  49. {tinygrad-0.9.0.dist-info → tinygrad-0.9.1.dist-info}/METADATA +14 -6
  50. tinygrad-0.9.1.dist-info/RECORD +63 -0
  51. {tinygrad-0.9.0.dist-info → tinygrad-0.9.1.dist-info}/WHEEL +1 -1
  52. tinygrad/runtime/driver/hsa.py +0 -143
  53. tinygrad/runtime/graph/hsa.py +0 -171
  54. tinygrad/runtime/ops_hsa.py +0 -278
  55. tinygrad-0.9.0.dist-info/RECORD +0 -60
  56. {tinygrad-0.9.0.dist-info → tinygrad-0.9.1.dist-info}/LICENSE +0 -0
  57. {tinygrad-0.9.0.dist-info → tinygrad-0.9.1.dist-info}/top_level.txt +0 -0
@@ -1,278 +0,0 @@
1
- from __future__ import annotations
2
- import ctypes, functools, subprocess, io, atexit, collections, json
3
- from typing import Tuple, TypeVar, List, Dict, Any
4
- import tinygrad.runtime.autogen.hsa as hsa
5
- from tinygrad.helpers import DEBUG, init_c_var, from_mv, round_up, to_mv, init_c_struct_t, getenv
6
- from tinygrad.device import Compiled, Compiler, CompileError, BufferOptions, LRUAllocator
7
- from tinygrad.renderer.cstyle import HIPRenderer
8
- from tinygrad.runtime.driver.hsa import check, scan_agents, find_memory_pool, AQLQueue
9
- from tinygrad.runtime.driver.hip_comgr import compile_hip
10
- if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401
11
-
12
- PROFILE = getenv("PROFILE", 0)
13
-
14
- class HSAProfiler:
15
- def __init__(self):
16
- self.tracked_signals = collections.defaultdict(list)
17
- self.collected_events: List[Tuple[Any, ...]] = []
18
- self.copy_timings = hsa.hsa_amd_profiling_async_copy_time_t()
19
- self.disp_timings = hsa.hsa_amd_profiling_dispatch_time_t()
20
-
21
- def track(self, signal, device, name, is_copy=False): self.tracked_signals[device].append((signal, name, is_copy))
22
- def process(self, device):
23
- # Process all tracked signals, should be called before any of tracked signals are reused.
24
- for sig,name,is_copy in self.tracked_signals[device]:
25
- if is_copy: check(hsa.hsa_amd_profiling_get_async_copy_time(sig, ctypes.byref(timings := self.copy_timings)))
26
- else: check(hsa.hsa_amd_profiling_get_dispatch_time(device.agent, sig, ctypes.byref(timings := self.disp_timings))) #type:ignore
27
- self.collected_events.append((device.device_id, 1 if is_copy else 0, name, timings.start, timings.end))
28
- self.tracked_signals.pop(device)
29
-
30
- def save(self, path):
31
- mjson = []
32
- for i in range(len(HSADevice.devices)):
33
- mjson.append({"name": "process_name", "ph": "M", "pid": i, "args": {"name": "HSA"}})
34
- mjson.append({"name": "thread_name", "ph": "M", "pid": i, "tid": 0, "args": {"name": "AQL"}})
35
- mjson.append({"name": "thread_name", "ph": "M", "pid": i, "tid": 1, "args": {"name": "SDMA"}})
36
-
37
- for dev_id,queue_id,name,st,et in self.collected_events:
38
- mjson.append({"name": name, "ph": "B", "pid": dev_id, "tid": queue_id, "ts": st*1e-3})
39
- mjson.append({"name": name, "ph": "E", "pid": dev_id, "tid": queue_id, "ts": et*1e-3})
40
- with open(path, "w") as f: f.write(json.dumps({"traceEvents": mjson}))
41
- print(f"Saved HSA profile to {path}")
42
- Profiler = HSAProfiler()
43
-
44
- class HSACompiler(Compiler):
45
- def __init__(self, arch:str):
46
- self.arch = arch
47
- super().__init__(f"compile_hip_{self.arch}")
48
- def compile(self, src:str) -> bytes:
49
- try: return compile_hip(src, self.arch)
50
- except RuntimeError as e: raise CompileError(e)
51
-
52
- class HSAProgram:
53
- def __init__(self, device:HSADevice, name:str, lib:bytes):
54
- self.device, self.name, self.lib = device, name, lib
55
-
56
- if DEBUG >= 6:
57
- asm = subprocess.check_output(["/opt/rocm/llvm/bin/llvm-objdump", '-d', '-'], input=lib)
58
- print('\n'.join([x for x in asm.decode('utf-8').split("\n") if 's_code_end' not in x]))
59
-
60
- self.exec = init_c_var(hsa.hsa_executable_t(), lambda x: check(hsa.hsa_executable_create_alt(hsa.HSA_PROFILE_FULL, hsa.HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, None, ctypes.byref(x)))) # noqa: E501
61
- self.code_reader = init_c_var(hsa.hsa_code_object_reader_t(),
62
- lambda x: check(hsa.hsa_code_object_reader_create_from_memory(lib, len(lib), ctypes.byref(x))))
63
- check(hsa.hsa_executable_load_agent_code_object(self.exec, self.device.agent, self.code_reader, None, None))
64
- check(hsa.hsa_executable_freeze(self.exec, None))
65
-
66
- self.kernel = init_c_var(hsa.hsa_executable_symbol_t(), lambda x: check(hsa.hsa_executable_get_symbol_by_name(self.exec, (name+".kd").encode("utf-8"), ctypes.byref(self.device.agent), ctypes.byref(x)))) # noqa: E501
67
- self.handle = init_c_var(ctypes.c_uint64(), lambda x: check(hsa.hsa_executable_symbol_get_info(self.kernel, hsa.HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, ctypes.byref(x)))) # noqa: E501
68
- self.kernargs_segment_size = init_c_var(ctypes.c_uint32(), lambda x: check(hsa.hsa_executable_symbol_get_info(self.kernel, hsa.HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, ctypes.byref(x)))).value # noqa: E501
69
- self.group_segment_size = init_c_var(ctypes.c_uint32(), lambda x: check(hsa.hsa_executable_symbol_get_info(self.kernel, hsa.HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, ctypes.byref(x)))).value # noqa: E501
70
- self.private_segment_size = init_c_var(ctypes.c_uint32(), lambda x: check(hsa.hsa_executable_symbol_get_info(self.kernel, hsa.HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, ctypes.byref(x)))).value # noqa: E501
71
-
72
- def __del__(self):
73
- self.device.synchronize()
74
- if hasattr(self, 'code_reader'): check(hsa.hsa_code_object_reader_destroy(self.code_reader))
75
- if hasattr(self, 'exec'): check(hsa.hsa_executable_destroy(self.exec))
76
-
77
- def __call__(self, *args, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1), vals:Tuple[int, ...]=(), wait=False):
78
- if not hasattr(self, "args_struct_t"):
79
- self.args_struct_t = init_c_struct_t(tuple([(f'f{i}', ctypes.c_void_p) for i in range(len(args))] +
80
- [(f'v{i}', ctypes.c_int) for i in range(len(vals))]))
81
- if ctypes.sizeof(self.args_struct_t) != self.kernargs_segment_size:
82
- raise RuntimeError(f"HSAProgram.__call__: incorrect args struct size {ctypes.sizeof(self.args_struct_t)} != {self.kernargs_segment_size}")
83
-
84
- kernargs = None
85
- if self.kernargs_segment_size > 0:
86
- kernargs = self.device.alloc_kernargs(self.kernargs_segment_size)
87
- args_st = self.args_struct_t.from_address(kernargs)
88
- for i in range(len(args)): args_st.__setattr__(f'f{i}', args[i])
89
- for i in range(len(vals)): args_st.__setattr__(f'v{i}', vals[i])
90
- self.device.flush_hdp()
91
-
92
- signal = self.device.alloc_signal(reusable=True) if wait or PROFILE else None
93
- self.device.hw_queue.submit_kernel(self, global_size, local_size, kernargs, completion_signal=signal)
94
- if PROFILE: Profiler.track(signal, self.device, self.name)
95
- if wait:
96
- hsa.hsa_signal_wait_scacquire(signal, hsa.HSA_SIGNAL_CONDITION_LT, 1, (1 << 64) - 1, hsa.HSA_WAIT_STATE_ACTIVE)
97
- check(hsa.hsa_amd_profiling_get_dispatch_time(self.device.agent, signal, ctypes.byref(timings := hsa.hsa_amd_profiling_dispatch_time_t())))
98
- return (timings.end - timings.start) * self.device.clocks_to_time
99
-
100
- T = TypeVar("T")
101
- CHUNK_SIZE, PAGE_SIZE = 256*1024*1024, 0x1000
102
- class HSAAllocator(LRUAllocator):
103
- def __init__(self, device:HSADevice):
104
- self.device = device
105
- super().__init__()
106
-
107
- def _alloc(self, size:int, options:BufferOptions):
108
- if options.host:
109
- check(hsa.hsa_amd_memory_pool_allocate(HSADevice.cpu_mempool, size, 0, ctypes.byref(mem := ctypes.c_void_p())))
110
- check(hsa.hsa_amd_agents_allow_access(2, (hsa.hsa_agent_t*2)(HSADevice.cpu_agent, self.device.agent), None, mem))
111
- return mem.value
112
- else:
113
- c_agents = (hsa.hsa_agent_t * len(HSADevice.agents[hsa.HSA_DEVICE_TYPE_GPU]))(*HSADevice.agents[hsa.HSA_DEVICE_TYPE_GPU])
114
- check(hsa.hsa_amd_memory_pool_allocate(self.device.gpu_mempool, size, 0, ctypes.byref(buf := ctypes.c_void_p())))
115
- check(hsa.hsa_amd_agents_allow_access(len(HSADevice.agents[hsa.HSA_DEVICE_TYPE_GPU]), c_agents, None, buf))
116
- return buf.value
117
-
118
- def _free(self, opaque:T, options:BufferOptions):
119
- HSADevice.synchronize_system()
120
- check(hsa.hsa_amd_memory_pool_free(opaque))
121
-
122
- def copyin(self, dest:T, src: memoryview):
123
- # Async copyin sync model uses barriers on the main hw queue, since barriers are guaranteed to execute in order with all other packets.
124
- self.device.hw_queue.submit_barrier([], sync_signal := self.device.alloc_signal(reusable=True))
125
- mem = self._alloc(src.nbytes, BufferOptions(host=True))
126
- ctypes.memmove(mem, from_mv(src), src.nbytes)
127
- check(hsa.hsa_amd_memory_async_copy_on_engine(dest, self.device.agent, mem, HSADevice.cpu_agent, src.nbytes, 1, ctypes.byref(sync_signal),
128
- copy_signal := self.device.alloc_signal(reusable=True), hsa.HSA_AMD_SDMA_ENGINE_0, True))
129
- self.device.hw_queue.submit_barrier([copy_signal])
130
- self.device.delayed_free.append(mem)
131
- if PROFILE: Profiler.track(copy_signal, self.device, f"copyin: CPU -> HSA:{self.device.device_id}", is_copy=True)
132
-
133
- def copy_from_fd(self, dest, fd, offset, size):
134
- self.device.hw_queue.submit_barrier([], sync_signal := self.device.alloc_signal(reusable=True))
135
-
136
- if not hasattr(self, 'hb'):
137
- self.hb = [self._alloc(CHUNK_SIZE, BufferOptions(host=True)) for _ in range(2)]
138
- self.hb_signals = [self.device.alloc_signal(reusable=False) for _ in range(2)]
139
- self.hb_polarity = 0
140
- self.sdma = [hsa.HSA_AMD_SDMA_ENGINE_0, hsa.HSA_AMD_SDMA_ENGINE_1]
141
- for sig in self.hb_signals: hsa.hsa_signal_store_relaxed(sig, 0)
142
-
143
- fo = io.FileIO(fd, "a+b", closefd=False)
144
- fo.seek(offset - (minor_offset:=offset % PAGE_SIZE))
145
-
146
- copies_called = 0
147
- copied_in = 0
148
- for local_offset in range(0, size+minor_offset, CHUNK_SIZE):
149
- local_size = min(round_up(size+minor_offset, PAGE_SIZE)-local_offset, CHUNK_SIZE)
150
- copy_size = min(local_size-minor_offset, size-copied_in)
151
- if copy_size == 0: break
152
-
153
- hsa.hsa_signal_wait_scacquire(self.hb_signals[self.hb_polarity], hsa.HSA_SIGNAL_CONDITION_LT, 1, (1 << 64) - 1, hsa.HSA_WAIT_STATE_ACTIVE)
154
- self.device.reusable_signals.append(self.hb_signals[self.hb_polarity]) # it's free now and can be reused
155
- self.hb_signals[self.hb_polarity] = self.device.alloc_signal(reusable=False)
156
-
157
- fo.readinto(to_mv(self.hb[self.hb_polarity], local_size))
158
- check(hsa.hsa_amd_memory_async_copy_on_engine(dest+copied_in, self.device.agent, self.hb[self.hb_polarity]+minor_offset, HSADevice.cpu_agent,
159
- copy_size, 1, ctypes.byref(sync_signal), self.hb_signals[self.hb_polarity],
160
- self.sdma[self.hb_polarity], True))
161
- copied_in += copy_size
162
- self.hb_polarity = (self.hb_polarity + 1) % len(self.hb)
163
- minor_offset = 0 # only on the first
164
- copies_called += 1
165
-
166
- wait_signals = [self.hb_signals[self.hb_polarity - 1]]
167
- if copies_called > 1: wait_signals.append(self.hb_signals[self.hb_polarity])
168
- self.device.hw_queue.submit_barrier(wait_signals)
169
-
170
- def copyout(self, dest:memoryview, src:T):
171
- HSADevice.synchronize_system()
172
- copy_signal = self.device.alloc_signal(reusable=True)
173
- c_agents = (hsa.hsa_agent_t*2)(self.device.agent, HSADevice.cpu_agent)
174
- check(hsa.hsa_amd_memory_lock_to_pool(from_mv(dest), dest.nbytes, c_agents, 2, HSADevice.cpu_mempool, 0, ctypes.byref(addr:=ctypes.c_void_p())))
175
- check(hsa.hsa_amd_memory_async_copy(addr, HSADevice.cpu_agent, src, self.device.agent, dest.nbytes, 0, None, copy_signal))
176
- hsa.hsa_signal_wait_scacquire(copy_signal, hsa.HSA_SIGNAL_CONDITION_LT, 1, (1 << 64) - 1, hsa.HSA_WAIT_STATE_ACTIVE)
177
- check(hsa.hsa_amd_memory_unlock(from_mv(dest)))
178
- if PROFILE: Profiler.track(copy_signal, self.device, f"copyout: HSA:{self.device.device_id} -> CPU", is_copy=True)
179
-
180
- def transfer(self, dest:T, src:T, sz:int, src_dev=None, dest_dev=None):
181
- src_dev.hw_queue.submit_barrier([], sync_signal_1 := src_dev.alloc_signal(reusable=True))
182
- dest_dev.hw_queue.submit_barrier([], sync_signal_2 := dest_dev.alloc_signal(reusable=True))
183
- c_wait_signal = (hsa.hsa_signal_t*2)(sync_signal_1, sync_signal_2)
184
- check(hsa.hsa_amd_memory_async_copy_on_engine(dest, dest_dev.agent, src, src_dev.agent, sz, 2, c_wait_signal,
185
- copy_signal := dest_dev.alloc_signal(reusable=False), hsa.HSA_AMD_SDMA_ENGINE_0, True))
186
- src_dev.hw_queue.submit_barrier([copy_signal])
187
- dest_dev.hw_queue.submit_barrier([copy_signal])
188
- if PROFILE: Profiler.track(copy_signal, src_dev, f"transfer: HSA:{src_dev.device_id} -> HSA:{dest_dev.device_id}", is_copy=True)
189
-
190
- class HSADevice(Compiled):
191
- devices: List[HSADevice] = []
192
- agents: Dict[int, List[hsa.hsa_agent_t]] = {}
193
- cpu_agent: hsa.hsa_agent_t
194
- cpu_mempool: hsa.hsa_amd_memory_pool_t
195
- def __init__(self, device:str=""):
196
- if not HSADevice.agents:
197
- check(hsa.hsa_init())
198
- atexit.register(hsa_terminate)
199
- HSADevice.agents = scan_agents()
200
- HSADevice.cpu_agent = HSADevice.agents[hsa.HSA_DEVICE_TYPE_CPU][0]
201
- HSADevice.cpu_mempool = find_memory_pool(HSADevice.cpu_agent, segtyp=hsa.HSA_AMD_SEGMENT_GLOBAL, location=hsa.HSA_AMD_MEMORY_POOL_LOCATION_CPU)
202
- if PROFILE: check(hsa.hsa_amd_profiling_async_copy_enable(1))
203
-
204
- self.device_id = int(device.split(":")[1]) if ":" in device else 0
205
- self.agent = HSADevice.agents[hsa.HSA_DEVICE_TYPE_GPU][self.device_id]
206
- self.gpu_mempool = find_memory_pool(self.agent, segtyp=hsa.HSA_AMD_SEGMENT_GLOBAL, location=hsa.HSA_AMD_MEMORY_POOL_LOCATION_GPU)
207
- self.hw_queue = AQLQueue(self)
208
- HSADevice.devices.append(self)
209
-
210
- check(hsa.hsa_agent_get_info(self.agent, hsa.HSA_AGENT_INFO_NAME, ctypes.byref(agent_name_buf := ctypes.create_string_buffer(256))))
211
- self.arch = ctypes.string_at(agent_name_buf).decode()
212
-
213
- check(hsa.hsa_system_get_info(hsa.HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, ctypes.byref(gpu_freq := ctypes.c_uint64())))
214
- self.clocks_to_time: float = 1 / gpu_freq.value
215
-
216
- check(hsa.hsa_agent_get_info(self.agent, hsa.HSA_AMD_AGENT_INFO_HDP_FLUSH, ctypes.byref(hdp_flush := hsa.hsa_amd_hdp_flush_t())))
217
- self.hdp_flush = hdp_flush
218
-
219
- self.delayed_free: List[int] = []
220
- self.reusable_signals: List[hsa.hsa_signal_t] = []
221
-
222
- from tinygrad.runtime.graph.hsa import HSAGraph
223
- super().__init__(device, HSAAllocator(self), HIPRenderer(), HSACompiler(self.arch), functools.partial(HSAProgram, self), HSAGraph)
224
-
225
- # Finish init: preallocate some signals + space for kernargs
226
- self.signal_pool = [init_c_var(hsa.hsa_signal_t(), lambda x: check(hsa.hsa_signal_create(1, 0, None, ctypes.byref(x)))) for _ in range(4096)]
227
- self._new_kernargs_region(16 << 20) # initial region size is 16mb
228
-
229
- def synchronize(self):
230
- self.hw_queue.wait()
231
-
232
- for sig in self.reusable_signals: hsa.hsa_signal_silent_store_relaxed(sig, 1)
233
- self.signal_pool.extend(self.reusable_signals)
234
- self.reusable_signals.clear()
235
-
236
- for opaque_to_free in self.delayed_free: check(hsa.hsa_amd_memory_pool_free(opaque_to_free))
237
- self.delayed_free.clear()
238
-
239
- self.kernarg_next_addr = self.kernarg_start_addr
240
- Profiler.process(self)
241
-
242
- @staticmethod
243
- def synchronize_system():
244
- for d in HSADevice.devices: d.synchronize()
245
-
246
- def alloc_signal(self, reusable=False):
247
- if len(self.signal_pool): signal = self.signal_pool.pop()
248
- else: check(hsa.hsa_amd_signal_create(1, 0, None, 0, ctypes.byref(signal := hsa.hsa_signal_t())))
249
-
250
- # reusable means a signal could be reused after synchronize for the device it's allocated from is called.
251
- if reusable: self.reusable_signals.append(signal)
252
- return signal
253
-
254
- def alloc_kernargs(self, sz):
255
- if self.kernarg_next_addr + sz >= self.kernarg_start_addr + self.kernarg_pool_sz: self._new_kernargs_region(int(self.kernarg_pool_sz * 2))
256
- result = self.kernarg_next_addr
257
- self.kernarg_next_addr = round_up(self.kernarg_next_addr + sz, 16)
258
- return result
259
-
260
- def _new_kernargs_region(self, sz:int):
261
- if hasattr(self, 'kernarg_start_addr'): self.delayed_free.append(self.kernarg_start_addr)
262
- self.kernarg_start_addr: int = self.allocator._alloc(sz, BufferOptions())
263
- self.kernarg_next_addr = self.kernarg_start_addr
264
- self.kernarg_pool_sz: int = sz
265
-
266
- def flush_hdp(self): self.hdp_flush.HDP_MEM_FLUSH_CNTL[0] = 1
267
-
268
- def hsa_terminate():
269
- # Need to stop/delete aql queue before hsa shut down, this leads to gpu hangs.
270
- for dev in HSADevice.devices:
271
- Profiler.process(dev)
272
- del dev.hw_queue
273
-
274
- # hsa_shut_down cleans up all hsa-related resources.
275
- hsa.hsa_shut_down()
276
- HSADevice.synchronize = lambda: None #type:ignore
277
- HSAProgram.__del__ = lambda _: None #type:ignore
278
- if Profiler.collected_events: Profiler.save("/tmp/profile.json")
@@ -1,60 +0,0 @@
1
- tinygrad/__init__.py,sha256=jC-35zswLSXLuRRThG_o6yar6qQjLCqmeaFCj_XKN08,449
2
- tinygrad/device.py,sha256=zXcrFjBsiV1rW0aXupszDjD98TWLHin7u8pBd5fdJqo,10446
3
- tinygrad/dtype.py,sha256=xg2BlFIPcQw0onHW_0ktGXjved9SXgQcLNrqe6gCXto,6221
4
- tinygrad/function.py,sha256=0xkWst2tRsOeN6YcQS65MfVfWwKQYFkAacgkTys0VdQ,9616
5
- tinygrad/helpers.py,sha256=XI8MIeBE35wQ4q0NEsUCvkj3QdY0adI80SfCbOySOVI,12773
6
- tinygrad/lazy.py,sha256=xqaEqXaIpt_77SP_2U6Pyfw8YeGd_0PzNDXJOOnRJ24,13379
7
- tinygrad/multi.py,sha256=gyGXYVviaPfzAkoJjLFUiusVd3no6HRJunOOxD0DaaY,11362
8
- tinygrad/ops.py,sha256=aNk1jLuJl--Z_u8DE8Du1iN1AFhMH-6Le5mnvRHLDvI,7124
9
- tinygrad/tensor.py,sha256=nznRGHH7-64bMpFeu8gvdSDfJ5CEEoezIcHSxPgeJ7k,129223
10
- tinygrad/codegen/kernel.py,sha256=RRRmOX3iOOgu5ISABW_UTVh5vfGdLFfK1UOtKtpghuY,38169
11
- tinygrad/codegen/linearizer.py,sha256=jxwEcxxcpWOvYlIgXmlGkgNYZ4sDykiY_5Ve3a8tpYg,27622
12
- tinygrad/codegen/uops.py,sha256=yKS-3w9teuS_3BLnHAN4vWtSRvZHmsx194YRBXMOhFI,21872
13
- tinygrad/engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- tinygrad/engine/graph.py,sha256=eEbb17qbJ0A-2VjN4l7SCbA4yI7jh6YN6EybPShmcbg,5221
15
- tinygrad/engine/jit.py,sha256=TrdZQEXnF-SowCAyy69iEp85NPFFtOVaIz_2i1cwCvQ,11049
16
- tinygrad/engine/realize.py,sha256=H2CgiLWRqTQSr9udJb1iLX7DFdLcPLwxSJZ2bUfHXDs,11077
17
- tinygrad/engine/schedule.py,sha256=I3OxiNwveWbVuhhFWdR1B5G8goEyqXOHWe7UQQ3Ogz8,18487
18
- tinygrad/engine/search.py,sha256=M11qHlufIffBS9b7mjk8gniyoGRaengwXnMfUQTHmyw,11289
19
- tinygrad/nn/__init__.py,sha256=DoHrq9pUFs1vm9FUA5eet5_tvhlZJlC_uC4kBqo98kI,12932
20
- tinygrad/nn/datasets.py,sha256=Mvf_0eCEEqtB9-8iiyDFhm-B7rTyWXwmSY_3xjeHleo,458
21
- tinygrad/nn/optim.py,sha256=zf85kwumpS17fk1NBjUfe7tOUE7-XH37SL-LjgAfva8,6799
22
- tinygrad/nn/state.py,sha256=nGR05s3kuDNp9lliCIr4-6Ek7Korha7jCAWie5S2rB4,10138
23
- tinygrad/renderer/__init__.py,sha256=-LjQ9tC2rI8fveaS_xn24X_knXKILFj-iZFcRTk8fNM,2672
24
- tinygrad/renderer/assembly.py,sha256=MD-SSC7-Nqwt3zrwe0aDXVX08W9Ox6Vj_byPS1k1bAQ,17923
25
- tinygrad/renderer/cstyle.py,sha256=tFWWW-egorLFEDwX6fA9-rYxvNLc67LjxlZ6JzrWCF0,24043
26
- tinygrad/renderer/llvmir.py,sha256=BZViWXj2G6JtEcOgc-CtnIj-d9xP0ZjgNXdTKQT_PJ8,10315
27
- tinygrad/runtime/ops_amd.py,sha256=3jOrFqxk8JkPX043tEUFLvyKSgX7Fls785g_gOkdzVM,31811
28
- tinygrad/runtime/ops_clang.py,sha256=XWqwobReRdu-Tj-chbWEJFMx6AQfgdGCcpdWcLWUTOQ,1468
29
- tinygrad/runtime/ops_cuda.py,sha256=cgeoVpY9bOGU22Eh78XR5YOYY2cgsJt4Vnxl6u8N6co,10840
30
- tinygrad/runtime/ops_disk.py,sha256=75-iihZxkhNvA5O3VaW61LOXwmlSX4XwegpnV1C4D5A,2738
31
- tinygrad/runtime/ops_gpu.py,sha256=FB3Fp-VVEDGEt_6CfJxsM_TWzhp5giXCP1TSSRMXE80,7532
32
- tinygrad/runtime/ops_hsa.py,sha256=YNQLqZjJ9twTJRKS41l2oIrncOAu3wdOdBegs9zYlgo,16188
33
- tinygrad/runtime/ops_llvm.py,sha256=dODiyVSlPofHyDIZrD-V74h8W1d94VPnr_-A4gNbSO4,2229
34
- tinygrad/runtime/ops_metal.py,sha256=fGSNpwmYIHaif9a5SiwyMX2bub-r5hTNpnrqlaPMeUc,5815
35
- tinygrad/runtime/ops_npy.py,sha256=qaAi0AEo6nt7iZ-eWqM8z2aQfNJgZUpmBCEDmrIzWL0,369
36
- tinygrad/runtime/ops_nv.py,sha256=PCMAHMrW4J7esgnkpwq3bB91Q3h6hBATr8JuykR9vcA,37633
37
- tinygrad/runtime/ops_python.py,sha256=mmsDj1hJ3BtreAq5dfCuuUGbgoIhCKlVwNqMDmXBISs,10871
38
- tinygrad/runtime/autogen/amd_gpu.py,sha256=1NDH0ualiZ8OtgTjaYcQ1HjKs_SQ7eUHuJvdrDodvCk,65022
39
- tinygrad/runtime/autogen/comgr.py,sha256=Z99Y6K8D_nuMpOs0qDWiA0MV-RxueV65o2OyPFdcsHE,38563
40
- tinygrad/runtime/autogen/cuda.py,sha256=GgRl4AfU54JG0G1XJj2dq2FbrUZ8XG_AnFrPAZJpSSg,250380
41
- tinygrad/runtime/autogen/hip.py,sha256=1yUHDCwL3KkD15if2Q1Ud3GbJiR7DxsNorKZTCINw54,245532
42
- tinygrad/runtime/autogen/hsa.py,sha256=tGpnXUhhQkAIEr0yyCxRImzajmt-nN0KzJn4KnT_bH8,270073
43
- tinygrad/runtime/autogen/kfd.py,sha256=dDmLFL6HL_QXRW2rZOCArY55PRuXuLN9563XCipV2jM,29935
44
- tinygrad/runtime/autogen/nv_gpu.py,sha256=K9WwwdIitHrY2AXpYy8bbdD9aEwdbz9vL7748pz6Re0,1672024
45
- tinygrad/runtime/autogen/opencl.py,sha256=aW-luGFF5PXFEZ6SgrGANhA9qpkI-fZyEsmDfpez2Ss,82654
46
- tinygrad/runtime/driver/hip_comgr.py,sha256=rFQRsOYo4XquwcHFTe2mGzMfozdL9hitO3DRYBDFSuM,3376
47
- tinygrad/runtime/driver/hsa.py,sha256=PoNy8gHBPoRUhUZligFp0z_Le9fyEXbJrnlgwInt_R0,7218
48
- tinygrad/runtime/graph/clang.py,sha256=10Bs64J0r12g6upqCHVoK3LoTrdbBBHQD43efMhlBjo,1957
49
- tinygrad/runtime/graph/cuda.py,sha256=LNx6RQLcQSKMlHfVK5r_efujN0lRPhKqi8yp249OAIs,5265
50
- tinygrad/runtime/graph/hcq.py,sha256=mspwzroBTwNNHDob7oK-JCt48mhuIhX_G0qNYvFVuVM,8089
51
- tinygrad/runtime/graph/hsa.py,sha256=UJgSg2irrKT87LBZ3DfaGmoK7rJk8OZhIHEHhtF8rUE,10035
52
- tinygrad/runtime/graph/metal.py,sha256=bwB6uAsqjEbwv5ML5ziWduBtmTpseJboo6J9ssVa4v4,4579
53
- tinygrad/shape/shapetracker.py,sha256=hWqh2uWsbBp3lKlRpY8Fj1oTWvEx1YwVsKl0QiA-QnU,6334
54
- tinygrad/shape/symbolic.py,sha256=hn2khLoHAJSwyZ91i679oJZCLTaz0Sf2dUG-HRJMtVw,16688
55
- tinygrad/shape/view.py,sha256=KMf_KzNwXmcX1NbFPq862-Jv_E6TgeO27lcPjrAweF4,17092
56
- tinygrad-0.9.0.dist-info/LICENSE,sha256=ABRhUPEILzINYIukgazD-_rPipkUNUwslrb0RxnV6Xc,1058
57
- tinygrad-0.9.0.dist-info/METADATA,sha256=oyGO3WSmMQ7NTAK3RGk0ZXCkr-L3XKltKYhYrKEuifk,10227
58
- tinygrad-0.9.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
59
- tinygrad-0.9.0.dist-info/top_level.txt,sha256=vDABMCWBFQnx2kn9Azueu88FP-1klQdePoHikQhHymc,9
60
- tinygrad-0.9.0.dist-info/RECORD,,