tinygrad 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tinygrad/__init__.py +6 -0
 - tinygrad/codegen/kernel.py +572 -83
 - tinygrad/codegen/linearizer.py +415 -395
 - tinygrad/codegen/uops.py +415 -0
 - tinygrad/device.py +183 -0
 - tinygrad/dtype.py +113 -0
 - tinygrad/engine/__init__.py +0 -0
 - tinygrad/engine/graph.py +100 -0
 - tinygrad/engine/jit.py +195 -0
 - tinygrad/engine/realize.py +191 -0
 - tinygrad/engine/schedule.py +362 -0
 - tinygrad/engine/search.py +196 -0
 - tinygrad/{mlops.py → function.py} +76 -55
 - tinygrad/helpers.py +196 -89
 - tinygrad/lazy.py +210 -371
 - tinygrad/multi.py +169 -0
 - tinygrad/nn/__init__.py +202 -22
 - tinygrad/nn/datasets.py +7 -0
 - tinygrad/nn/optim.py +112 -32
 - tinygrad/nn/state.py +136 -39
 - tinygrad/ops.py +119 -202
 - tinygrad/renderer/__init__.py +61 -0
 - tinygrad/renderer/assembly.py +276 -0
 - tinygrad/renderer/cstyle.py +353 -166
 - tinygrad/renderer/llvmir.py +150 -138
 - tinygrad/runtime/autogen/amd_gpu.py +1900 -0
 - tinygrad/runtime/autogen/comgr.py +865 -0
 - tinygrad/runtime/autogen/cuda.py +5923 -0
 - tinygrad/runtime/autogen/hip.py +5909 -0
 - tinygrad/runtime/autogen/hsa.py +5761 -0
 - tinygrad/runtime/autogen/kfd.py +812 -0
 - tinygrad/runtime/autogen/nv_gpu.py +33328 -0
 - tinygrad/runtime/autogen/opencl.py +1795 -0
 - tinygrad/runtime/driver/hip_comgr.py +47 -0
 - tinygrad/runtime/driver/hsa.py +143 -0
 - tinygrad/runtime/graph/clang.py +38 -0
 - tinygrad/runtime/graph/cuda.py +81 -0
 - tinygrad/runtime/graph/hcq.py +143 -0
 - tinygrad/runtime/graph/hsa.py +171 -0
 - tinygrad/runtime/graph/metal.py +75 -0
 - tinygrad/runtime/ops_amd.py +564 -0
 - tinygrad/runtime/ops_clang.py +24 -77
 - tinygrad/runtime/ops_cuda.py +175 -89
 - tinygrad/runtime/ops_disk.py +56 -33
 - tinygrad/runtime/ops_gpu.py +92 -95
 - tinygrad/runtime/ops_hsa.py +278 -0
 - tinygrad/runtime/ops_llvm.py +39 -60
 - tinygrad/runtime/ops_metal.py +92 -74
 - tinygrad/runtime/ops_npy.py +9 -0
 - tinygrad/runtime/ops_nv.py +630 -0
 - tinygrad/runtime/ops_python.py +204 -0
 - tinygrad/shape/shapetracker.py +86 -254
 - tinygrad/shape/symbolic.py +166 -141
 - tinygrad/shape/view.py +296 -0
 - tinygrad/tensor.py +2619 -448
 - {tinygrad-0.7.0.dist-info → tinygrad-0.9.0.dist-info}/LICENSE +1 -1
 - tinygrad-0.9.0.dist-info/METADATA +227 -0
 - tinygrad-0.9.0.dist-info/RECORD +60 -0
 - {tinygrad-0.7.0.dist-info → tinygrad-0.9.0.dist-info}/WHEEL +1 -1
 - tinygrad/codegen/assembly.py +0 -190
 - tinygrad/codegen/optimizer.py +0 -379
 - tinygrad/codegen/search.py +0 -72
 - tinygrad/graph.py +0 -83
 - tinygrad/jit.py +0 -57
 - tinygrad/nn/image.py +0 -100
 - tinygrad/renderer/assembly_arm64.py +0 -169
 - tinygrad/renderer/assembly_ptx.py +0 -98
 - tinygrad/renderer/wgsl.py +0 -53
 - tinygrad/runtime/lib.py +0 -113
 - tinygrad/runtime/ops_cpu.py +0 -51
 - tinygrad/runtime/ops_hip.py +0 -82
 - tinygrad/runtime/ops_shm.py +0 -29
 - tinygrad/runtime/ops_torch.py +0 -30
 - tinygrad/runtime/ops_webgpu.py +0 -45
 - tinygrad-0.7.0.dist-info/METADATA +0 -212
 - tinygrad-0.7.0.dist-info/RECORD +0 -40
 - {tinygrad-0.7.0.dist-info → tinygrad-0.9.0.dist-info}/top_level.txt +0 -0
 
    
        tinygrad/runtime/ops_metal.py
    CHANGED
    
    | 
         @@ -1,88 +1,106 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
            import os, subprocess, pathlib,  
     | 
| 
       3 
     | 
    
         
            -
            import Metal,  
     | 
| 
       4 
     | 
    
         
            -
            from typing import List, Any
         
     | 
| 
       5 
     | 
    
         
            -
            from tinygrad. 
     | 
| 
       6 
     | 
    
         
            -
            from tinygrad. 
     | 
| 
       7 
     | 
    
         
            -
            from tinygrad. 
     | 
| 
       8 
     | 
    
         
            -
            from tinygrad.ops import Compiled
         
     | 
| 
       9 
     | 
    
         
            -
            from tinygrad.runtime.lib import RawBufferMapped, LRUAllocator
         
     | 
| 
      
 1 
     | 
    
         
            +
            from __future__ import annotations
         
     | 
| 
      
 2 
     | 
    
         
            +
            import os, subprocess, pathlib, ctypes, tempfile, functools
         
     | 
| 
      
 3 
     | 
    
         
            +
            import Metal, libdispatch
         
     | 
| 
      
 4 
     | 
    
         
            +
            from typing import List, Set, Any, Tuple, Optional
         
     | 
| 
      
 5 
     | 
    
         
            +
            from tinygrad.helpers import prod, getenv, DEBUG, unwrap2
         
     | 
| 
      
 6 
     | 
    
         
            +
            from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator
         
     | 
| 
      
 7 
     | 
    
         
            +
            from tinygrad.renderer.cstyle import MetalRenderer
         
     | 
| 
       10 
8 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
      
 9 
     | 
    
         
            +
            def wait_check(cbuf: Any):
         
     | 
| 
      
 10 
     | 
    
         
            +
              cbuf.waitUntilCompleted()
         
     | 
| 
      
 11 
     | 
    
         
            +
              if (error := cbuf.error()) is not None:
         
     | 
| 
      
 12 
     | 
    
         
            +
                raise RuntimeError(error)
         
     | 
| 
       12 
13 
     | 
    
         | 
| 
       13 
     | 
    
         
            -
            class  
     | 
| 
       14 
     | 
    
         
            -
              def  
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
              def __init__(self):
         
     | 
| 
       20 
     | 
    
         
            -
                self.mtl_buffers_in_flight: List[Any] = []
         
     | 
| 
       21 
     | 
    
         
            -
                self.device = Metal.MTLCreateSystemDefaultDevice()
         
     | 
| 
       22 
     | 
    
         
            -
                self.mtl_queue = self.device.newCommandQueueWithMaxCommandBufferCount_(1024)
         
     | 
| 
       23 
     | 
    
         
            -
                self.allocator = MetalAllocator(self.device.dedicatedMemorySize() or self.device.sharedMemorySize())
         
     | 
| 
       24 
     | 
    
         
            -
              # TODO: is there a better way to do this?
         
     | 
| 
       25 
     | 
    
         
            -
              def synchronize(self):
         
     | 
| 
       26 
     | 
    
         
            -
                for cbuf in self.mtl_buffers_in_flight: cbuf.waitUntilCompleted()
         
     | 
| 
       27 
     | 
    
         
            -
                self.mtl_buffers_in_flight.clear()
         
     | 
| 
       28 
     | 
    
         
            -
            METAL = _METAL()
         
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
            class RawMetalBuffer(RawBufferMapped):
         
     | 
| 
       31 
     | 
    
         
            -
              def __init__(self, size:int, dtype:DType):
         
     | 
| 
       32 
     | 
    
         
            -
                assert dtype != dtypes.double, f"METAL does not support {dtype.name}"
         
     | 
| 
       33 
     | 
    
         
            -
                super().__init__(size, dtype, allocator=METAL.allocator)
         
     | 
| 
       34 
     | 
    
         
            -
              def _buffer(self):
         
     | 
| 
       35 
     | 
    
         
            -
                METAL.synchronize()
         
     | 
| 
       36 
     | 
    
         
            -
                return self._buf.contents().as_buffer(self._buf.length())
         
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
            def unwrap(x):
         
     | 
| 
       39 
     | 
    
         
            -
              ret, err = x
         
     | 
| 
       40 
     | 
    
         
            -
              assert err is None, str(err)
         
     | 
| 
       41 
     | 
    
         
            -
              return ret
         
     | 
| 
       42 
     | 
    
         
            -
             
     | 
| 
       43 
     | 
    
         
            -
            class MetalProgram:
         
     | 
| 
       44 
     | 
    
         
            -
              def __init__(self, name:str, prg:str, binary:bool=False):
         
     | 
| 
       45 
     | 
    
         
            -
                if METAL_XCODE:
         
     | 
| 
       46 
     | 
    
         
            -
                  air = subprocess.check_output(['xcrun', '-sdk', 'macosx', 'metal', '-x', 'metal', '-c', '-', '-o', '-'], input=prg.encode('utf-8'))
         
     | 
| 
      
 14 
     | 
    
         
            +
            class MetalCompiler(Compiler):
         
     | 
| 
      
 15 
     | 
    
         
            +
              def __init__(self, device:Optional[MetalDevice]):
         
     | 
| 
      
 16 
     | 
    
         
            +
                self.device = device
         
     | 
| 
      
 17 
     | 
    
         
            +
                super().__init__("compile_metal")
         
     | 
| 
      
 18 
     | 
    
         
            +
              def compile(self, src:str) -> bytes:
         
     | 
| 
      
 19 
     | 
    
         
            +
                if self.device is None:
         
     | 
| 
       47 
20 
     | 
    
         
             
                  # NOTE: if you run llvm-dis on "air" you can see the llvm bytecode
         
     | 
| 
       48 
     | 
    
         
            -
                   
     | 
| 
       49 
     | 
    
         
            -
                   
     | 
| 
       50 
     | 
    
         
            -
                  self.library = unwrap(METAL.device.newLibraryWithData_error_(data, None))
         
     | 
| 
      
 21 
     | 
    
         
            +
                  air = subprocess.check_output(['xcrun', '-sdk', 'macosx', 'metal', '-x', 'metal', '-c', '-', '-o', '-'], input=src.encode('utf-8'))
         
     | 
| 
      
 22 
     | 
    
         
            +
                  return subprocess.check_output(['xcrun', '-sdk', 'macosx', 'metallib', '-', '-o', '-'], input=air)
         
     | 
| 
       51 
23 
     | 
    
         
             
                else:
         
     | 
| 
       52 
     | 
    
         
            -
                  options = Metal.MTLCompileOptions. 
     | 
| 
       53 
     | 
    
         
            -
                   
     | 
| 
      
 24 
     | 
    
         
            +
                  options = Metal.MTLCompileOptions.new()
         
     | 
| 
      
 25 
     | 
    
         
            +
                  options.setFastMathEnabled_(getenv("METAL_FAST_MATH"))
         
     | 
| 
      
 26 
     | 
    
         
            +
                  try: library = unwrap2(self.device.device.newLibraryWithSource_options_error_(src, options, None))
         
     | 
| 
      
 27 
     | 
    
         
            +
                  except AssertionError as e: raise CompileError(e)
         
     | 
| 
      
 28 
     | 
    
         
            +
                  return library.libraryDataContents().bytes().tobytes()
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            class MetalProgram:
         
     | 
| 
      
 31 
     | 
    
         
            +
              def __init__(self, device:MetalDevice, name:str, lib:bytes):
         
     | 
| 
      
 32 
     | 
    
         
            +
                self.device, self.name, self.lib = device, name, lib
         
     | 
| 
      
 33 
     | 
    
         
            +
                if DEBUG >= 6:
         
     | 
| 
      
 34 
     | 
    
         
            +
                  with tempfile.NamedTemporaryFile(delete=True) as shader:
         
     | 
| 
      
 35 
     | 
    
         
            +
                    shader.write(lib)
         
     | 
| 
      
 36 
     | 
    
         
            +
                    shader.flush()
         
     | 
| 
      
 37 
     | 
    
         
            +
                    os.system(f"cd {pathlib.Path(__file__).parents[2]}/extra/disassemblers/applegpu && python3 compiler_explorer.py {shader.name}")
         
     | 
| 
      
 38 
     | 
    
         
            +
                assert lib[:4] == b"MTLB", "Invalid Metal library. Could be due to using conda. Try system python or METAL_XCODE=1 DISABLE_COMPILER_CACHE=1."
         
     | 
| 
      
 39 
     | 
    
         
            +
                data = libdispatch.dispatch_data_create(lib, len(lib), None, None)
         
     | 
| 
      
 40 
     | 
    
         
            +
                self.library = unwrap2(self.device.device.newLibraryWithData_error_(data, None))
         
     | 
| 
       54 
41 
     | 
    
         
             
                self.fxn = self.library.newFunctionWithName_(name)
         
     | 
| 
       55 
     | 
    
         
            -
                 
     | 
| 
       56 
     | 
    
         
            -
                if DEBUG >= 5:
         
     | 
| 
       57 
     | 
    
         
            -
                  arc = unwrap(METAL.device.newBinaryArchiveWithDescriptor_error_(Metal.MTLBinaryArchiveDescriptor.alloc().init(), None))
         
     | 
| 
       58 
     | 
    
         
            -
                  desc = Metal.MTLComputePipelineDescriptor.alloc().init()
         
     | 
| 
       59 
     | 
    
         
            -
                  desc.setComputeFunction_(self.fxn)
         
     | 
| 
       60 
     | 
    
         
            -
                  unwrap(arc.addComputePipelineFunctionsWithDescriptor_error_(desc, None))
         
     | 
| 
       61 
     | 
    
         
            -
                  unwrap(arc.serializeToURL_error_(Cocoa.NSURL.URLWithString_("file:///tmp/shader.bin"), None))
         
     | 
| 
       62 
     | 
    
         
            -
                  # clone https://github.com/dougallj/applegpu.git in tinygrad/disassemblers
         
     | 
| 
       63 
     | 
    
         
            -
                  os.system(f"cd {pathlib.Path(__file__).parent.parent.parent}/disassemblers/applegpu && python3 compiler_explorer.py /tmp/shader.bin")
         
     | 
| 
       64 
     | 
    
         
            -
                self.pipeline_state = unwrap(METAL.device.newComputePipelineStateWithFunction_error_(self.fxn, None))
         
     | 
| 
      
 42 
     | 
    
         
            +
                self.pipeline_state = unwrap2(self.device.device.newComputePipelineStateWithFunction_error_(self.fxn, None))
         
     | 
| 
       65 
43 
     | 
    
         | 
| 
       66 
     | 
    
         
            -
              def __call__(self, global_size, local_size,  
     | 
| 
       67 
     | 
    
         
            -
                 
     | 
| 
       68 
     | 
    
         
            -
                command_buffer =  
     | 
| 
      
 44 
     | 
    
         
            +
              def __call__(self, *bufs, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1), vals:Tuple[int, ...]=(), wait=False):
         
     | 
| 
      
 45 
     | 
    
         
            +
                if prod(local_size) > self.pipeline_state.maxTotalThreadsPerThreadgroup(): raise RuntimeError(f"local size {local_size} bigger than {self.pipeline_state.maxTotalThreadsPerThreadgroup()} with exec width {self.pipeline_state.threadExecutionWidth()} memory length {self.pipeline_state.staticThreadgroupMemoryLength()}")  # noqa: E501
         
     | 
| 
      
 46 
     | 
    
         
            +
                command_buffer = self.device.mtl_queue.commandBuffer()
         
     | 
| 
       69 
47 
     | 
    
         
             
                encoder = command_buffer.computeCommandEncoder()
         
     | 
| 
       70 
48 
     | 
    
         
             
                encoder.setComputePipelineState_(self.pipeline_state)
         
     | 
| 
       71 
     | 
    
         
            -
                for i,a in enumerate(bufs):
         
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
                  elif isinstance(a, int): encoder.setBytes_length_atIndex_((arg:=ctypes.c_int32(a)), ctypes.sizeof(arg), i)
         
     | 
| 
       74 
     | 
    
         
            -
                  else: raise RuntimeError(f"arg at index {i} has unsupported type {type(a)}")
         
     | 
| 
      
 49 
     | 
    
         
            +
                for i,a in enumerate(bufs): encoder.setBuffer_offset_atIndex_(a, 0, i)
         
     | 
| 
      
 50 
     | 
    
         
            +
                for i,a in enumerate(vals,start=len(bufs)): encoder.setBytes_length_atIndex_(ctypes.c_int32(a), 4, i)
         
     | 
| 
       75 
51 
     | 
    
         
             
                encoder.dispatchThreadgroups_threadsPerThreadgroup_(Metal.MTLSize(*global_size), Metal.MTLSize(*local_size))
         
     | 
| 
       76 
52 
     | 
    
         
             
                encoder.endEncoding()
         
     | 
| 
       77 
53 
     | 
    
         
             
                command_buffer.commit()
         
     | 
| 
       78 
54 
     | 
    
         
             
                if wait:
         
     | 
| 
       79 
     | 
    
         
            -
                  command_buffer 
     | 
| 
      
 55 
     | 
    
         
            +
                  wait_check(command_buffer)
         
     | 
| 
       80 
56 
     | 
    
         
             
                  return command_buffer.GPUEndTime() - command_buffer.GPUStartTime()
         
     | 
| 
       81 
     | 
    
         
            -
                 
     | 
| 
      
 57 
     | 
    
         
            +
                self.device.mtl_buffers_in_flight.append(command_buffer)
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
            class MetalAllocator(LRUAllocator):
         
     | 
| 
      
 60 
     | 
    
         
            +
              def __init__(self, device:MetalDevice):
         
     | 
| 
      
 61 
     | 
    
         
            +
                self.device:MetalDevice = device
         
     | 
| 
      
 62 
     | 
    
         
            +
                self.track_cross_device: Set[MetalDevice] = set()
         
     | 
| 
      
 63 
     | 
    
         
            +
                super().__init__()
         
     | 
| 
      
 64 
     | 
    
         
            +
              def free_cache(self):
         
     | 
| 
      
 65 
     | 
    
         
            +
                self.device.synchronize()
         
     | 
| 
      
 66 
     | 
    
         
            +
                for x in self.track_cross_device: x.synchronize()
         
     | 
| 
      
 67 
     | 
    
         
            +
                self.track_cross_device.clear()
         
     | 
| 
      
 68 
     | 
    
         
            +
                return super().free_cache()
         
     | 
| 
      
 69 
     | 
    
         
            +
              def _alloc(self, size:int, options) -> Any:
         
     | 
| 
      
 70 
     | 
    
         
            +
                ret = self.device.device.newBufferWithLength_options_(size, Metal.MTLResourceStorageModeShared)
         
     | 
| 
      
 71 
     | 
    
         
            +
                if ret is None: raise MemoryError(f"Metal OOM while allocating {size=}")
         
     | 
| 
      
 72 
     | 
    
         
            +
                return ret
         
     | 
| 
      
 73 
     | 
    
         
            +
              def transfer(self, dest:Any, src:Any, sz:int, src_dev: MetalDevice, **kwargs):
         
     | 
| 
      
 74 
     | 
    
         
            +
                src_dev.synchronize()
         
     | 
| 
      
 75 
     | 
    
         
            +
                command_buffer = self.device.mtl_queue.commandBuffer()
         
     | 
| 
      
 76 
     | 
    
         
            +
                encoder = command_buffer.blitCommandEncoder()
         
     | 
| 
      
 77 
     | 
    
         
            +
                encoder.copyFromBuffer_sourceOffset_toBuffer_destinationOffset_size_(src, 0, dest, 0, sz)
         
     | 
| 
      
 78 
     | 
    
         
            +
                encoder.endEncoding()
         
     | 
| 
      
 79 
     | 
    
         
            +
                command_buffer.commit()
         
     | 
| 
      
 80 
     | 
    
         
            +
                self.device.mtl_buffers_in_flight.append(command_buffer)
         
     | 
| 
      
 81 
     | 
    
         
            +
              def from_buffer(self, src:memoryview) -> Optional[Any]:
         
     | 
| 
      
 82 
     | 
    
         
            +
                ret = self.device.device.newBufferWithBytesNoCopy_length_options_deallocator_(src, len(src), Metal.MTLResourceStorageModeShared, None)
         
     | 
| 
      
 83 
     | 
    
         
            +
                if ret: self.device.mv_in_metal.append(src)
         
     | 
| 
      
 84 
     | 
    
         
            +
                return ret
         
     | 
| 
      
 85 
     | 
    
         
            +
              def _free(self, opaque:Any, options): opaque.release()
         
     | 
| 
      
 86 
     | 
    
         
            +
              def as_buffer(self, src:Any) -> memoryview:
         
     | 
| 
      
 87 
     | 
    
         
            +
                self.device.synchronize()
         
     | 
| 
      
 88 
     | 
    
         
            +
                return src.contents().as_buffer(src.length())
         
     | 
| 
      
 89 
     | 
    
         
            +
              def copyin(self, dest:Any, src:memoryview): self.as_buffer(dest)[:] = src
         
     | 
| 
      
 90 
     | 
    
         
            +
              def copyout(self, dest:memoryview, src:Any): dest[:] = self.as_buffer(src)
         
     | 
| 
       82 
91 
     | 
    
         | 
| 
       83 
     | 
    
         
            -
             
     | 
| 
       84 
     | 
    
         
            -
               
     | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
             
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
      
 92 
     | 
    
         
            +
            class MetalDevice(Compiled):
         
     | 
| 
      
 93 
     | 
    
         
            +
              def __init__(self, device:str):
         
     | 
| 
      
 94 
     | 
    
         
            +
                self.device = Metal.MTLCreateSystemDefaultDevice()
         
     | 
| 
      
 95 
     | 
    
         
            +
                self.mtl_queue = self.device.newCommandQueueWithMaxCommandBufferCount_(1024)
         
     | 
| 
      
 96 
     | 
    
         
            +
                self.mtl_buffers_in_flight: List[Any] = []
         
     | 
| 
      
 97 
     | 
    
         
            +
                self.mv_in_metal: List[memoryview] = []
         
     | 
| 
      
 98 
     | 
    
         
            +
                self.track_cross_buffer: List[Any] = []
         
     | 
| 
      
 99 
     | 
    
         
            +
                from tinygrad.runtime.graph.metal import MetalGraph
         
     | 
| 
      
 100 
     | 
    
         
            +
                super().__init__(device, MetalAllocator(self), MetalRenderer(), MetalCompiler(None if getenv("METAL_XCODE") else self),
         
     | 
| 
      
 101 
     | 
    
         
            +
                                 functools.partial(MetalProgram, self), MetalGraph)
         
     | 
| 
      
 102 
     | 
    
         
            +
              def synchronize(self):
         
     | 
| 
      
 103 
     | 
    
         
            +
                for cbuf in self.mtl_buffers_in_flight: wait_check(cbuf)
         
     | 
| 
      
 104 
     | 
    
         
            +
                self.mv_in_metal.clear()
         
     | 
| 
      
 105 
     | 
    
         
            +
                self.mtl_buffers_in_flight.clear()
         
     | 
| 
      
 106 
     | 
    
         
            +
                self.track_cross_buffer.clear()
         
     | 
| 
         @@ -0,0 +1,9 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            import numpy as np
         
     | 
| 
      
 2 
     | 
    
         
            +
            from tinygrad.helpers import flat_mv
         
     | 
| 
      
 3 
     | 
    
         
            +
            from tinygrad.device import Compiled, Allocator
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            class NpyAllocator(Allocator):
         
     | 
| 
      
 6 
     | 
    
         
            +
              def copyout(self, dest:memoryview, src:np.ndarray): dest[:] = flat_mv(np.require(src, requirements='C').data)
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            class NpyDevice(Compiled):
         
     | 
| 
      
 9 
     | 
    
         
            +
              def __init__(self, device:str): super().__init__(device, NpyAllocator(), None, None, None)
         
     |