PyPI - tinygrad - Versions diffs - 0.10.0__py3-none-any.whl → 0.10.1__py3-none-any.whl - Mend

tinygrad 0.10.0py3-none-any.whl → 0.10.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

tinygrad/codegen/kernel.py +114 -172
tinygrad/codegen/linearize.py +211 -81
tinygrad/codegen/lowerer.py +30 -35
tinygrad/codegen/{uopgraph.py → rewriter.py} +69 -59
tinygrad/codegen/transcendental.py +12 -13
tinygrad/device.py +170 -47
tinygrad/dtype.py +28 -26
tinygrad/engine/jit.py +80 -63
tinygrad/engine/memory.py +4 -5
tinygrad/engine/multi.py +162 -0
tinygrad/engine/realize.py +58 -107
tinygrad/engine/schedule.py +381 -314
tinygrad/engine/search.py +40 -44
tinygrad/gradient.py +70 -0
tinygrad/helpers.py +77 -58
tinygrad/nn/__init__.py +30 -32
tinygrad/nn/datasets.py +1 -2
tinygrad/nn/optim.py +22 -26
tinygrad/nn/state.py +89 -64
tinygrad/ops.py +562 -446
tinygrad/renderer/__init__.py +79 -36
tinygrad/renderer/cstyle.py +70 -84
tinygrad/renderer/llvmir.py +32 -20
tinygrad/renderer/ptx.py +79 -99
tinygrad/renderer/wgsl.py +87 -0
tinygrad/runtime/autogen/amd_gpu.py +39507 -12
tinygrad/runtime/autogen/comgr.py +2 -0
tinygrad/runtime/autogen/kfd.py +4 -3
tinygrad/runtime/autogen/kgsl.py +1 -1
tinygrad/runtime/autogen/libpciaccess.py +2023 -0
tinygrad/runtime/autogen/llvm.py +11379 -0
tinygrad/runtime/autogen/vfio.py +891 -0
tinygrad/runtime/graph/cuda.py +8 -9
tinygrad/runtime/graph/hcq.py +84 -79
tinygrad/runtime/graph/metal.py +19 -21
tinygrad/runtime/ops_amd.py +488 -327
tinygrad/runtime/ops_clang.py +15 -28
tinygrad/runtime/ops_cloud.py +34 -34
tinygrad/runtime/ops_cuda.py +30 -27
tinygrad/runtime/ops_disk.py +62 -63
tinygrad/runtime/ops_dsp.py +129 -38
tinygrad/runtime/ops_gpu.py +30 -30
tinygrad/runtime/ops_hip.py +29 -31
tinygrad/runtime/ops_llvm.py +45 -40
tinygrad/runtime/ops_metal.py +93 -73
tinygrad/runtime/ops_npy.py +2 -2
tinygrad/runtime/ops_nv.py +232 -270
tinygrad/runtime/ops_python.py +51 -46
tinygrad/runtime/ops_qcom.py +129 -157
tinygrad/runtime/ops_webgpu.py +63 -0
tinygrad/runtime/support/allocator.py +94 -0
tinygrad/runtime/support/am/__init__.py +0 -0
tinygrad/runtime/support/am/amdev.py +384 -0
tinygrad/runtime/support/am/ip.py +463 -0
tinygrad/runtime/support/compiler_cuda.py +4 -2
tinygrad/runtime/support/elf.py +26 -4
tinygrad/runtime/support/hcq.py +254 -324
tinygrad/runtime/support/llvm.py +32 -0
tinygrad/shape/shapetracker.py +84 -53
tinygrad/shape/view.py +103 -138
tinygrad/spec.py +154 -0
tinygrad/tensor.py +744 -496
{tinygrad-0.10.0.dist-info → tinygrad-0.10.1.dist-info}/METADATA +32 -21
tinygrad-0.10.1.dist-info/RECORD +86 -0
{tinygrad-0.10.0.dist-info → tinygrad-0.10.1.dist-info}/WHEEL +1 -1
tinygrad/engine/lazy.py +0 -228
tinygrad/function.py +0 -212
tinygrad/multi.py +0 -177
tinygrad/runtime/graph/clang.py +0 -39
tinygrad-0.10.0.dist-info/RECORD +0 -77
{tinygrad-0.10.0.dist-info → tinygrad-0.10.1.dist-info}/LICENSE +0 -0
{tinygrad-0.10.0.dist-info → tinygrad-0.10.1.dist-info}/top_level.txt +0 -0

tinygrad/runtime/ops_metal.py CHANGED Viewed

@@ -1,8 +1,7 @@
-from __future__ import annotations
-import os, subprocess, pathlib, ctypes, tempfile, functools
-from typing import List, Any, Tuple, Optional, cast
-from tinygrad.helpers import prod, getenv, T
-from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator
+import os, pathlib, struct, ctypes, tempfile, functools, decimal
+from typing import Any, Union, cast
+from tinygrad.helpers import prod, to_mv, getenv, round_up, cache_dir, T, init_c_struct_t, PROFILE
+from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator, cpu_profile, ProfileDeviceEvent, ProfileRangeEvent
 from tinygrad.renderer.cstyle import MetalRenderer
 class objc_id(ctypes.c_void_p): # This prevents ctypes from converting response to plain int, and dict.fromkeys() can use it to dedup
@@ -22,14 +21,19 @@ class MTLResourceOptions:
 class MTLPipelineOption:
   MTLPipelineOptionNone = 0
+# 13 is requestType that metal uses to compile source code into MTLB, there aren't any docs or symbols.
+REQUEST_TYPE_COMPILE = 13
 libobjc = ctypes.CDLL("/usr/lib/libobjc.dylib")
 libmetal = ctypes.CDLL("/System/Library/Frameworks/Metal.framework/Metal")
+compiler = ctypes.CDLL("/System/Library/PrivateFrameworks/MTLCompiler.framework/MTLCompiler")
 # Must be loaded for default Metal Device: https://developer.apple.com/documentation/metal/1433401-mtlcreatesystemdefaultdevice?language=objc
 ctypes.CDLL("/System/Library/Frameworks/CoreGraphics.framework/CoreGraphics")
 libdispatch = ctypes.CDLL("/usr/lib/libSystem.dylib") # libdispatch is part of libSystem on mac
 libobjc.objc_getClass.restype = objc_id
 libobjc.sel_registerName.restype = objc_id
 libmetal.MTLCreateSystemDefaultDevice.restype = objc_instance
+compiler.MTLCodeGenServiceCreate.restype = ctypes.c_void_p
 libdispatch.dispatch_data_create.restype = objc_instance
 # Ignore mypy error reporting incompatible default, because typevar default only works on python 3.12
@@ -39,46 +43,83 @@ def msg(ptr: objc_id, selector: str, /, *args: Any, restype: type[T] = objc_id)
   return sender(ptr, sel(selector), *args)
 def to_ns_str(s: str): return msg(libobjc.objc_getClass(b"NSString"), "stringWithUTF8String:", s.encode(), restype=objc_instance)
+def from_ns_str(s): return bytes(msg(s, "UTF8String", restype=ctypes.c_char_p)).decode()
-def to_struct(*t: int, _type: type = ctypes.c_ulong):
-  class Struct(ctypes.Structure): pass
-  Struct._fields_ = [(f"field{i}", _type) for i in range(len(t))]
-  return Struct(*t)
+def to_struct(*t: int, _type: type = ctypes.c_ulong): return init_c_struct_t(tuple([(f"field{i}", _type) for i in range(len(t))]))(*t)
 def wait_check(cbuf: Any):
   msg(cbuf, "waitUntilCompleted")
   error_check(msg(cbuf, "error", restype=objc_instance))
-def elapsed_time(cbuf: objc_id):
-  return cast(float, msg(cbuf, "GPUEndTime", restype=ctypes.c_double)) - cast(float, msg(cbuf, "GPUStartTime", restype=ctypes.c_double))
+def cmdbuf_label(cbuf: objc_id) -> str|None: return from_ns_str(label) if (label:=msg(cbuf, "label", restype=objc_id)).value is not None else None
+def cmdbuf_st_time(cbuf: objc_id) -> float: return cast(float, msg(cbuf, "GPUStartTime", restype=ctypes.c_double))
+def cmdbuf_en_time(cbuf: objc_id) -> float: return cast(float, msg(cbuf, "GPUEndTime", restype=ctypes.c_double))
 def error_check(error: objc_instance, error_constructor: type[Exception] = RuntimeError):
   if error.value is None: return None
-  raise error_constructor(bytes(msg(msg(error, "localizedDescription", restype=objc_instance), "UTF8String", restype=ctypes.c_char_p)).decode())
+  raise error_constructor(from_ns_str(msg(error, "localizedDescription", restype=objc_instance)))
+class MetalDevice(Compiled):
+  def __init__(self, device:str):
+    self.sysdevice = libmetal.MTLCreateSystemDefaultDevice()
+    self.mtl_queue = msg(self.sysdevice, "newCommandQueueWithMaxCommandBufferCount:", 1024, restype=objc_instance)
+    if self.mtl_queue is None: raise RuntimeError("Cannot allocate a new command queue")
+    self.mtl_buffers_in_flight: list[Any] = []
+    self.timeline_signal = msg(self.sysdevice, "newSharedEvent", restype=objc_instance)
+    self.timeline_value = 0
+    Compiled.profile_events += [ProfileDeviceEvent(device)]
+    from tinygrad.runtime.graph.metal import MetalGraph
+    super().__init__(device, MetalAllocator(self), MetalRenderer(), MetalCompiler() if getenv("METAL_DIRECT", 1) else Compiler(),
+                     functools.partial(MetalProgram, self), MetalGraph)
+  def synchronize(self):
+    for cbuf in self.mtl_buffers_in_flight:
+      wait_check(cbuf)
+      st, en = decimal.Decimal(cmdbuf_st_time(cbuf)) * 1000000, decimal.Decimal(cmdbuf_en_time(cbuf)) * 1000000
+      if PROFILE and (lb:=cmdbuf_label(cbuf)) is not None:
+        Compiled.profile_events += [ProfileRangeEvent(self.device, lb, st, en, is_copy=lb.startswith("COPY"))]
+    self.mtl_buffers_in_flight.clear()
 def metal_src_to_library(device:MetalDevice, src:str) -> objc_instance:
   options = msg(libobjc.objc_getClass(b"MTLCompileOptions"), "new", restype=objc_instance)
   msg(options, "setFastMathEnabled:", getenv("METAL_FAST_MATH"))
-  library = msg(device.device, "newLibraryWithSource:options:error:", to_ns_str(src), options,
+  library = msg(device.sysdevice, "newLibraryWithSource:options:error:", to_ns_str(src), options,
                 ctypes.byref(compileError:=objc_instance()), restype=objc_instance)
   error_check(compileError, CompileError)
   return library
 class MetalCompiler(Compiler):
-  def __init__(self, device:Optional[MetalDevice]=None):
-    self.device = device
-    super().__init__("compile_metal_xcode" if self.device is None else "compile_metal")
+  def __init__(self):
+    self.cgs = ctypes.c_void_p(compiler.MTLCodeGenServiceCreate(b"tinygrad"))
+    super().__init__("compile_metal_direct")
+  def __reduce__(self): return (MetalCompiler,()) # force pickle to create new instance for each multiprocessing fork
   def compile(self, src:str) -> bytes:
-    if self.device is None:
-      # NOTE: if you run llvm-dis on "air" you can see the llvm bytecode
-      air = subprocess.check_output(['xcrun', '-sdk', 'macosx', 'metal', '-x', 'metal', '-c', '-', '-o', '-'], input=src.encode('utf-8'))
-      lib = subprocess.check_output(['xcrun', '-sdk', 'macosx', 'metallib', '-', '-o', '-'], input=air)
-    else:
-      library = metal_src_to_library(self.device, src)
-      library_contents = msg(library, "libraryDataContents", restype=objc_instance)
-      lib = ctypes.string_at(msg(library_contents, "bytes"), cast(int, msg(library_contents, "length", restype=ctypes.c_ulong)))
-    assert lib[:4] == b"MTLB", "Invalid Metal library. Using conda? Corrupt XCode?"
-    return lib
+    ret: Union[Exception, bytes] = CompileError("MTLCodeGenServiceBuildRequest returned without calling the callback")
+    @ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_int32, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_char_p)
+    def callback(blockptr, error, dataPtr, dataLen, errorMessage):
+      nonlocal ret
+      if error == 0:
+        reply = bytes(to_mv(dataPtr, dataLen))
+        # offset from beginning to data = header size + warning size
+        ret = reply[sum(struct.unpack('<LL', reply[8:16])):]
+      else:
+        ret = CompileError(errorMessage.decode())
+    # llvm will create modules.timestamp in cache path and cache compilation of metal stdlib (250ms => 8ms compilation time)
+    # note that llvm won't necessarily create anything else here as apple has prebuilt versions of many standard libraries
+    params = f'-fno-fast-math -std=metal3.1 --driver-mode=metal -x metal -fmodules-cache-path="{cache_dir}" -fno-caret-diagnostics'
+    # source blob has to be padded to multiple of 4 but at least one 'b\x00' should be added, params blob just has to be null terminated
+    src_padded, params_padded = src.encode() + b'\x00'*(round_up(len(src) + 1, 4) - len(src)), params.encode() + b'\x00'
+    request = struct.pack('<QQ', len(src_padded), len(params_padded)) + src_padded + params_padded
+    # The callback is actually not a callback but a block which is apple's non-standard extension to add closures to C.
+    # See https://clang.llvm.org/docs/Block-ABI-Apple.html#high-level for struct layout.
+    # Fields other than invoke are unused in this case so we can just use ctypes.byref with negative offset to invoke field, add blockptr as a first
+    # argument and pretend it's a normal callback
+    compiler.MTLCodeGenServiceBuildRequest(self.cgs, None, REQUEST_TYPE_COMPILE, request, len(request), ctypes.byref(callback, -0x10))
+    if isinstance(ret, Exception): raise ret
+    assert ret[:4] == b"MTLB" and ret[-4:] == b"ENDT", f"Invalid Metal library. {ret!r}"
+    return ret
   def disassemble(self, lib:bytes):
     with tempfile.NamedTemporaryFile(delete=True) as shader:
       shader.write(lib)
@@ -87,59 +128,60 @@ class MetalCompiler(Compiler):
       if ret: print("Disassembler Error: Make sure you have https://github.com/dougallj/applegpu cloned to tinygrad/extra/disassemblers/applegpu")
 class MetalProgram:
-  def __init__(self, device:MetalDevice, name:str, lib:bytes):
-    self.device, self.name, self.lib = device, name, lib
+  def __init__(self, dev:MetalDevice, name:str, lib:bytes):
+    self.dev, self.name, self.lib = dev, name, lib
     if lib[:4] == b"MTLB":
       # binary metal library
       data = libdispatch.dispatch_data_create(lib, len(lib), None, None)
-      error_library_creation = objc_instance()
-      self.library = msg(self.device.device, "newLibraryWithData:error:", data, ctypes.byref(error_library_creation), restype=objc_instance)
-      error_check(error_library_creation)
+      self.library = msg(self.dev.sysdevice, "newLibraryWithData:error:", data, ctypes.byref(error_lib:=objc_instance()), restype=objc_instance)
+      error_check(error_lib)
     else:
       # metal source. rely on OS caching
-      try: self.library = metal_src_to_library(self.device, lib.decode())
+      try: self.library = metal_src_to_library(self.dev, lib.decode())
       except CompileError as e: raise RuntimeError from e
     self.fxn = msg(self.library, "newFunctionWithName:", to_ns_str(name), restype=objc_instance)
     descriptor = msg(libobjc.objc_getClass(b"MTLComputePipelineDescriptor"), "new", restype=objc_instance)
     msg(descriptor, "setComputeFunction:", self.fxn)
     msg(descriptor, "setSupportIndirectCommandBuffers:", True)
-    self.pipeline_state = msg(self.device.device, "newComputePipelineStateWithDescriptor:options:reflection:error:",
+    self.pipeline_state = msg(self.dev.sysdevice, "newComputePipelineStateWithDescriptor:options:reflection:error:",
       descriptor, MTLPipelineOption.MTLPipelineOptionNone, None, ctypes.byref(error_pipeline_creation:=objc_instance()), restype=objc_instance)
     error_check(error_pipeline_creation)
-  def __call__(self, *bufs, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1), vals:Tuple[int, ...]=(), wait=False):
+  def __call__(self, *bufs, global_size:tuple[int,int,int]=(1,1,1), local_size:tuple[int,int,int]=(1,1,1), vals:tuple[int, ...]=(), wait=False):
     max_total_threads = msg(self.pipeline_state, "maxTotalThreadsPerThreadgroup", restype=ctypes.c_ulong)
     if prod(local_size) > cast(int, max_total_threads):
       exec_width = msg(self.pipeline_state, "threadExecutionWidth", restype=ctypes.c_ulong)
       memory_length = msg(self.pipeline_state, "staticThreadgroupMemoryLength", restype=ctypes.c_ulong)
       raise RuntimeError(f"local size {local_size} bigger than {max_total_threads} with exec width {exec_width} memory length {memory_length}")
-    command_buffer = msg(self.device.mtl_queue, "commandBuffer", restype=objc_instance)
+    command_buffer = msg(self.dev.mtl_queue, "commandBuffer", restype=objc_instance)
     encoder = msg(command_buffer, "computeCommandEncoder", restype=objc_instance)
     msg(encoder, "setComputePipelineState:", self.pipeline_state)
     for i,a in enumerate(bufs): msg(encoder, "setBuffer:offset:atIndex:", a.buf, a.offset, i)
-    for i,a in enumerate(vals,start=len(bufs)): msg(encoder, "setBytes:length:atIndex:", bytes(ctypes.c_int(a)), 4, i)
+    for i,a in enumerate(vals, start=len(bufs)): msg(encoder, "setBytes:length:atIndex:", bytes(ctypes.c_int(a)), 4, i)
     msg(encoder, "dispatchThreadgroups:threadsPerThreadgroup:", to_struct(*global_size), to_struct(*local_size))
     msg(encoder, "endEncoding")
+    msg(command_buffer, "setLabel:", to_ns_str(self.name))
     msg(command_buffer, "commit")
+    self.dev.mtl_buffers_in_flight.append(command_buffer)
     if wait:
       wait_check(command_buffer)
-      return elapsed_time(command_buffer)
-    self.device.mtl_buffers_in_flight.append(command_buffer)
+      return cmdbuf_en_time(command_buffer) - cmdbuf_st_time(command_buffer)
 class MetalBuffer:
   def __init__(self, buf:Any, size:int, offset=0): self.buf, self.size, self.offset = buf, size, offset
 class MetalAllocator(LRUAllocator):
-  def __init__(self, device:MetalDevice):
-    self.device:MetalDevice = device
+  def __init__(self, dev:MetalDevice):
+    self.dev:MetalDevice = dev
     super().__init__()
   def _alloc(self, size:int, options) -> MetalBuffer:
     # Buffer is explicitly released in _free() rather than garbage collected via reference count
-    ret = msg(self.device.device, "newBufferWithLength:options:", size, MTLResourceOptions.MTLResourceStorageModeShared, restype=objc_id)
+    ret = msg(self.dev.sysdevice, "newBufferWithLength:options:", ctypes.c_ulong(size), MTLResourceOptions.MTLResourceStorageModeShared,
+              restype=objc_id)
     if ret.value is None: raise MemoryError(f"Metal OOM while allocating {size=}")
     return MetalBuffer(ret, size)
   def _free(self, opaque:MetalBuffer, options): msg(opaque.buf, "release")
-  def transfer(self, dest:MetalBuffer, src:MetalBuffer, sz:int, src_dev:MetalDevice, dest_dev:MetalDevice):
+  def _transfer(self, dest:MetalBuffer, src:MetalBuffer, sz:int, src_dev:MetalDevice, dest_dev:MetalDevice):
     dest_dev.synchronize()
     src_command_buffer = msg(src_dev.mtl_queue, "commandBuffer", restype=objc_instance)
     encoder = msg(src_command_buffer, "blitCommandEncoder", restype=objc_instance)
@@ -153,36 +195,14 @@ class MetalAllocator(LRUAllocator):
       msg(dest_command_buffer, "commit")
       dest_dev.mtl_buffers_in_flight.append(dest_command_buffer)
       src_dev.timeline_value += 1
+    msg(src_command_buffer, "setLabel:", to_ns_str(f"COPY {src_dev.device} -> {dest_dev.device}"))
     msg(src_command_buffer, "commit")
     src_dev.mtl_buffers_in_flight.append(src_command_buffer)
-  def from_buffer(self, src:memoryview) -> Optional[Any]:
-    ptr = (ctypes.c_char * src.nbytes).from_buffer(src)
-    ret = msg(self.device.device, "newBufferWithBytesNoCopy:length:options:deallocator:", ptr, src.nbytes, 0, None, restype=objc_instance)
-    if ret: self.device.mv_in_metal.append(src)
-    return MetalBuffer(ret, src.nbytes)
-  def as_buffer(self, src:MetalBuffer) -> memoryview:
-    self.device.synchronize()
-    ptr = msg(src.buf, "contents", restype=objc_id) # Shared memory, do not release here
-    array = (ctypes.c_char * (src.offset + src.size)).from_address(ptr.value)
-    return memoryview(array).cast("B")[src.offset:]
-  def copyin(self, dest:MetalBuffer, src:memoryview): self.as_buffer(dest)[:] = src
-  def copyout(self, dest:memoryview, src:MetalBuffer): dest[:] = self.as_buffer(src)
-  def offset(self, buf:MetalBuffer, size:int, offset:int): return MetalBuffer(buf.buf, size, offset)
-class MetalDevice(Compiled):
-  def __init__(self, device:str):
-    self.device = libmetal.MTLCreateSystemDefaultDevice()
-    self.mtl_queue = msg(self.device, "newCommandQueueWithMaxCommandBufferCount:", 1024, restype=objc_instance)
-    if self.mtl_queue is None: raise RuntimeError("Cannot allocate a new command queue")
-    self.mtl_buffers_in_flight: List[Any] = []
-    self.mv_in_metal: List[memoryview] = []
-    self.timeline_signal = msg(self.device, "newSharedEvent", restype=objc_instance)
-    self.timeline_value = 0
-    from tinygrad.runtime.graph.metal import MetalGraph
-    super().__init__(device, MetalAllocator(self), MetalRenderer(), MetalCompiler() if getenv("METAL_XCODE") else Compiler(),
-                     functools.partial(MetalProgram, self), MetalGraph)
-  def synchronize(self):
-    for cbuf in self.mtl_buffers_in_flight: wait_check(cbuf)
-    self.mv_in_metal.clear()
-    self.mtl_buffers_in_flight.clear()
+  def _cp_mv(self, dst, src, prof_desc):
+    with cpu_profile(prof_desc, self.dev.device, is_copy=True): dst[:] = src
+  def _as_buffer(self, src:MetalBuffer) -> memoryview:
+    self.dev.synchronize()
+    return to_mv(cast(int, msg(src.buf, "contents", restype=objc_id).value), src.size + src.offset)[src.offset:]
+  def _copyin(self, dest:MetalBuffer, src:memoryview): self._cp_mv(self._as_buffer(dest), src, "CPU -> METAL")
+  def _copyout(self, dest:memoryview, src:MetalBuffer): self._cp_mv(dest, self._as_buffer(src), "METAL -> CPU")
+  def _offset(self, buf:MetalBuffer, size:int, offset:int): return MetalBuffer(buf.buf, size, offset)

tinygrad/runtime/ops_npy.py CHANGED Viewed

@@ -2,8 +2,8 @@ import numpy as np
 from tinygrad.helpers import flat_mv
 from tinygrad.device import Compiled, Allocator
-class NpyAllocator(Allocator):  # pylint: disable=abstract-method
-  def copyout(self, dest:memoryview, src:np.ndarray): dest[:] = flat_mv(np.require(src, requirements='C').data)
+class NpyAllocator(Allocator):
+  def _copyout(self, dest:memoryview, src:np.ndarray): dest[:] = flat_mv(np.require(src, requirements='C').data)
 class NpyDevice(Compiled):
   def __init__(self, device:str): super().__init__(device, NpyAllocator(), None, None, None)

tinygrad 0.10.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

tinygrad 0.10.0py3-none-any.whl → 0.10.1py3-none-any.whl