tinygrad 0.9.2.tar.gz → 0.10.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tinygrad-0.9.2/tinygrad.egg-info → tinygrad-0.10.0}/PKG-INFO +10 -7
- {tinygrad-0.9.2 → tinygrad-0.10.0}/README.md +4 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/setup.py +10 -8
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_arange.py +21 -9
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_assign.py +17 -4
- tinygrad-0.10.0/test/test_compile_failures.py +18 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_const_folding.py +27 -12
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_conv_shapetracker.py +9 -14
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_copy_speed.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_device_speed.py +1 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_dtype.py +133 -33
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_dtype_alu.py +19 -44
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_fusion_op.py +28 -9
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_fuzz_shape_ops.py +2 -2
- tinygrad-0.10.0/test/test_gc.py +67 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_graph.py +1 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_hcq.py +25 -13
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_image_dtype.py +31 -3
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_jit.py +21 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_lazybuffer.py +34 -13
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_linearizer.py +734 -637
- tinygrad-0.10.0/test/test_linearizer_dumb.py +223 -0
- tinygrad-0.10.0/test/test_linearizer_failures.py +1435 -0
- tinygrad-0.10.0/test/test_linearizer_overflows.py +196 -0
- tinygrad-0.10.0/test/test_metal.py +77 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_multitensor.py +110 -21
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_nn.py +219 -33
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_ocl.py +11 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_ops.py +433 -56
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_optim.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_pickle.py +27 -3
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_profiler.py +4 -3
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_randomness.py +146 -28
- tinygrad-0.10.0/test/test_rearrange_einops.py +321 -0
- tinygrad-0.10.0/test/test_renderer_failures.py +68 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_sample.py +1 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_schedule.py +462 -192
- tinygrad-0.10.0/test/test_search.py +158 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_setitem.py +23 -8
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_specific_conv.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_speed_v_torch.py +4 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_subbuffer.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_symbolic_jit.py +1 -3
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_symbolic_ops.py +2 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_symbolic_shapetracker.py +37 -40
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_tensor.py +84 -38
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_tensor_variable.py +36 -20
- tinygrad-0.10.0/test/test_tiny.py +84 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_transcendental.py +59 -9
- tinygrad-0.10.0/test/test_uop_graph.py +716 -0
- tinygrad-0.10.0/test/test_uops.py +454 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_uops_stats.py +45 -24
- tinygrad-0.10.0/test/test_viz.py +93 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_winograd.py +3 -3
- tinygrad-0.10.0/tinygrad/__init__.py +11 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/codegen/kernel.py +214 -214
- tinygrad-0.10.0/tinygrad/codegen/linearize.py +95 -0
- tinygrad-0.10.0/tinygrad/codegen/lowerer.py +143 -0
- tinygrad-0.10.0/tinygrad/codegen/transcendental.py +257 -0
- tinygrad-0.10.0/tinygrad/codegen/uopgraph.py +506 -0
- tinygrad-0.10.0/tinygrad/device.py +221 -0
- tinygrad-0.10.0/tinygrad/dtype.py +188 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/jit.py +57 -38
- {tinygrad-0.9.2/tinygrad → tinygrad-0.10.0/tinygrad/engine}/lazy.py +67 -58
- tinygrad-0.10.0/tinygrad/engine/memory.py +51 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/realize.py +23 -74
- tinygrad-0.10.0/tinygrad/engine/schedule.py +419 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/search.py +33 -29
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/function.py +53 -61
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/helpers.py +88 -108
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/multi.py +53 -54
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/nn/__init__.py +85 -53
- tinygrad-0.10.0/tinygrad/nn/datasets.py +15 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/nn/optim.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/nn/state.py +89 -4
- tinygrad-0.10.0/tinygrad/ops.py +1152 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/renderer/__init__.py +21 -19
- tinygrad-0.10.0/tinygrad/renderer/cstyle.py +462 -0
- tinygrad-0.10.0/tinygrad/renderer/llvmir.py +142 -0
- tinygrad-0.10.0/tinygrad/renderer/ptx.py +225 -0
- tinygrad-0.10.0/tinygrad/runtime/autogen/adreno.py +17904 -0
- tinygrad-0.10.0/tinygrad/runtime/autogen/amd_gpu.py +48384 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/io_uring.py +97 -63
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/kfd.py +60 -79
- tinygrad-0.10.0/tinygrad/runtime/autogen/kgsl.py +1386 -0
- tinygrad-0.10.0/tinygrad/runtime/autogen/libc.py +5462 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/nv_gpu.py +1976 -1957
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/opencl.py +11 -11
- tinygrad-0.10.0/tinygrad/runtime/autogen/qcom_dsp.py +1739 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/graph/clang.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/graph/cuda.py +3 -4
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/graph/hcq.py +5 -5
- tinygrad-0.10.0/tinygrad/runtime/graph/metal.py +103 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_amd.py +86 -57
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_clang.py +11 -4
- tinygrad-0.10.0/tinygrad/runtime/ops_cloud.py +220 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_cuda.py +8 -7
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_disk.py +25 -22
- tinygrad-0.10.0/tinygrad/runtime/ops_dsp.py +181 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_gpu.py +26 -15
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_hip.py +3 -5
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_llvm.py +15 -10
- tinygrad-0.10.0/tinygrad/runtime/ops_metal.py +188 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_nv.py +135 -96
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_python.py +65 -62
- tinygrad-0.10.0/tinygrad/runtime/ops_qcom.py +405 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/support/compiler_cuda.py +6 -7
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/support/compiler_hip.py +3 -5
- tinygrad-0.9.2/tinygrad/device.py → tinygrad-0.10.0/tinygrad/runtime/support/hcq.py +66 -206
- tinygrad-0.10.0/tinygrad/shape/__init__.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/shape/shapetracker.py +38 -39
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/shape/view.py +79 -52
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/tensor.py +778 -336
- {tinygrad-0.9.2 → tinygrad-0.10.0/tinygrad.egg-info}/PKG-INFO +10 -7
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/SOURCES.txt +17 -9
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/requires.txt +4 -6
- tinygrad-0.9.2/test/test_custom_function.py +0 -106
- tinygrad-0.9.2/test/test_gc.py +0 -37
- tinygrad-0.9.2/test/test_lazyop.py +0 -34
- tinygrad-0.9.2/test/test_linearizer_dumb.py +0 -104
- tinygrad-0.9.2/test/test_linearizer_failures.py +0 -467
- tinygrad-0.9.2/test/test_linearizer_overflows.py +0 -89
- tinygrad-0.9.2/test/test_pattern_matcher.py +0 -186
- tinygrad-0.9.2/test/test_renderer_failures.py +0 -43
- tinygrad-0.9.2/test/test_search.py +0 -112
- tinygrad-0.9.2/test/test_uop_graph.py +0 -662
- tinygrad-0.9.2/test/test_uops.py +0 -379
- tinygrad-0.9.2/test/test_verify_lazyop.py +0 -76
- tinygrad-0.9.2/tinygrad/__init__.py +0 -6
- tinygrad-0.9.2/tinygrad/codegen/lowerer.py +0 -215
- tinygrad-0.9.2/tinygrad/codegen/transcendental.py +0 -310
- tinygrad-0.9.2/tinygrad/codegen/uopgraph.py +0 -622
- tinygrad-0.9.2/tinygrad/codegen/uops.py +0 -293
- tinygrad-0.9.2/tinygrad/dtype.py +0 -127
- tinygrad-0.9.2/tinygrad/engine/graph.py +0 -87
- tinygrad-0.9.2/tinygrad/engine/schedule.py +0 -413
- tinygrad-0.9.2/tinygrad/nn/datasets.py +0 -8
- tinygrad-0.9.2/tinygrad/ops.py +0 -170
- tinygrad-0.9.2/tinygrad/renderer/assembly.py +0 -267
- tinygrad-0.9.2/tinygrad/renderer/cstyle.py +0 -416
- tinygrad-0.9.2/tinygrad/renderer/llvmir.py +0 -151
- tinygrad-0.9.2/tinygrad/runtime/autogen/amd_gpu.py +0 -32858
- tinygrad-0.9.2/tinygrad/runtime/autogen/libc.py +0 -4260
- tinygrad-0.9.2/tinygrad/runtime/graph/metal.py +0 -78
- tinygrad-0.9.2/tinygrad/runtime/ops_metal.py +0 -116
- tinygrad-0.9.2/tinygrad/shape/symbolic.py +0 -323
- {tinygrad-0.9.2 → tinygrad-0.10.0}/LICENSE +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/setup.cfg +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_conv.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_kernel_cache.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_masked_st.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_method_cache.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_net_speed.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_tensor_data.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_to_numpy.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_zero_copy.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/codegen/__init__.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/__init__.py +0 -0
- /tinygrad-0.9.2/tinygrad/runtime/__init__.py → /tinygrad-0.10.0/tinygrad/py.typed +0 -0
- {tinygrad-0.9.2/tinygrad/runtime/graph → tinygrad-0.10.0/tinygrad/runtime}/__init__.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/comgr.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/cuda.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/hip.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/hsa.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/nvrtc.py +0 -0
- {tinygrad-0.9.2/tinygrad/runtime/support → tinygrad-0.10.0/tinygrad/runtime/graph}/__init__.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_npy.py +0 -0
- {tinygrad-0.9.2/tinygrad/shape → tinygrad-0.10.0/tinygrad/runtime/support}/__init__.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/support/elf.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/dependency_links.txt +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/top_level.txt +0 -0

{tinygrad-0.9.2/tinygrad.egg-info → tinygrad-0.10.0}/PKG-INFO

@@ -1,17 +1,14 @@
 Metadata-Version: 2.1
 Name: tinygrad
-Version: 0.9.2
+Version: 0.10.0
 Summary: You like pytorch? You like micrograd? You love tinygrad! <3
 Author: George Hotz
 License: MIT
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
-Requires-Python: >=3.8
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: numpy
-Requires-Dist: pyobjc-framework-Metal; platform_system == "Darwin"
-Requires-Dist: pyobjc-framework-libdispatch; platform_system == "Darwin"
 Provides-Extra: llvm
 Requires-Dist: llvmlite; extra == "llvm"
 Provides-Extra: arm
@@ -20,12 +17,13 @@ Provides-Extra: triton
 Requires-Dist: triton-nightly>=2.1.0.dev20231014192330; extra == "triton"
 Provides-Extra: linting
 Requires-Dist: pylint; extra == "linting"
-Requires-Dist: mypy; extra == "linting"
+Requires-Dist: mypy==1.11.2; extra == "linting"
 Requires-Dist: typing-extensions; extra == "linting"
 Requires-Dist: pre-commit; extra == "linting"
 Requires-Dist: ruff; extra == "linting"
 Requires-Dist: types-tqdm; extra == "linting"
 Provides-Extra: testing
+Requires-Dist: numpy; extra == "testing"
 Requires-Dist: torch; extra == "testing"
 Requires-Dist: pillow; extra == "testing"
 Requires-Dist: pytest; extra == "testing"
@@ -45,6 +43,7 @@ Requires-Dist: networkx; extra == "testing"
 Requires-Dist: hypothesis; extra == "testing"
 Requires-Dist: nibabel; extra == "testing"
 Requires-Dist: bottle; extra == "testing"
+Requires-Dist: ggml-python; extra == "testing"
 Provides-Extra: docs
 Requires-Dist: mkdocs; extra == "docs"
 Requires-Dist: mkdocs-material; extra == "docs"
@@ -52,6 +51,7 @@ Requires-Dist: mkdocstrings[python]; extra == "docs"
 Requires-Dist: markdown-callouts; extra == "docs"
 Requires-Dist: markdown-exec[ansi]; extra == "docs"
 Requires-Dist: black; extra == "docs"
+Requires-Dist: numpy; extra == "docs"
 Provides-Extra: testing-tf
 Requires-Dist: tensorflow==2.15.1; extra == "testing-tf"
 Requires-Dist: tensorflow_addons; extra == "testing-tf"
@@ -145,9 +145,12 @@ tinygrad already supports numerous accelerators, including:
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
 - [x] [AMD](tinygrad/runtime/ops_amd.py)
 - [x] [NV](tinygrad/runtime/ops_nv.py)
+- [x] [QCOM](tinygrad/runtime/ops_qcom.py)
 
 And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.
 
+To check default accelerator run: `python3 -c "from tinygrad import Device; print(Device.DEFAULT)"`
+
 ## Installation
 
 The current recommended way to install tinygrad is from source.
@@ -233,4 +236,4 @@ python3 -m pytest test/ # whole test suite
 
 #### Process replay tests
 
-[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/
+[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/README.md) compares your PR's generated kernels against master. If your PR is a refactor or speedup without any expected behavior change, It should include [pr] in the pull request title.

{tinygrad-0.9.2 → tinygrad-0.10.0}/README.md

@@ -87,9 +87,12 @@ tinygrad already supports numerous accelerators, including:
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
 - [x] [AMD](tinygrad/runtime/ops_amd.py)
 - [x] [NV](tinygrad/runtime/ops_nv.py)
+- [x] [QCOM](tinygrad/runtime/ops_qcom.py)
 
 And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.
 
+To check default accelerator run: `python3 -c "from tinygrad import Device; print(Device.DEFAULT)"`
+
 ## Installation
 
 The current recommended way to install tinygrad is from source.
@@ -175,4 +178,4 @@ python3 -m pytest test/ # whole test suite
 
 #### Process replay tests
 
-[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/
+[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/README.md) compares your PR's generated kernels against master. If your PR is a refactor or speedup without any expected behavior change, It should include [pr] in the pull request title.

{tinygrad-0.9.2 → tinygrad-0.10.0}/setup.py

@@ -8,7 +8,7 @@ with open(directory / 'README.md', encoding='utf-8') as f:
   long_description = f.read()
 
 setup(name='tinygrad',
-      version='0.9.2',
+      version='0.10.0',
       description='You like pytorch? You like micrograd? You love tinygrad! <3',
       author='George Hotz',
       license='MIT',
@@ -16,28 +16,28 @@ setup(name='tinygrad',
       long_description_content_type='text/markdown',
       packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer', 'tinygrad.engine',
                   'tinygrad.runtime', 'tinygrad.runtime.support', 'tinygrad.runtime.graph', 'tinygrad.shape'],
+      package_data = {'tinygrad': ['py.typed']},
      classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License"
      ],
-      install_requires=["numpy",
-                        "pyobjc-framework-Metal; platform_system=='Darwin'",
-                        "pyobjc-framework-libdispatch; platform_system=='Darwin'"],
-      python_requires='>=3.8',
+      install_requires=[],
+      python_requires='>=3.10',
       extras_require={
        'llvm': ["llvmlite"],
        'arm': ["unicorn"],
        'triton': ["triton-nightly>=2.1.0.dev20231014192330"],
        'linting': [
            "pylint",
-           "mypy",
+           "mypy==1.11.2",
            "typing-extensions",
            "pre-commit",
            "ruff",
            "types-tqdm",
        ],
-       #'mlperf': ["mlperf-logging @ git+https://github.com/mlperf/logging.git@4.
+       #'mlperf': ["mlperf-logging @ git+https://github.com/mlperf/logging.git@4.1.0-rc3"],
        'testing': [
+           "numpy",
            "torch",
            "pillow",
            "pytest",
@@ -57,6 +57,7 @@ setup(name='tinygrad',
            "hypothesis",
            "nibabel",
            "bottle",
+           "ggml-python"
        ],
        'docs': [
            "mkdocs",
@@ -64,7 +65,8 @@ setup(name='tinygrad',
            "mkdocstrings[python]",
            "markdown-callouts",
            "markdown-exec[ansi]",
-           "black"
+           "black",
+           "numpy",
        ],
        'testing_tf': [
            "tensorflow==2.15.1",

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_arange.py

@@ -23,16 +23,21 @@ class TestArange(unittest.TestCase):
     np.testing.assert_equal(tt.numpy(), np.arange(N))
     return p.op_estimate
 
-  def test_complexity(self, opts=None):
+  def test_complexity(self, opts=None, limit=None):
     # add 1 to avoid divide by 0. arange is 0 flops now!
     f1 = self._get_flops(256, opts) + 1
     f2 = self._get_flops(2560, opts) + 1
     print(f"{f1=}, {f2=}")
     assert (f1 < 5000 and f2 < 5000) or (f2 / f1 < 15), f"bad complexity, flops {f2/f1:.1f}X while inputs 10X"
+    if limit is not None and not getenv("PTX"):
+      # PTX counts index ALU in flops
+      assert f1 <= limit, f"{f1=}, {limit=}"
 
-  def test_complexity_w_upcast(self): return self.test_complexity([Opt(OptOps.UPCAST, 0, 4)])
-  def
-  def
+  def test_complexity_w_upcast(self): return self.test_complexity([Opt(OptOps.UPCAST, 0, 4)], limit=1)
+  def test_complexity_w_unroll2(self): return self.test_complexity([Opt(OptOps.UNROLL, 0, 2)], limit=1)
+  def test_complexity_w_unroll4(self): return self.test_complexity([Opt(OptOps.UNROLL, 0, 4)], limit=1)
+  def test_complexity_w_unroll8(self): return self.test_complexity([Opt(OptOps.UNROLL, 0, 8)], limit=1)
+  def test_complexity_w_upcast_and_unroll(self): return self.test_complexity([Opt(OptOps.UPCAST, 0, 4), Opt(OptOps.UNROLL, 0, 4)], limit=1)
 
   @unittest.skip("doesn't work yet")
   def test_complexity_w_local_and_padto(self): return self.test_complexity([Opt(OptOps.LOCAL, 0, 16), Opt(op=OptOps.PADTO, axis=1, amt=32)])
@@ -125,23 +130,30 @@ class TestIndexing(unittest.TestCase):
   @unittest.skip("not ready")
   def test_index_fused_opt(self): self.test_index_fused(0)
 
+  def test_index_fused_out_of_bounds(self):
+    dataset = Tensor.rand(256, 256).realize()
+    idxs = Tensor([-19238, -257, 256, 495, 10982377]).realize()
+    with Context(NOOPT=1, FUSE_ARANGE=1):
+      X = dataset[idxs]
+    np.testing.assert_equal(X.numpy(), 0)
+
   @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
-  def test_index_mnist(self, noopt=1):
+  def test_index_mnist(self, noopt=1, op_limit=512*784*5):
     from tinygrad.nn.datasets import mnist
     X_train, Y_train, _, _ = mnist()
     with Context(NOOPT=noopt, FUSE_ARANGE=1, SPLIT_REDUCEOP=0):
+      samples = Tensor.randint(getenv("BS", 512), high=X_train.shape[0]).realize()
       GlobalCounters.reset()
-      samples = Tensor.randint(getenv("BS", 512), high=X_train.shape[0])
       x = X_train[samples].numpy()
      y = Y_train[samples].numpy()
-      assert GlobalCounters.global_ops <
+      assert GlobalCounters.global_ops < op_limit, f"too many ops {GlobalCounters.global_ops} != {op_limit}"
     np.testing.assert_allclose(X_train.numpy()[samples.numpy()], x)
     np.testing.assert_allclose(Y_train.numpy()[samples.numpy()], y)
   @unittest.skip("not ready")
   def test_index_mnist_opt(self): self.test_index_mnist(0)
 
   @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
-  def test_llama_embedding(self, noopt=1, op_limit=
+  def test_llama_embedding(self, noopt=1, op_limit=65536):
     # llama3 is 128256
     vocab_size, embed_size = (10, 3) if CI else (32000, 4096)
     emb = nn.Embedding(vocab_size, embed_size)
@@ -161,7 +173,7 @@ class TestIndexing(unittest.TestCase):
     # TODO: reshape to match torch, should we do this in nn?
     np.testing.assert_allclose(z.numpy().reshape(4, embed_size), torch_z.detach().numpy(), atol=1e-8, rtol=1e-8)
     # at least the arange is being fused
-  def test_llama_embedding_opt(self): self.test_llama_embedding(0,
+  def test_llama_embedding_opt(self): self.test_llama_embedding(0, 1_736_704_000 if CI else 5_898_240_000)
 
 if __name__ == "__main__":
   unittest.main()

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_assign.py

@@ -2,6 +2,7 @@
 import unittest
 import numpy as np
 from tinygrad import dtypes, Tensor, TinyJit, GlobalCounters, Variable
+from tinygrad.engine.schedule import create_schedule
 
 N = 200 # has to be bigger than the cache to fail
 
@@ -57,10 +58,12 @@ class TestAssign(unittest.TestCase):
     x.realize()
     x = Tensor([0])
     f(x)
-
+    out = x.item()
+    assert out == 1, f"expected 1, got {out}"
     x = Tensor([0])
     f(x)
-
+    out = x.item()
+    assert out == 1, f"expected 1, got {out}"
 
   def test_assign_add_jit(self):
     @TinyJit
@@ -165,6 +168,16 @@ class TestAssign(unittest.TestCase):
     a += 1
     np.testing.assert_allclose(a.numpy(), 3)
 
+  # NOTE: this is similar to the resnet failure
+  #@unittest.expectedFailure
+  def test_double_assign_alt(self):
+    a = Tensor.ones(4).contiguous().realize()
+    b = Tensor([1, 2, 3, 4]).realize().lazydata
+    a1 = a.lazydata.assign(b)
+    a2 = a.lazydata.assign(b)
+    sched = create_schedule([a1, a2])
+    self.assertEqual(len(sched), 1)
+
   def test_crossover_assign(self):
     a = Tensor.full((4,), 2).contiguous().realize()
     b = Tensor.full((4,), 3).contiguous().realize()
@@ -347,7 +360,7 @@ class TestAssign(unittest.TestCase):
 
   def test_permuted_assignment_masked_view_possible(self):
     a = Tensor.ones(4, 4).contiguous().realize()
-    b = a.shrink((None, (0, 2))).pad((None, (0, 2)), 2)
+    b = a.shrink((None, (0, 2))).pad((None, (0, 2)), value=2)
     a.assign(a + b)
     kc = GlobalCounters.kernel_count
     a.realize()
@@ -357,7 +370,7 @@ class TestAssign(unittest.TestCase):
   def test_permuted_assignment_masked_view_not_contiguous(self):
     a = Tensor.ones(4, 4).contiguous().realize()
     with self.assertRaisesRegex(RuntimeError, "contiguous"):
-      b = a.shrink((None, (0, 2))).pad((None, (0, 2)), 2).permute(1, 0)
+      b = a.shrink((None, (0, 2))).pad((None, (0, 2)), value=2).permute(1, 0)
       a.assign(a + b)
       a.realize()
 

tinygrad-0.10.0/test/test_compile_failures.py (new file)

@@ -0,0 +1,18 @@
+import unittest
+from tinygrad import Tensor, dtypes, Device
+from tinygrad.engine.realize import lower_schedule
+from tinygrad.device import is_dtype_supported
+
+class TestCompileFailures(unittest.TestCase):
+  def compile(self, out:Tensor):
+    for _ in lower_schedule(out.schedule()): pass
+
+  @unittest.skipUnless(is_dtype_supported(dtypes.uchar, Device.DEFAULT), f"no uint8 on {Device.DEFAULT}")
+  def test_interpolate_atari(self):
+    self.compile(Tensor.empty(210, 160, dtype='uint8').interpolate((64, 64)))
+
+  def test_add_max_uchar(self):
+    self.compile((Tensor.empty(1024, dtype='uint8') + Tensor.empty(1024, dtype='uint8')).max())
+
+if __name__ == '__main__':
+  unittest.main()

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_const_folding.py

@@ -1,15 +1,15 @@
 import unittest, math
 from tinygrad import Tensor, Device, dtypes
+from tinygrad.ops import Ops
 from tinygrad.engine.schedule import create_schedule
 from tinygrad.helpers import CI
-from tinygrad.ops import MetaOps
 import numpy as np
-from
+from tinygrad.device import is_dtype_supported
 
 def _check_ast_count(desired_count:int, t:Tensor):
   # NOTE: this has side effect because everything can be scheduled only once
   schedule = create_schedule(t.lazydata.lbs)
-  asts = [s for s in schedule if s.ast.op is
+  asts = [s for s in schedule if s.ast.op is Ops.SINK]
   assert len(asts) == desired_count
 
 class TestUnaryOpsConstFolding(unittest.TestCase):
@@ -23,6 +23,7 @@ class TestUnaryOpsConstFolding(unittest.TestCase):
     _check_ast_count(0, Tensor.ones(4).cast(dtypes.int16))
     _check_ast_count(0, Tensor.full(4, fill_value=-1).cast(dtypes.uint16))
 
+  @unittest.expectedFailure  # no two level fold at lazybuffer
   def test_neg_folding(self):
     _check_ast_count(0, Tensor([1, 2, 3]).mul(-1).neg())
     _check_ast_count(0, Tensor([1, 2, 3]).neg().mul(-1))
@@ -78,6 +79,11 @@ class TestBinaryOpsConstFolding(unittest.TestCase):
   def test_div_tensor_one(self):
     _check_ast_count(0, Tensor([1.0, 2, 3, 4]) / Tensor.ones(4))
 
+  def test_idiv_literal_one(self):
+    _check_ast_count(0, Tensor([1, 2, 3, 4]) // 1)
+  def test_idiv_tensor_one(self):
+    _check_ast_count(0, Tensor([1, 2, 3, 4]) // Tensor.ones(4, dtype=dtypes.int32))
+
   def test_pow_literal_zero(self):
     _check_ast_count(0, Tensor([1.0, 2, 3, 4]) ** 0)
   def test_pow_tensor_zero(self):
@@ -124,13 +130,16 @@ class TestMovedConstFolding(unittest.TestCase):
 
   def test_cast_padded(self):
     # NOTE: this is folded due to CAST_BEFORE_VIEW
-
-
-
-
+    if is_dtype_supported(dtypes.int16):
+      _check_ast_count(0, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16))
+      np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16).numpy(), [0, 1, 1, 1, 1, 0])
+    if is_dtype_supported(dtypes.uint16):
+      _check_ast_count(0, Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16))
+      np.testing.assert_equal(Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16).numpy(), [0, 65535, 65535, 65535, 65535, 0])
     # not folded
-
-
+    if is_dtype_supported(dtypes.int64):
+      _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int64))
+      np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int64).numpy(), [0, 1, 1, 1, 1, 0])
 
 class TestReduceOpsConstFolding(unittest.TestCase):
   def test_const_sum(self):
@@ -145,10 +154,18 @@ class TestReduceOpsConstFolding(unittest.TestCase):
     _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).sum())
     np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).sum().numpy(), 4)
 
-    # NOTE: cannot just count the non-padded area because some
+    # NOTE: cannot just count the non-padded area because some Ops f do not have f(0) = 0.
     _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).exp().sum())
     np.testing.assert_allclose(Tensor.ones(4).pad(((1, 1),)).exp().sum().numpy(), 4 * math.e + 2)
 
+  def test_const_prod(self):
+    _check_ast_count(0, Tensor.full((2, 3), fill_value=2).prod())
+    np.testing.assert_equal(Tensor.full((2, 3), fill_value=2).prod().numpy(), 2**(2*3))
+    _check_ast_count(0, Tensor.full((4, 5, 6), fill_value=2).prod(axis=0))
+    np.testing.assert_equal(Tensor.full((4, 5, 6), fill_value=2).prod(axis=0).numpy(), np.full((5, 6), 2**4))
+    _check_ast_count(0, Tensor(4).prod())
+    np.testing.assert_equal(Tensor(4).prod().numpy(), 4)
+
   def test_const_max(self):
     _check_ast_count(0, Tensor.ones(4, 5, 6).max())
     np.testing.assert_equal(Tensor.ones(4, 5, 6).max().numpy(), 1)
@@ -234,7 +251,6 @@ class TestTautologicalCompare(unittest.TestCase):
     np.testing.assert_equal((Tensor(True) < Tensor(False)).numpy(), False)
     np.testing.assert_equal((Tensor(True) < Tensor(True)).numpy(), False)
 
-  @unittest.skip("not implemented yet")
   def test_a_eq_a(self):
     # self eq is always true for int or bool
     a = Tensor([1, 2, 3])
@@ -244,7 +260,6 @@ class TestTautologicalCompare(unittest.TestCase):
     a = Tensor([math.nan, 1.0, 2.0])
     np.testing.assert_equal((a == a).numpy(), [False, True, True])
 
-  @unittest.skip("not implemented yet")
   def test_a_ne_a(self):
     # self not eq is always false for int or bool
     a = Tensor([1, 2, 3])

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_conv_shapetracker.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import unittest
+from tinygrad.ops import Ops
 from tinygrad.tensor import Tensor
-from tinygrad.ops import MetaOps, BufferOps
 from tinygrad.nn import Conv2d
 from tinygrad.engine.schedule import create_schedule
 from tinygrad.shape.shapetracker import ShapeTracker, View
@@ -11,25 +11,23 @@ from test.unit.test_shapetracker import shapetracker_getitem
 class TestConvShapetracker(unittest.TestCase):
   def test_conv_3x3_one_view(self):
     conv = Conv2d(16, 32, (3, 3))
-    seen = set()
 
-    # first run to init the weights, they are
-    create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata]
+    # first run to init the weights, they are scheduled.
+    create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata])
     # run it again to get the kernels
-    sched = [si for si in create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata]
+    sched = [si for si in create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata]) if si.ast.op is Ops.SINK]
     assert len(sched) == 1, f"conv should only have one kernel, getting {len(sched)}"
-    for st in [x.
+    for st in [x.st_arg for x in sched[0].ast.parents if x.op is Ops.LOAD]:
       assert len(st.views) == 1
 
-  @unittest.expectedFailure
   def test_conv_2x2_backward_one_view(self):
     X = Tensor.rand(1, 1, 3, 3, requires_grad=True)
     conv = Conv2d(1, 1, (2, 2), bias=False)
     conv(X).mean().backward()
     si = X.grad.schedule()[-1]
     print(si)
-    ldb = [x for x in si.ast.
-    st: ShapeTracker = ldb.
+    ldb = [x for x in si.ast.parents if x.op is Ops.LOAD][0]
+    st: ShapeTracker = ldb.st_arg.simplify()
     # NOTE: st.real_size() is broken
     print(si.inputs[0].size)
     #self.assertEqual(si.inputs[0].size, st.real_size())
@@ -53,11 +51,8 @@ class TestConvShapetracker(unittest.TestCase):
       print(i, i1, i2, si.inputs[0].size, i1==i2)
       #self.assertEqual(i1, i2)
 
-
-
-    print(s)
-    print(va)
-    assert len(st.views) <= 2
+    with self.assertRaises(AssertionError):
+      assert len(st.views) <= 2
 
 if __name__ == '__main__':
   unittest.main()

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_copy_speed.py

@@ -4,7 +4,7 @@ from tinygrad import Device
 from tinygrad.helpers import Timing, CI, OSX
 import multiprocessing.shared_memory as shared_memory
 
-N = 4096
+N = 4096
 class TestCopySpeed(unittest.TestCase):
   @classmethod
   def setUpClass(cls): Device[Device.DEFAULT].synchronize()

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_device_speed.py

@@ -1,13 +1,12 @@
 import unittest
 from tinygrad import Device
-from tinygrad.codegen.uopgraph import UOpGraph
 from tinygrad.helpers import Timing, Profiling
 
 class TestDeviceSpeed(unittest.TestCase):
   @classmethod
   def setUpClass(cls):
     cls.dev = Device[Device.DEFAULT]
-    cls.empty = Device[Device.DEFAULT].renderer.render("test",
+    cls.empty = Device[Device.DEFAULT].renderer.render("test", [])
 
   def test_empty_compile(self):
     with Timing("compiler "):