tinygrad 0.10.1.tar.gz → 0.10.2.tar.gz
This diff shows the changes between the publicly released tinygrad 0.10.1 and 0.10.2 package versions as they appear in their public registry. It is provided for informational purposes only.
- {tinygrad-0.10.1 → tinygrad-0.10.2}/PKG-INFO +20 -8
- {tinygrad-0.10.1 → tinygrad-0.10.2}/README.md +2 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/setup.py +19 -12
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_arange.py +6 -9
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_assign.py +1 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_const_folding.py +43 -8
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_conv_shapetracker.py +1 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_copy_speed.py +5 -5
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_device_speed.py +1 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype.py +18 -3
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype_alu.py +1 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_fuzz_shape_ops.py +1 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_jit.py +5 -5
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_kernel_cache.py +4 -4
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer.py +77 -47
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer_failures.py +6 -6
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer_overflows.py +2 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_multitensor.py +5 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_nn.py +8 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_ops.py +118 -36
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_pickle.py +4 -4
- tinygrad-0.10.2/test/test_quantize_onnx.py +212 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_randomness.py +2 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_renderer_failures.py +6 -5
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_sample.py +2 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_schedule.py +125 -29
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_search.py +32 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_speed_v_torch.py +10 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_subbuffer.py +1 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor.py +11 -5
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor_uop.py +4 -4
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tiny.py +2 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_transcendental.py +15 -4
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_uop_graph.py +64 -5
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_uops.py +19 -33
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_uops_stats.py +3 -3
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_zero_copy.py +1 -1
- tinygrad-0.10.2/tinygrad/codegen/devectorizer.py +247 -0
- tinygrad-0.10.2/tinygrad/codegen/expander.py +121 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/kernel.py +35 -37
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/linearize.py +19 -10
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/lowerer.py +31 -8
- tinygrad-0.10.2/tinygrad/codegen/symbolic.py +476 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/transcendental.py +10 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/device.py +28 -11
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/dtype.py +12 -3
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/jit.py +3 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/multi.py +0 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/realize.py +7 -4
- tinygrad-0.10.2/tinygrad/engine/schedule.py +458 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/search.py +20 -27
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/gradient.py +3 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/helpers.py +7 -4
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/state.py +2 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/ops.py +64 -329
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/__init__.py +19 -3
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/cstyle.py +39 -18
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/llvmir.py +55 -18
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/ptx.py +6 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/wgsl.py +20 -12
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/libc.py +404 -71
- tinygrad-0.10.1/tinygrad/runtime/autogen/libpciaccess.py → tinygrad-0.10.2/tinygrad/runtime/autogen/pci.py +25 -715
- tinygrad-0.10.2/tinygrad/runtime/autogen/webgpu.py +6985 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/metal.py +28 -29
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_amd.py +37 -34
- tinygrad-0.10.1/tinygrad/runtime/ops_clang.py → tinygrad-0.10.2/tinygrad/runtime/ops_cpu.py +4 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_disk.py +1 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_dsp.py +59 -33
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_llvm.py +14 -12
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_metal.py +78 -62
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_nv.py +9 -6
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_python.py +5 -5
- tinygrad-0.10.2/tinygrad/runtime/ops_webgpu.py +225 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/am/amdev.py +23 -11
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/am/ip.py +10 -10
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/elf.py +2 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/hcq.py +7 -5
- tinygrad-0.10.2/tinygrad/runtime/support/llvm.py +26 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/shape/shapetracker.py +3 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/shape/view.py +2 -3
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/spec.py +21 -20
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/tensor.py +150 -90
- tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/highlight.min.js +1232 -0
- tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/languages/cpp.min.js +47 -0
- tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/languages/python.min.js +42 -0
- tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/styles/default.min.css +9 -0
- tinygrad-0.10.2/tinygrad/viz/assets/d3js.org/d3.v5.min.js +2 -0
- tinygrad-0.10.2/tinygrad/viz/assets/dagrejs.github.io/project/dagre-d3/latest/dagre-d3.min.js +4816 -0
- tinygrad-0.10.2/tinygrad/viz/assets/unpkg.com/@highlightjs/cdn-assets@11.10.0/styles/tokyo-night-dark.min.css +8 -0
- tinygrad-0.10.2/tinygrad/viz/index.html +544 -0
- tinygrad-0.10.2/tinygrad/viz/perfetto.html +178 -0
- tinygrad-0.10.2/tinygrad/viz/serve.py +205 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/PKG-INFO +20 -8
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/SOURCES.txt +18 -4
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/requires.txt +19 -6
- tinygrad-0.10.1/tinygrad/codegen/rewriter.py +0 -516
- tinygrad-0.10.1/tinygrad/engine/schedule.py +0 -486
- tinygrad-0.10.1/tinygrad/runtime/ops_webgpu.py +0 -63
- tinygrad-0.10.1/tinygrad/runtime/support/llvm.py +0 -32
- {tinygrad-0.10.1 → tinygrad-0.10.2}/LICENSE +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/setup.cfg +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_compile_failures.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_conv.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_fusion_op.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_gc.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_graph.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_hcq.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_image_dtype.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer_dumb.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_masked_st.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_metal.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_method_cache.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_net_speed.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_ocl.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_optim.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_profiler.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_rearrange_einops.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_setitem.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_specific_conv.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_symbolic_jit.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_symbolic_ops.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_symbolic_shapetracker.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor_data.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor_variable.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_to_numpy.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_winograd.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/memory.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/datasets.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/optim.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/py.typed +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/adreno.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/amd_gpu.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/comgr.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/cuda.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/hip.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/hsa.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/io_uring.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/kfd.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/kgsl.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/llvm.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/nv_gpu.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/nvrtc.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/opencl.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/qcom_dsp.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/vfio.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/cuda.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/hcq.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_cloud.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_cuda.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_gpu.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_hip.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_npy.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_qcom.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/allocator.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/am/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/compiler_cuda.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/compiler_hip.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/shape/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/dependency_links.txt +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/top_level.txt +0 -0
{tinygrad-0.10.1 → tinygrad-0.10.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: tinygrad
-Version: 0.10.1
+Version: 0.10.2
 Summary: You like pytorch? You like micrograd? You love tinygrad! <3
 Author: George Hotz
 License: MIT

@@ -20,13 +20,28 @@ Requires-Dist: typing-extensions; extra == "linting"
 Requires-Dist: pre-commit; extra == "linting"
 Requires-Dist: ruff; extra == "linting"
 Requires-Dist: types-tqdm; extra == "linting"
+Provides-Extra: testing-minimal
+Requires-Dist: numpy; extra == "testing-minimal"
+Requires-Dist: torch; extra == "testing-minimal"
+Requires-Dist: pytest; extra == "testing-minimal"
+Requires-Dist: pytest-xdist; extra == "testing-minimal"
+Requires-Dist: hypothesis; extra == "testing-minimal"
+Provides-Extra: testing-unit
+Requires-Dist: numpy; extra == "testing-unit"
+Requires-Dist: torch; extra == "testing-unit"
+Requires-Dist: pytest; extra == "testing-unit"
+Requires-Dist: pytest-xdist; extra == "testing-unit"
+Requires-Dist: hypothesis; extra == "testing-unit"
+Requires-Dist: tqdm; extra == "testing-unit"
+Requires-Dist: safetensors; extra == "testing-unit"
+Requires-Dist: tabulate; extra == "testing-unit"
 Provides-Extra: testing
 Requires-Dist: numpy; extra == "testing"
 Requires-Dist: torch; extra == "testing"
-Requires-Dist: jax; extra == "testing"
-Requires-Dist: pillow; extra == "testing"
 Requires-Dist: pytest; extra == "testing"
 Requires-Dist: pytest-xdist; extra == "testing"
+Requires-Dist: hypothesis; extra == "testing"
+Requires-Dist: pillow; extra == "testing"
 Requires-Dist: onnx==1.16.0; extra == "testing"
 Requires-Dist: onnx2torch; extra == "testing"
 Requires-Dist: opencv-python; extra == "testing"

@@ -39,13 +54,10 @@ Requires-Dist: tiktoken; extra == "testing"
 Requires-Dist: blobfile; extra == "testing"
 Requires-Dist: librosa; extra == "testing"
 Requires-Dist: networkx; extra == "testing"
-Requires-Dist: hypothesis; extra == "testing"
 Requires-Dist: nibabel; extra == "testing"
 Requires-Dist: bottle; extra == "testing"
 Requires-Dist: ggml-python; extra == "testing"
 Requires-Dist: capstone; extra == "testing"
-Provides-Extra: webgpu
-Requires-Dist: wgpu; extra == "webgpu"
 Provides-Extra: docs
 Requires-Dist: mkdocs; extra == "docs"
 Requires-Dist: mkdocs-material; extra == "docs"

@@ -149,7 +161,7 @@ See [examples/beautiful_mnist.py](examples/beautiful_mnist.py) for the full vers
 tinygrad already supports numerous accelerators, including:

 - [x] [GPU (OpenCL)](tinygrad/runtime/ops_gpu.py)
-- [x] [
+- [x] [CPU (C Code)](tinygrad/runtime/ops_cpu.py)
 - [x] [LLVM](tinygrad/runtime/ops_llvm.py)
 - [x] [METAL](tinygrad/runtime/ops_metal.py)
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)

@@ -219,7 +231,7 @@ We'll start with what will get your PR closed with a pointer to this section:

 - No code golf! While low line count is a guiding light of this project, anything that remotely looks like code golf will be closed. The true goal is reducing complexity and increasing readability, and deleting `\n`s does nothing to help with that.
 - All docs and whitespace changes will be closed unless you are a well-known contributor. The people writing the docs should be those who know the codebase the absolute best. People who have not demonstrated that shouldn't be messing with docs. Whitespace changes are both useless *and* carry a risk of introducing bugs.
-- Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with
+- Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with maintainability and readability.
 - In general, the code outside the core `tinygrad/` folder is not well tested, so unless the current code there is broken, you shouldn't be changing it.
 - If your PR looks "complex", is a big diff, or adds lots of lines, it won't be reviewed or merged. Consider breaking it up into smaller PRs that are individually clear wins. A common pattern I see is prerequisite refactors before adding new functionality. If you can (cleanly) refactor to the point that the feature is a 3 line change, this is great, and something easy for us to review.
{tinygrad-0.10.1 → tinygrad-0.10.2}/README.md

@@ -81,7 +81,7 @@ See [examples/beautiful_mnist.py](examples/beautiful_mnist.py) for the full vers
 tinygrad already supports numerous accelerators, including:

 - [x] [GPU (OpenCL)](tinygrad/runtime/ops_gpu.py)
-- [x] [
+- [x] [CPU (C Code)](tinygrad/runtime/ops_cpu.py)
 - [x] [LLVM](tinygrad/runtime/ops_llvm.py)
 - [x] [METAL](tinygrad/runtime/ops_metal.py)
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)

@@ -151,7 +151,7 @@ We'll start with what will get your PR closed with a pointer to this section:

 - No code golf! While low line count is a guiding light of this project, anything that remotely looks like code golf will be closed. The true goal is reducing complexity and increasing readability, and deleting `\n`s does nothing to help with that.
 - All docs and whitespace changes will be closed unless you are a well-known contributor. The people writing the docs should be those who know the codebase the absolute best. People who have not demonstrated that shouldn't be messing with docs. Whitespace changes are both useless *and* carry a risk of introducing bugs.
-- Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with
+- Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with maintainability and readability.
 - In general, the code outside the core `tinygrad/` folder is not well tested, so unless the current code there is broken, you shouldn't be changing it.
 - If your PR looks "complex", is a big diff, or adds lots of lines, it won't be reviewed or merged. Consider breaking it up into smaller PRs that are individually clear wins. A common pattern I see is prerequisite refactors before adding new functionality. If you can (cleanly) refactor to the point that the feature is a 3 line change, this is great, and something easy for us to review.
{tinygrad-0.10.1 → tinygrad-0.10.2}/setup.py

@@ -7,16 +7,24 @@ directory = Path(__file__).resolve().parent
 with open(directory / 'README.md', encoding='utf-8') as f:
   long_description = f.read()

+testing_minimal = [
+  "numpy",
+  "torch",
+  "pytest",
+  "pytest-xdist",
+  "hypothesis",
+]
+
 setup(name='tinygrad',
-      version='0.10.1',
+      version='0.10.2',
       description='You like pytorch? You like micrograd? You love tinygrad! <3',
       author='George Hotz',
       license='MIT',
       long_description=long_description,
       long_description_content_type='text/markdown',
-      packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer', 'tinygrad.engine',
+      packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer', 'tinygrad.engine', 'tinygrad.viz',
                   'tinygrad.runtime', 'tinygrad.runtime.support', 'tinygrad.runtime.support.am', 'tinygrad.runtime.graph', 'tinygrad.shape'],
-      package_data = {'tinygrad': ['py.typed']},
+      package_data = {'tinygrad': ['py.typed'], 'tinygrad.viz': ['index.html', 'perfetto.html', 'assets/**/*']},
       classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License"

@@ -35,13 +43,14 @@ setup(name='tinygrad',
         "types-tqdm",
       ],
       #'mlperf': ["mlperf-logging @ git+https://github.com/mlperf/logging.git@4.1.0-rc3"],
-
-
-
-
+      'testing_minimal': testing_minimal,
+      'testing_unit': testing_minimal + [
+        "tqdm",
+        "safetensors",
+        "tabulate" # for sz.py
+      ],
+      'testing': testing_minimal + [
         "pillow",
-        "pytest",
-        "pytest-xdist",
         "onnx==1.16.0",
         "onnx2torch",
         "opencv-python",

@@ -54,13 +63,11 @@ setup(name='tinygrad',
         "blobfile",
         "librosa",
         "networkx",
-        "hypothesis",
         "nibabel",
         "bottle",
         "ggml-python",
         "capstone"
       ],
-      'webgpu': ["wgpu"],
       'docs': [
         "mkdocs",
         "mkdocs-material",

@@ -73,6 +80,6 @@ setup(name='tinygrad',
       'testing_tf': [
         "tensorflow==2.15.1",
         "tensorflow_addons",
-      ]
+      ],
       },
       include_package_data=True)
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_arange.py

@@ -66,20 +66,17 @@ class TestArange(unittest.TestCase):
     return self.test_all_opts([Opt(OptOps.UPCAST, 0, 4), Opt(OptOps.UNROLL, 0, 4)], [Opt(op=OptOps.GROUP, axis=0, arg=0)])

 class TestIndexing(unittest.TestCase):
-  # update: passing after CAST_BEFORE_VIEW=1 deletion
-  # @unittest.expectedFailure
   def test_arange_2_reduce(self):
     needle = Tensor.zeros(16384, dtype=dtypes.int).contiguous()
     needle[1337] = 1
     needle.realize()
     with Context(NOOPT=1, FUSE_ARANGE=1):
       GlobalCounters.reset()
-
-      out = ((Tensor.arange(1,16385).reshape(16384,1)-1)*needle.reshape(16384,1)).sum()
+      out = ((Tensor.arange(1,16385)-1)*needle).sum()
       sched = out.schedule()
-
+      self.assertEqual(len(sched), 1)
       run_schedule(sched)
-
+      self.assertEqual(out.item(), 1337)

   @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
   def test_manual_index(self):

@@ -95,7 +92,7 @@ class TestIndexing(unittest.TestCase):
     full = (rng==idxs).where(reshape_dataset, Tensor.zeros(4, 256, 16384, 1))
     X = full.sum(axis=(2,3))
     sched = X.schedule()
-
+    self.assertEqual(len(sched), 1)
     run_schedule(sched)
     assert GlobalCounters.global_ops < 4*16384, f"too many ops {GlobalCounters.global_ops}"
     np.testing.assert_allclose(real_index, X.numpy())

@@ -111,7 +108,7 @@ class TestIndexing(unittest.TestCase):
     assert X.shape == (4,256)
     sched = X.schedule()
     # TODO: enable these asserts when the scheduler can handle this
-    #
+    #self.assertEqual(len(sched), 1)
     run_schedule(sched)
     #assert GlobalCounters.global_ops < 4*16384, f"too many ops {GlobalCounters.global_ops}"
     np.testing.assert_allclose(real_index, X.numpy())

@@ -126,7 +123,7 @@ class TestIndexing(unittest.TestCase):
     X = dataset[idxs]
     assert X.shape == (4,256)
     sched = X.schedule()
-
+    self.assertEqual(len(sched), 2)
     run_schedule(sched)
     assert GlobalCounters.global_ops < 4*16384, f"too many ops {GlobalCounters.global_ops} != {4*16384}"
     np.testing.assert_allclose(real_index, X.numpy())
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_assign.py

@@ -203,6 +203,7 @@ class TestAssign(unittest.TestCase):
     np.testing.assert_equal(b0.numpy(), 128)
     np.testing.assert_equal(b1.numpy(), 608)

+  @unittest.skip("TODO: bring this assert back")
   def test_crossunder_assign(self):
     # NOTE: should *not* raise AssertionError from numpy
     with self.assertRaisesRegex(RuntimeError, "cycle"):
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_const_folding.py

@@ -1,7 +1,10 @@
-import unittest, math
+import unittest, itertools, math
+from typing import Any
 from tinygrad import Tensor, Device, dtypes
-from tinygrad.
+from tinygrad.dtype import DType
+from tinygrad.ops import Ops, UOp
 from tinygrad.helpers import CI
+from tinygrad.codegen.devectorizer import full_graph_rewrite
 import numpy as np
 from tinygrad.device import is_dtype_supported

@@ -94,18 +97,50 @@ class TestBinaryOpsConstFolding(unittest.TestCase):
     _check_ast_count(0, Tensor([1.0, 2, 3, 4]) ** Tensor.ones(4))
   def test_literal_one_pow(self):
     _check_ast_count(0, 1 ** Tensor([1.0, 2, 3, 4]))
-  # TODO: pow simplification
   def test_tensor_one_pow(self):
-    _check_ast_count(
+    _check_ast_count(0, Tensor.ones(4) ** Tensor([1.0, 2, 3, 4]))
+
+class TestBitcastConstFolding(unittest.TestCase):
+  def test_scalar_bitcast(self):
+    def t(cases: dict[DType, Any]):
+      for (from_dt, from_v), (to_dt, to_v) in itertools.product(cases.items(), cases.items()):
+        if not math.isnan(from_v):
+          r = full_graph_rewrite(UOp.const(from_dt, from_v).bitcast(to_dt).sink()).src[0]
+          self.assertEqual(r.op, Ops.CONST, msg:=f"{from_dt} -> {to_dt} ({from_v} -> {to_v})")
+          self.assertEqual(r.dtype, to_dt, msg)
+          np.testing.assert_equal(r.arg, to_v, msg)
+
+    t({dtypes.int8: 0, dtypes.uint8: 0, dtypes.bool: False})
+    t({dtypes.int8: 1, dtypes.uint8: 1, dtypes.bool: True})
+
+    t({dtypes.int8: -1, dtypes.uint8: 2**8-1})
+    t({dtypes.int16: -1, dtypes.uint16: 2**16-1, dtypes.float16: float('nan')})
+    t({dtypes.int32: -1, dtypes.uint32: 2**32-1, dtypes.float32: float('nan')})
+    t({dtypes.int64: -1, dtypes.uint64: 2**64-1, dtypes.float64: float('nan')})
+
+    t({dtypes.int8: -2**7, dtypes.uint8: 2**7})
+    t({dtypes.int16: -2**15, dtypes.uint16: 2**15})
+    t({dtypes.int32: -2**31, dtypes.uint32: 2**31})
+    t({dtypes.int64: -2**63, dtypes.uint64: 2**63})
+
+    t({dtypes.int16: 13496, dtypes.uint16: 13496, dtypes.float16: 0.294921875})
+    t({dtypes.int32: 1050081145, dtypes.uint32: 1050081145, dtypes.float32: 0.29485681653022766})
+    t({dtypes.int64: 4598983288165178391, dtypes.uint64: 4598983288165178391, dtypes.float64: 0.29485681936461233})
+
+  def test_vec_bitcast(self):
+    r = full_graph_rewrite(UOp.const(dtypes.int32.vec(3), (-1, -2**31, 75)).bitcast(dtypes.uint32.vec(3)).sink()).src[0]
+    self.assertEqual(r.op, Ops.VECTORIZE)
+    self.assertEqual(r.dtype, dtypes.uint32.vec(3))
+    self.assertEqual(tuple(x.arg for x in r.src), (2**32-1, 2**31, 75))

 # folds advance indexing into basic indexing
 class TestIndexingConstFolding(unittest.TestCase):
   def test_scalar_index(self):
     t = Tensor.arange(16).float().reshape(1,1,4,4).realize()
-
-
-
-    _check_ast_count(
+    # TODO: fold these
+    _check_ast_count(2, t[:,:,Tensor(1),:])
+    _check_ast_count(2, t[:,:,Tensor(1)+2,:])
+    _check_ast_count(2, t[:,:,Tensor(1),Tensor(0)])

   @unittest.expectedFailure
   def test_const_tensor_index(self):
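The TestBitcastConstFolding cases above check that a constant bitcast folds to the value obtained by reinterpreting the same bytes under the target dtype. As an illustrative sketch only (plain Python struct, not tinygrad's rewrite rules; the helper name and format codes here are my own choices), the expected values in the tables can be reproduced like this:

```python
import struct

def bitcast_scalar(value, from_fmt: str, to_fmt: str):
  # reinterpret the raw bytes of a scalar under another struct format code
  return struct.unpack(to_fmt, struct.pack(from_fmt, value))[0]

print(bitcast_scalar(-1, 'i', 'I'))      # 4294967295 == 2**32-1  (int32 -1 -> uint32)
print(bitcast_scalar(-2**31, 'i', 'I'))  # 2147483648 == 2**31    (int32 min -> uint32)
print(bitcast_scalar(13496, 'h', 'e'))   # 0.294921875            (int16 -> float16)
```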
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_conv_shapetracker.py

@@ -26,9 +26,8 @@ class TestConvShapetracker(unittest.TestCase):
     print(si)
     ldb = [x for x in si.ast.toposort if x.op is Ops.LOAD][0]
     st: ShapeTracker = ldb.st_arg.simplify()
-    # NOTE: st.real_size() is broken
     print(si.inputs[0].size)
-
+    self.assertEqual(si.inputs[0].size, st.real_size())
     for v in st.views: print(v)

     # same st
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_copy_speed.py

@@ -24,7 +24,7 @@ class TestCopySpeed(unittest.TestCase):
     s.unlink()

   def testCopyCPUtoDefault(self):
-    t = Tensor.
+    t = Tensor.ones(N, N, device="CPU").contiguous().realize()
     print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
     for _ in range(3):
       with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):

@@ -35,7 +35,7 @@ class TestCopySpeed(unittest.TestCase):
   def testCopyCPUtoDefaultFresh(self):
     print("fresh copy")
     for _ in range(3):
-      t = Tensor.
+      t = Tensor.ones(N, N, device="CPU").contiguous().realize()
       with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"): # noqa: F821
         with Timing("queue: "):
           t.to(Device.DEFAULT).realize()

@@ -43,18 +43,18 @@ class TestCopySpeed(unittest.TestCase):
       del t

   def testCopyDefaulttoCPU(self):
-    t = Tensor.
+    t = Tensor.ones(N, N).contiguous().realize()
     print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
     for _ in range(3):
       with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
-        t.to('
+        t.to('CPU').realize()

   @unittest.skipIf(CI, "CI doesn't have 6 GPUs")
   @unittest.skipIf(Device.DEFAULT != "GPU", "only test this on GPU")
   def testCopyCPUto6GPUs(self):
     from tinygrad.runtime.ops_gpu import CLDevice
     if len(CLDevice.device_ids) != 6: raise unittest.SkipTest("computer doesn't have 6 GPUs")
-    t = Tensor.
+    t = Tensor.ones(N, N, device="CPU").contiguous().realize()
     print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
     for _ in range(3):
       with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s ({t.nbytes()*6/ns:.2f} GB/s total)"):
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_device_speed.py

@@ -6,7 +6,7 @@ class TestDeviceSpeed(unittest.TestCase):
   @classmethod
   def setUpClass(cls):
     cls.dev = Device[Device.DEFAULT]
-    cls.empty = Device[Device.DEFAULT].renderer.render(
+    cls.empty = Device[Device.DEFAULT].renderer.render([])

   def test_empty_compile(self):
     with Timing("compiler "):
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype.py

@@ -1,10 +1,10 @@
-import unittest, operator, subprocess, math
+import unittest, operator, subprocess, struct, math
 import numpy as np
 import torch
 from typing import Any, List
 from tinygrad.device import is_dtype_supported
 from tinygrad.helpers import getenv, DEBUG, CI
-from tinygrad.dtype import DType, DTYPES_DICT, ImageDType, PtrDType, least_upper_float, least_upper_dtype, truncate_fp16, to_dtype
+from tinygrad.dtype import DType, DTYPES_DICT, ImageDType, PtrDType, least_upper_float, least_upper_dtype, truncate_fp16, truncate_bf16, to_dtype
 from tinygrad import Device, Tensor, dtypes
 from tinygrad.tensor import _to_np_dtype
 from hypothesis import assume, given, settings, strategies as strat

@@ -327,6 +327,11 @@ class TestPtrDType(unittest.TestCase):
     dt = dtypes.float.vec(4).ptr().vec(4)
     self.assertEqual(dt, eval(str(dt)))

+  def test_vec_ptr_sz(self):
+    dt = dtypes.float.ptr(1024).vec(4)
+    self.assertEqual(dt, eval(str(dt)))
+    self.assertEqual(str(dt), "dtypes.float.ptr(1024).vec(4)")
+
   def test_vcount(self):
     dt = dtypes.float.ptr().vec(4)
     self.assertEqual(dt.vcount, 4)

@@ -434,6 +439,14 @@ class TestHelpers(unittest.TestCase):
     self.assertEqual(truncate_fp16(65519.999), 65504)
     self.assertEqual(truncate_fp16(65520), math.inf)

+  def test_truncate_bf16(self):
+    self.assertEqual(truncate_bf16(1), 1)
+    self.assertAlmostEqual(truncate_bf16(1.1), 1.09375, places=7)
+    max_bf16 = struct.unpack('f', struct.pack('I', 0x7f7f0000))[0]
+    self.assertEqual(truncate_bf16(max_bf16), max_bf16)
+    self.assertEqual(truncate_bf16(min_bf16:=-max_bf16), min_bf16)
+    self.assertEqual(truncate_bf16(max_bf16 * 1.001), math.inf)
+
 class TestTypeSpec(unittest.TestCase):
   def setUp(self):
     self.old_default_int, self.old_default_float = dtypes.default_int, dtypes.default_float

@@ -796,7 +809,8 @@ class TestAutoCastType(unittest.TestCase):
     t.reshape(2, 1).expand(2, 10001).max().backward()
     np.testing.assert_allclose(t.grad.numpy(), [1, 0])

-  @unittest.skipIf(Device.DEFAULT=="PYTHON", "very slow")
+  @unittest.skipIf(Device.DEFAULT == "PYTHON", "very slow")
+  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "Binding size is larger than the maximum storage buffer binding size")
   @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
   def test_mean_half_precision_underflow(self):
     N = 10000

@@ -812,6 +826,7 @@ class TestAutoCastType(unittest.TestCase):
     t.square().mean().backward()
     np.testing.assert_allclose(t.grad.numpy().flatten(), [60000 * 2 / (N*N)] * N*N)

+  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "Precision error")
   @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
   def test_softmax_dtype(self):
     data = [1, 2, 3]
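For context on the new test_truncate_bf16 assertions above: a bfloat16 value carries only the top 16 bits of the float32 bit pattern, which is why 1.1 collapses to 1.09375. Below is a minimal sketch of that truncation, assuming plain bit masking; it is my illustration, not tinygrad's truncate_bf16, which per the test also overflows values beyond the bf16 maximum to math.inf.

```python
import struct

def bf16_truncate(x: float) -> float:
  # zero the low 16 bits of the float32 representation; no overflow-to-inf handling
  bits = struct.unpack('I', struct.pack('f', x))[0]
  return struct.unpack('f', struct.pack('I', bits & 0xFFFF0000))[0]

print(bf16_truncate(1.1))  # 1.09375, matching assertAlmostEqual(truncate_bf16(1.1), 1.09375)
print(bf16_truncate(1.0))  # 1.0 is exactly representable, unchanged
```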
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype_alu.py

@@ -88,9 +88,8 @@ def universal_test_cast(a, in_dtype, dtype):
   numpy_value = np.array([a], dtype=_to_np_dtype(in_dtype)).astype(_to_np_dtype(dtype))
   np.testing.assert_equal(tensor_value.numpy(), numpy_value)

+@unittest.skipIf(Device.DEFAULT == "WEBGPU", "Inf and nan cases are wrong on WebGPU")
 def universal_test_midcast(a, b, c, op1, op2, d1:DType, d2:DType):
-  # the 'inf' and 'nan' cases are wrong on WEBGPU
-  if (any(map(math.isnan, [a, b, c])) or math.isinf(c)) and Device.DEFAULT == "WEBGPU": return
   if not isinstance(op1, tuple): op1 = (op1, op1)
   if not isinstance(op2, tuple): op2 = (op2, op2)
   at, bt, ct = Tensor([a], dtype=d1), Tensor([b], dtype=d1), Tensor([c], dtype=d2)
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_fuzz_shape_ops.py

@@ -38,7 +38,7 @@ def apply(tor, ten, tor_fn, ten_fn=None):
   except: ten, ok = None, not ok # noqa: E722
   return tor, ten, ok

-@unittest.skipIf(CI and Device.DEFAULT
+@unittest.skipIf(CI and Device.DEFAULT in ("CPU", "NV"), "slow")
 class TestShapeOps(unittest.TestCase):
   @settings.get_profile(__file__)
   @given(st_shape(), st_int32, st.one_of(st_int32, st.lists(st_int32)))
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_jit.py

@@ -22,7 +22,7 @@ def _simple_test(add, extract=lambda x: x, N=10):
 class TestJit(unittest.TestCase):

   @settings(deadline=2e4)
-  @unittest.skipUnless(Device.DEFAULT in ["LLVM", "
+  @unittest.skipUnless(Device.DEFAULT in ["LLVM", "CPU"], f"no support on {Device.DEFAULT}")
   @given(strat.sampled_from([Tensor.exp2, Tensor.log2, Tensor.sin]))
   def test_approx_jit_timeout(self, op):
     with Context(TRANSCENDENTAL=2):

@@ -497,8 +497,8 @@ class TestCopyInsideJit(unittest.TestCase):
     @TinyJit
     def add(x,y) -> Tensor: return x.to(Device.DEFAULT)+y
     for _ in range(5):
-      # create a Tensor
-      a = Tensor.rand(16,16,device="
+      # create a Tensor on CPU
+      a = Tensor.rand(16,16,device="CPU").realize()
       b = Tensor.rand(16,16).realize()
       out = add(a,b)
       np.testing.assert_allclose(out.flatten().tolist(), [x+y for x,y in zip(a.flatten().tolist(), b.flatten().tolist())])

@@ -529,12 +529,12 @@ class TestJitPrune(unittest.TestCase):
     w2_prune = TinyJit(w2, prune=True)

     for _ in range(3):
-      a = Tensor.rand(16, device="
+      a = Tensor.rand(16, device="CPU").realize()
       out = w2_noprune(a)
       np.testing.assert_allclose(out.tolist(), [x*2+y for x,y in zip(weights.tolist(), a.tolist())])

     for _ in range(3):
-      a = Tensor.rand(16, device="
+      a = Tensor.rand(16, device="CPU").realize()
       out = w2_prune(a)
       np.testing.assert_allclose(out.tolist(), [x*2+y for x,y in zip(weights.tolist(), a.tolist())])
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_kernel_cache.py

@@ -5,7 +5,7 @@ from tinygrad import Device

 class TestKernelCache(unittest.TestCase):
   def test_kernel_cache_in_action(self):
-    if Device.DEFAULT not in ["
+    if Device.DEFAULT not in ["CPU"]:
       self.skipTest("No custom kernel cache is implemented")

     unique_const = 0.6765677269

@@ -16,14 +16,14 @@ class TestKernelCache(unittest.TestCase):

     a1 = Tensor.rand(4,4).realize()
     b1 = Tensor.rand(4,4).realize()
-    orig_compile_func = Device['
-    Device['
+    orig_compile_func = Device['CPU'].compiler
+    Device['CPU'].compiler = None # making it not callable

     try:
       x1 = a1 + b1 + unique_const
       x1.realize() # Same kernel should be from cache.
     finally:
-      Device['
+      Device['CPU'].compiler = orig_compile_func

 if __name__ == "__main__":
   unittest.main()