tensorcircuit-nightly 1.4.0.dev20251014__tar.gz → 1.4.0.dev20251125__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of tensorcircuit-nightly has been flagged as potentially problematic by the registry; review the release details before installing.

Files changed (106)
  1. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/CHANGELOG.md +8 -0
  2. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/PKG-INFO +3 -2
  3. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/README.md +2 -1
  4. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/README_cn.md +2 -2
  5. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/__init__.py +1 -1
  6. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/experimental.py +447 -66
  7. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/interfaces/tensortrans.py +6 -2
  8. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/interfaces/torch.py +14 -4
  9. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit_nightly.egg-info/PKG-INFO +3 -2
  10. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/HISTORY.md +0 -0
  11. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/LICENSE +0 -0
  12. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/MANIFEST.in +0 -0
  13. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/pyproject.toml +0 -0
  14. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/setup.cfg +0 -0
  15. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/setup.py +0 -0
  16. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/about.py +0 -0
  17. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/abstractcircuit.py +0 -0
  18. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/analogcircuit.py +0 -0
  19. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/__init__.py +0 -0
  20. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/ai/__init__.py +0 -0
  21. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/ai/ensemble.py +0 -0
  22. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/dqas.py +0 -0
  23. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/finance/__init__.py +0 -0
  24. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/finance/portfolio.py +0 -0
  25. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/graphdata.py +0 -0
  26. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/layers.py +0 -0
  27. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/optimization.py +0 -0
  28. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/physics/__init__.py +0 -0
  29. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/physics/baseline.py +0 -0
  30. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/physics/fss.py +0 -0
  31. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/utils.py +0 -0
  32. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/vags.py +0 -0
  33. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/van.py +0 -0
  34. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/applications/vqes.py +0 -0
  35. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/asciiart.py +0 -0
  36. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/backends/__init__.py +0 -0
  37. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/backends/abstract_backend.py +0 -0
  38. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/backends/backend_factory.py +0 -0
  39. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/backends/cupy_backend.py +0 -0
  40. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/backends/jax_backend.py +0 -0
  41. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/backends/jax_ops.py +0 -0
  42. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/backends/numpy_backend.py +0 -0
  43. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/backends/pytorch_backend.py +0 -0
  44. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/backends/pytorch_ops.py +0 -0
  45. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/backends/tensorflow_backend.py +0 -0
  46. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/backends/tf_ops.py +0 -0
  47. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/basecircuit.py +0 -0
  48. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/channels.py +0 -0
  49. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/circuit.py +0 -0
  50. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/cloud/__init__.py +0 -0
  51. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/cloud/abstraction.py +0 -0
  52. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/cloud/apis.py +0 -0
  53. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/cloud/config.py +0 -0
  54. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/cloud/local.py +0 -0
  55. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/cloud/quafu_provider.py +0 -0
  56. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/cloud/tencent.py +0 -0
  57. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/cloud/utils.py +0 -0
  58. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/cloud/wrapper.py +0 -0
  59. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/compiler/__init__.py +0 -0
  60. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/compiler/composed_compiler.py +0 -0
  61. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/compiler/qiskit_compiler.py +0 -0
  62. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/compiler/simple_compiler.py +0 -0
  63. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/cons.py +0 -0
  64. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/densitymatrix.py +0 -0
  65. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/fgs.py +0 -0
  66. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/gates.py +0 -0
  67. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/interfaces/__init__.py +0 -0
  68. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/interfaces/jax.py +0 -0
  69. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/interfaces/numpy.py +0 -0
  70. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/interfaces/scipy.py +0 -0
  71. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/interfaces/tensorflow.py +0 -0
  72. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/keras.py +0 -0
  73. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/mps_base.py +0 -0
  74. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/mpscircuit.py +0 -0
  75. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/noisemodel.py +0 -0
  76. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/quantum.py +0 -0
  77. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/quditcircuit.py +0 -0
  78. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/quditgates.py +0 -0
  79. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/results/__init__.py +0 -0
  80. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/results/counts.py +0 -0
  81. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/results/qem/__init__.py +0 -0
  82. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/results/qem/benchmark_circuits.py +0 -0
  83. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/results/qem/qem_methods.py +0 -0
  84. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/results/readout_mitigation.py +0 -0
  85. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/shadows.py +0 -0
  86. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/simplify.py +0 -0
  87. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/stabilizercircuit.py +0 -0
  88. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/templates/__init__.py +0 -0
  89. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/templates/ansatz.py +0 -0
  90. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/templates/blocks.py +0 -0
  91. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/templates/chems.py +0 -0
  92. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/templates/conversions.py +0 -0
  93. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/templates/dataset.py +0 -0
  94. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/templates/graphs.py +0 -0
  95. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/templates/hamiltonians.py +0 -0
  96. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/templates/lattice.py +0 -0
  97. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/templates/measurements.py +0 -0
  98. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/timeevol.py +0 -0
  99. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/torchnn.py +0 -0
  100. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/translation.py +0 -0
  101. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/utils.py +0 -0
  102. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit/vis.py +0 -0
  103. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit_nightly.egg-info/SOURCES.txt +0 -0
  104. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit_nightly.egg-info/dependency_links.txt +0 -0
  105. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit_nightly.egg-info/requires.txt +0 -0
  106. {tensorcircuit_nightly-1.4.0.dev20251014 → tensorcircuit_nightly-1.4.0.dev20251125}/tensorcircuit_nightly.egg-info/top_level.txt +0 -0
@@ -12,6 +12,14 @@
 
 - Add `su4` as a generic parameterized two-qubit gate.
 
+- Add multi-controller jax support for distributed contraction.
+
+### Fixed
+
+- Fix the breaking change in the jax dlpack API (dlcapsule -> tensor).
+
+- Better torch interface for dlpack translation.
+
 ## v1.4.0
 
 ### Added
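
The multi-controller entry refers to the `DistributedContractor` changes in `tensorcircuit/experimental.py` shown further below. A rough usage sketch, assuming a launcher where `jax.distributed.initialize()` can pick up the cluster coordinates, that the class is reachable as `tc.experimental.DistributedContractor`, and an illustrative circuit for `nodes_fn` (none of these specifics are asserted by the diff itself):

    # run one copy of this script per controller/host (sketch only)
    import jax
    import tensorcircuit as tc

    jax.distributed.initialize()  # cluster coordinates come from the launcher/env
    tc.set_backend("jax")

    n = 12

    def nodes_fn(params):
        c = tc.Circuit(n)
        for i in range(n):
            c.rx(i, theta=params[i])
        for i in range(n - 1):
            c.cnot(i, i + 1)
        # return the uncontracted tensor-network nodes for <Z0>
        return c.expectation_before((tc.gates.z(), [0]), reuse=False)

    params = tc.backend.ones([n])
    dc = tc.experimental.DistributedContractor(nodes_fn, params)
    value, grad = dc.value_and_grad(params)

Every process runs the same script; with the new code path, values and gradients come back already summed across all devices.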
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tensorcircuit-nightly
-Version: 1.4.0.dev20251014
+Version: 1.4.0.dev20251125
 Summary: High performance unified quantum computing framework for the NISQ era
 Author-email: TensorCircuit Authors <znfesnpbh@gmail.com>
 License-Expression: Apache-2.0
@@ -70,7 +70,7 @@ TensorCircuit-NG is the only actively maintained official version and a [fully c
 
 Please begin with [Quick Start](/docs/source/quickstart.rst) in the [full documentation](https://tensorcircuit-ng.readthedocs.io/).
 
-For more information on software usage, sota algorithm implementation and engineer paradigm demonstration, please refer to 90+ [example scripts](/examples) and 40+ [tutorial notebooks](https://tensorcircuit-ng.readthedocs.io/en/latest/#tutorials). API docstrings and test cases in [tests](/tests) are also informative. One can also refer to AI-native docs for tensorcircuit-ng: [Devin Deepwiki](https://deepwiki.com/tensorcircuit/tensorcircuit-ng) and [Context7 MCP](https://context7.com/tensorcircuit/tensorcircuit-ng).
+For more information on software usage, sota algorithm implementation and engineer paradigm demonstration, please refer to 100+ [example scripts](/examples) and 40+ [tutorial notebooks](https://tensorcircuit-ng.readthedocs.io/en/latest/#tutorials). API docstrings and test cases in [tests](/tests) are also informative. One can also refer to AI-native docs for tensorcircuit-ng: [Devin Deepwiki](https://deepwiki.com/tensorcircuit/tensorcircuit-ng) and [Context7 MCP](https://context7.com/tensorcircuit/tensorcircuit-ng).
 
 For beginners, please refer to [quantum computing lectures with TC-NG](https://github.com/sxzgroup/qc_lecture) to learn both quantum computing basics and representative usage of TensorCircuit-NG.
 
@@ -347,6 +347,7 @@ TensorCircuit-NG is open source, released under the Apache License, Version 2.0.
 <td align="center" valign="top" width="16.66%"><a href="https://github.com/Charlespkuer"><img src="https://avatars.githubusercontent.com/u/112697147?v=4?s=100" width="100px;" alt="Huang"/><br /><sub><b>Huang</b></sub></a><br /><a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Charlespkuer" title="Code">💻</a> <a href="#example-Charlespkuer" title="Examples">💡</a> <a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Charlespkuer" title="Tests">⚠️</a></td>
 <td align="center" valign="top" width="16.66%"><a href="https://github.com/Huang-Xu-Yang"><img src="https://avatars.githubusercontent.com/u/227286661?v=4?s=100" width="100px;" alt="Huang-Xu-Yang"/><br /><sub><b>Huang-Xu-Yang</b></sub></a><br /><a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Huang-Xu-Yang" title="Code">💻</a> <a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Huang-Xu-Yang" title="Tests">⚠️</a></td>
 <td align="center" valign="top" width="16.66%"><a href="https://github.com/WeiguoMa"><img src="https://avatars.githubusercontent.com/u/108172530?v=4?s=100" width="100px;" alt="Weiguo_M"/><br /><sub><b>Weiguo_M</b></sub></a><br /><a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=WeiguoMa" title="Code">💻</a> <a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=WeiguoMa" title="Tests">⚠️</a> <a href="#example-WeiguoMa" title="Examples">💡</a> <a href="#tutorial-WeiguoMa" title="Tutorials">✅</a></td>
+<td align="center" valign="top" width="16.66%"><a href="https://github.com/QuiXamii"><img src="https://avatars.githubusercontent.com/u/136054857?v=4?s=100" width="100px;" alt="Qixiang WANG"/><br /><sub><b>Qixiang WANG</b></sub></a><br /><a href="#example-QuiXamii" title="Examples">💡</a></td>
 </tr>
 </tbody>
 </table>
@@ -35,7 +35,7 @@ TensorCircuit-NG is the only actively maintained official version and a [fully c
 
 Please begin with [Quick Start](/docs/source/quickstart.rst) in the [full documentation](https://tensorcircuit-ng.readthedocs.io/).
 
-For more information on software usage, sota algorithm implementation and engineer paradigm demonstration, please refer to 90+ [example scripts](/examples) and 40+ [tutorial notebooks](https://tensorcircuit-ng.readthedocs.io/en/latest/#tutorials). API docstrings and test cases in [tests](/tests) are also informative. One can also refer to AI-native docs for tensorcircuit-ng: [Devin Deepwiki](https://deepwiki.com/tensorcircuit/tensorcircuit-ng) and [Context7 MCP](https://context7.com/tensorcircuit/tensorcircuit-ng).
+For more information on software usage, sota algorithm implementation and engineer paradigm demonstration, please refer to 100+ [example scripts](/examples) and 40+ [tutorial notebooks](https://tensorcircuit-ng.readthedocs.io/en/latest/#tutorials). API docstrings and test cases in [tests](/tests) are also informative. One can also refer to AI-native docs for tensorcircuit-ng: [Devin Deepwiki](https://deepwiki.com/tensorcircuit/tensorcircuit-ng) and [Context7 MCP](https://context7.com/tensorcircuit/tensorcircuit-ng).
 
 For beginners, please refer to [quantum computing lectures with TC-NG](https://github.com/sxzgroup/qc_lecture) to learn both quantum computing basics and representative usage of TensorCircuit-NG.
 
@@ -312,6 +312,7 @@ TensorCircuit-NG is open source, released under the Apache License, Version 2.0.
 <td align="center" valign="top" width="16.66%"><a href="https://github.com/Charlespkuer"><img src="https://avatars.githubusercontent.com/u/112697147?v=4?s=100" width="100px;" alt="Huang"/><br /><sub><b>Huang</b></sub></a><br /><a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Charlespkuer" title="Code">💻</a> <a href="#example-Charlespkuer" title="Examples">💡</a> <a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Charlespkuer" title="Tests">⚠️</a></td>
 <td align="center" valign="top" width="16.66%"><a href="https://github.com/Huang-Xu-Yang"><img src="https://avatars.githubusercontent.com/u/227286661?v=4?s=100" width="100px;" alt="Huang-Xu-Yang"/><br /><sub><b>Huang-Xu-Yang</b></sub></a><br /><a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Huang-Xu-Yang" title="Code">💻</a> <a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Huang-Xu-Yang" title="Tests">⚠️</a></td>
 <td align="center" valign="top" width="16.66%"><a href="https://github.com/WeiguoMa"><img src="https://avatars.githubusercontent.com/u/108172530?v=4?s=100" width="100px;" alt="Weiguo_M"/><br /><sub><b>Weiguo_M</b></sub></a><br /><a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=WeiguoMa" title="Code">💻</a> <a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=WeiguoMa" title="Tests">⚠️</a> <a href="#example-WeiguoMa" title="Examples">💡</a> <a href="#tutorial-WeiguoMa" title="Tutorials">✅</a></td>
+<td align="center" valign="top" width="16.66%"><a href="https://github.com/QuiXamii"><img src="https://avatars.githubusercontent.com/u/136054857?v=4?s=100" width="100px;" alt="Qixiang WANG"/><br /><sub><b>Qixiang WANG</b></sub></a><br /><a href="#example-QuiXamii" title="Examples">💡</a></td>
 </tr>
 </tbody>
 </table>
@@ -21,7 +21,7 @@
 
 <p align="center"> <a href="README.md">English</a> | 简体中文 </p>
 
-TensorCircuit-NG 是下一代量子软件框架，完美支持自动微分、即时编译、硬件加速、向量并行化和分布式训练。
+TensorCircuit-NG 是下一代量子软件框架，完美支持自动微分、即时编译、硬件加速、向量并行化和分布式训练，是量超智融合的首选平台。
 
 TensorCircuit-NG 建立在现代机器学习框架 Jax, TensorFlow, PyTorch 之上，支持机器学习后端无关的统一界面。 其特别适用于理想情况、含噪声情况、稳定子情况、可控近似情况、连续动力学情况及费米子情况下，大规模量子经典混合范式和变分量子算法的高效模拟。其可以高效地编织和模拟量子线路、张量网络和神经网络组成的混合计算图。
 
@@ -33,7 +33,7 @@ TensorCircuit-NG 是目前积极维护的唯一官方版本，是 TensorCircuit
 
 请从 [完整文档](https://tensorcircuit-ng.readthedocs.io/) 中的 [快速上手](/docs/source/quickstart.rst) 开始。
 
-有关软件用法，算法实现和工程范式演示的更多信息和介绍，请参阅 90+ [示例脚本](/examples) 和 40+ [案例教程](https://tensorcircuit-ng.readthedocs.io/en/latest/#tutorials)。 [测试](/tests) 用例和 API docstring 也提供了丰富的使用信息。
+有关软件用法，算法实现和工程范式演示的更多信息和介绍，请参阅 100+ [示例脚本](/examples) 和 40+ [案例教程](https://tensorcircuit-ng.readthedocs.io/en/latest/#tutorials)。 [测试](/tests) 用例和 API docstring 也提供了丰富的使用信息。
 
 TensorCircuit-NG 也支持 AI 原生编程资源：[Devin Deepwiki](https://deepwiki.com/tensorcircuit/tensorcircuit-ng) 和 [Context7 MCP](https://context7.com/tensorcircuit/tensorcircuit-ng).
 
@@ -1,4 +1,4 @@
-__version__ = "1.4.0.dev20251014"
+__version__ = "1.4.0.dev20251125"
 __author__ = "TensorCircuit Authors"
 __creator__ = "refraction-ray"
 
@@ -7,6 +7,10 @@ Experimental features
 from functools import partial
 import logging
 from typing import Any, Callable, Dict, Optional, Tuple, List, Sequence, Union
+import pickle
+import uuid
+import time
+import os
 
 import numpy as np
 
@@ -489,6 +493,229 @@ jax_func_load = jax_jitted_function_load
 PADDING_VALUE = -1
 jaxlib: Any
 ctg: Any
+Mesh: Any
+NamedSharding: Any
+P: Any
+
+
+def broadcast_py_object(obj: Any, shared_dir: Optional[str] = None) -> Any:
+    """
+    Broadcast a picklable Python object from process 0 to all other processes,
+    with a fallback mechanism from gRPC to a file-system based approach.
+
+    This function first attempts to use gRPC-based broadcast. If that fails due to
+    pickling issues, it falls back to a file-system based approach that is more robust.
+
+    :param obj: The Python object to broadcast. It must be picklable.
+        This object should exist on process 0 and can be None on others.
+    :type obj: Any
+    :param shared_dir: Directory path for the shared file system broadcast fallback.
+        If None, uses the current directory. Only used in fallback mode.
+    :type shared_dir: Optional[str], optional
+    :return: The broadcasted object, now present on all processes.
+    :rtype: Any
+    """
+    import jax
+    from jax.experimental import multihost_utils
+
+    try:
+        result = broadcast_py_object_jax(obj)
+        return result
+
+    except pickle.UnpicklingError as e:
+        # This block is executed if any process fails during the gRPC attempt.
+
+        multihost_utils.sync_global_devices("grpc_broadcast_failed_fallback_sync")
+
+        if jax.process_index() == 0:
+            border = "=" * 80
+            logger.warning(
+                "\n%s\nJAX gRPC broadcast failed with error: %s\n"
+                "--> Falling back to robust Shared File System broadcast method.\n%s",
+                border,
+                e,
+                border,
+            )
+
+        return broadcast_py_object_fs(obj, shared_dir)
+
+
+def broadcast_py_object_jax(obj: Any) -> Any:
+    """
+    Broadcast a picklable Python object from process 0 to all other processes
+    within the jax distributed system.
+
+    This function uses a two-step broadcast: first the size, then the data.
+    This is necessary because `broadcast_one_to_all` requires the same
+    shaped array on all hosts.
+
+    :param obj: The Python object to broadcast. It must be picklable.
+        This object should exist on process 0 and can be None on others.
+
+    :return: The broadcasted object, now present on all processes.
+    """
+    import jax as jaxlib
+    import pickle
+    from jax.experimental import multihost_utils
+
+    # Serialize to bytes on process 0, empty bytes on others
+    if jaxlib.process_index() == 0:
+        if obj is None:
+            raise ValueError("Object to broadcast from process 0 cannot be None.")
+        data = pickle.dumps(obj)
+        logger.info(
+            f"--- Size of object to be broadcast: {len(data) / 1024**2:.3f} MB ---"
+        )
+
+    else:
+        data = b""
+
+    # Step 1: Broadcast the length of the serialized data.
+    # We send a single-element int32 array.
+    length = np.array([len(data)], dtype=np.int32)
+    length = multihost_utils.broadcast_one_to_all(length)
+
+    length = int(length[0])  # type: ignore
+
+    # Step 2: Broadcast the actual data.
+    # Convert byte string to a uint8 array for broadcasting.
+    send_arr_uint8 = np.frombuffer(data, dtype=np.uint8)
+    padded_length = (length + 3) // 4 * 4
+    if send_arr_uint8.size < padded_length:
+        send_arr_uint8 = np.pad(  # type: ignore
+            send_arr_uint8, (0, padded_length - send_arr_uint8.size), mode="constant"
+        )
+    send_arr_int32 = send_arr_uint8.astype(np.int32)
+    # send_arr_int32 = jaxlib.numpy.array(send_arr_int32, dtype=np.int32)
+    send_arr_int32 = jaxlib.device_put(send_arr_int32)
+
+    jaxlib.experimental.multihost_utils.sync_global_devices("bulk_before")
+
+    received_arr = multihost_utils.broadcast_one_to_all(send_arr_int32)
+
+    received_arr = np.array(received_arr)
+    received_arr_uint8 = received_arr.astype(np.uint8)
+
+    # Step 3: Reconstruct the object from the received bytes.
+    # Convert the NumPy array back to bytes, truncate any padding, and unpickle.
+    received_data = received_arr_uint8[:length].tobytes()
+    # if jaxlib.process_index() == 0:
+    #     logger.info(f"Broadcasted object {obj}")
+    return pickle.loads(received_data)
+
+
+def broadcast_py_object_fs(
+    obj: Any, shared_dir: Optional[str] = None, timeout_seconds: int = 300
+) -> Any:
+    """
+    Broadcast a picklable Python object from process 0 to all other processes
+    using a shared file system approach.
+
+    This is a fallback method when gRPC-based broadcast fails. It uses UUID-based
+    file communication to share objects between processes through a shared file system.
+
+    :param obj: The Python object to broadcast. Must be picklable.
+        Should exist on process 0, can be None on others.
+    :type obj: Any
+    :param shared_dir: Directory path for shared file system communication.
+        If None, uses the current directory.
+    :type shared_dir: Optional[str], optional
+    :param timeout_seconds: Maximum time to wait for file operations before timing out.
+        Defaults to 300 seconds.
+    :type timeout_seconds: int, optional
+    :return: The broadcasted object, now present on all processes.
+    :rtype: Any
+    """
+    # to avoid very subtle bugs when broadcasting tree_data on A800 clusters
+    import jax
+    from jax.experimental import multihost_utils
+
+    if shared_dir is None:
+        shared_dir = "."
+    if jax.process_index() == 0:
+        os.makedirs(shared_dir, exist_ok=True)
+
+    id_comm_path = os.path.join(shared_dir, f".broadcast_temp_12318")
+    transfer_id = ""
+
+    if jax.process_index() == 0:
+        transfer_id = str(uuid.uuid4())
+        # print(f"[Process 0] Generated unique transfer ID: {transfer_id}", flush=True)
+        with open(id_comm_path, "w") as f:
+            f.write(transfer_id)
+
+    multihost_utils.sync_global_devices("fs_broadcast_id_written")
+
+    if jax.process_index() != 0:
+        start_time = time.time()
+        while not os.path.exists(id_comm_path):
+            time.sleep(0.1)
+            if time.time() - start_time > timeout_seconds:
+                raise TimeoutError(
+                    f"Process {jax.process_index()} timed out waiting for ID file: {id_comm_path}"
+                )
+        with open(id_comm_path, "r") as f:
+            transfer_id = f.read()
+
+    multihost_utils.sync_global_devices("fs_broadcast_id_read")
+    if jax.process_index() == 0:
+        try:
+            os.remove(id_comm_path)
+        except OSError:
+            pass  # ignore the error if the file was already cleaned up by another process
+
+    # define the data file and done-flag file paths used for this transfer
+    data_path = os.path.join(shared_dir, f"{transfer_id}.data")
+    done_path = os.path.join(shared_dir, f"{transfer_id}.done")
+
+    result_obj = None
+
+    if jax.process_index() == 0:
+        if obj is None:
+            raise ValueError("None cannot be broadcasted.")
+
+        # print(f"[Process 0] Pickling object...", flush=True)
+        pickled_data = pickle.dumps(obj)
+        logger.info(
+            f"[Process 0] Writing {len(pickled_data) / 1024**2:.3f} MB to {data_path}"
+        )
+        with open(data_path, "wb") as f:
+            f.write(pickled_data)
+
+        with open(done_path, "w") as f:
+            pass
+        logger.info(f"[Process 0] Write complete.")
+        result_obj = obj
+    else:
+        # print(f"[Process {jax.process_index()}] Waiting for done file: {done_path}", flush=True)
+        start_time = time.time()
+        while not os.path.exists(done_path):
+            time.sleep(0.1)
+            if time.time() - start_time > timeout_seconds:
+                raise TimeoutError(
+                    f"Process {jax.process_index()} timed out waiting for done file: {done_path}"
+                )
+
+        # print(f"[Process {jax.process_index()}] Done file found. Reading data from {data_path}", flush=True)
+        with open(data_path, "rb") as f:
+            pickled_data = f.read()
+
+        result_obj = pickle.loads(pickled_data)
+        logger.info(f"[Process {jax.process_index()}] Object successfully loaded.")
+
+    multihost_utils.sync_global_devices("fs_broadcast_read_complete")
+
+    if jax.process_index() == 0:
+        try:
+            os.remove(data_path)
+            os.remove(done_path)
+            # print(f"[Process 0] Cleaned up temporary files for transfer {transfer_id}.", flush=True)
+        except OSError as e:
+            logger.info(
+                f"[Process 0]: Failed to clean up temporary files: {e}",
+            )
+
+    return result_obj
 
 
 class DistributedContractor:
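
As a usage sketch of the helpers above (the payload dict is illustrative), every process calls the function with the same arguments and only process 0 needs to hold the real object:

    import jax

    payload = {"msg": "hello"} if jax.process_index() == 0 else None
    payload = broadcast_py_object(payload, shared_dir="/tmp/tc_broadcast")
    # afterwards every process holds an identical copy of the dict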
@@ -513,8 +740,10 @@
     :type params: Tensor
     :param cotengra_options: Configuration options passed to the cotengra optimizer. Defaults to None
     :type cotengra_options: Optional[Dict[str, Any]], optional
-    :param devices: List of devices to use. If None, uses all available local devices
+    :param devices: List of devices to use. If None, uses all available devices
     :type devices: Optional[List[Any]], optional
+    :param mesh: Mesh object to use for distributed computation. If None, uses all available devices
+    :type mesh: Optional[Any], optional
     """
 
     def __init__(
@@ -522,23 +751,30 @@
         nodes_fn: Callable[[Tensor], List[Gate]],
         params: Tensor,
         cotengra_options: Optional[Dict[str, Any]] = None,
-        devices: Optional[List[Any]] = None,
+        devices: Optional[List[Any]] = None,  # backward compatibility
+        mesh: Optional[Any] = None,
+        tree_data: Optional[Dict[str, Any]] = None,
     ) -> None:
         global jaxlib
         global ctg
+        global Mesh
+        global NamedSharding
+        global P
 
         logger.info("Initializing DistributedContractor...")
         import cotengra as ctg
+        from cotengra import ContractionTree
         import jax as jaxlib
+        from jax.sharding import Mesh, NamedSharding, PartitionSpec as P
 
         self.nodes_fn = nodes_fn
-        if devices is None:
-            self.num_devices = jaxlib.local_device_count()
-            self.devices = jaxlib.local_devices()
-            # TODO(@refraction-ray): multi host support
+        if mesh is not None:
+            self.mesh = mesh
+        elif devices is not None:
+            self.mesh = Mesh(devices, axis_names=("devices",))
         else:
-            self.devices = devices
-            self.num_devices = len(devices)
+            self.mesh = Mesh(jaxlib.devices(), axis_names=("devices",))
+        self.num_devices = len(self.mesh.devices)
 
         if self.num_devices <= 1:
             logger.info("DistributedContractor is running on a single device.")
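
For illustration, the three ways the new signature can be driven (an explicit `jax.sharding.Mesh`, a plain device list for backward compatibility, or nothing) might look like the following sketch; `nodes_fn` and `params` are assumed to be defined as in the earlier example:

    import numpy as np
    import jax
    from jax.sharding import Mesh

    # explicit mesh over all global devices (works across hosts)
    mesh = Mesh(np.array(jax.devices()), axis_names=("devices",))
    dc = DistributedContractor(nodes_fn, params, mesh=mesh)

    # backward compatible: a device list is wrapped into a 1D mesh internally
    dc = DistributedContractor(nodes_fn, params, devices=jax.local_devices())

    # default: a mesh over all available devices
    dc = DistributedContractor(nodes_fn, params)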
@@ -555,41 +791,68 @@
         ] = {}
 
         logger.info("Running cotengra pathfinder... (This may take a while)")
-        nodes = self.nodes_fn(self._params_template)
-        tn_info, _ = get_tn_info(nodes)
-        default_cotengra_options = {
-            "slicing_reconf_opts": {"target_size": 2**28},
-            "max_repeats": 128,
-            "progbar": True,
-            "minimize": "write",
-            "parallel": "auto",
-        }
-        if cotengra_options:
-            default_cotengra_options = cotengra_options
+        if tree_data is None:
+            if params is None:
+                raise ValueError("Please provide specific circuit parameters array.")
+            if jaxlib.process_index() == 0:
+                logger.info("Process 0: Running cotengra pathfinder...")
+                tree_data = self._get_tree_data(
+                    self.nodes_fn, self._params_template, cotengra_options  # type: ignore
+                )
 
-        opt = ctg.ReusableHyperOptimizer(**default_cotengra_options)
-        self.tree = opt.search(*tn_info)
-        actual_num_slices = self.tree.nslices
+            # Step 2: Use the robust helper function to broadcast the tree object.
+            # Process 0 sends its computed `tree_object`.
+            # Other processes send `None`, but receive the object from process 0.
 
-        print("\n--- Contraction Path Info ---")
-        stats = self.tree.contract_stats()
-        print(f"Path found with {actual_num_slices} slices.")
-        print(
-            f"Arithmetic Intensity (higher is better): {self.tree.arithmetic_intensity():.2f}"
+            if jaxlib.process_count() > 1:
+                # self.tree = broadcast_py_object(tree_object)
+                jaxlib.experimental.multihost_utils.sync_global_devices("tree_before")
+                logger.info(
+                    f"Process {jaxlib.process_index()}: Synchronizing contraction path..."
+                )
+                tree_data = broadcast_py_object(tree_data)
+                jaxlib.experimental.multihost_utils.sync_global_devices("tree_after")
+        else:
+            logger.info("Using pre-computed contraction path.")
+            if tree_data is None:
+                raise ValueError("Contraction path data is missing.")
+
+        self.tree = ContractionTree.from_path(
+            inputs=tree_data["inputs"],
+            output=tree_data["output"],
+            size_dict=tree_data["size_dict"],
+            path=tree_data["path"],
         )
-        print("flops (TFlops):", stats["flops"] / 2**40 / self.num_devices)
-        print("write (GB):", stats["write"] / 2**27 / actual_num_slices)
-        print("size (GB):", stats["size"] / 2**27)
-        print("-----------------------------\n")
+
+        # Restore slicing information
+        for ind, _ in tree_data["sliced_inds"].items():
+            self.tree.remove_ind_(ind)
+
+        logger.info(
+            f"Process {jaxlib.process_index()}: Contraction path successfully synchronized."
+        )
+        actual_num_slices = self.tree.nslices
+
+        self._report_tree_info()
 
         slices_per_device = int(np.ceil(actual_num_slices / self.num_devices))
         padded_size = slices_per_device * self.num_devices
         slice_indices = np.arange(actual_num_slices)
         padded_slice_indices = np.full(padded_size, PADDING_VALUE, dtype=np.int32)
         padded_slice_indices[:actual_num_slices] = slice_indices
-        self.batched_slice_indices = backend.convert_to_tensor(
-            padded_slice_indices.reshape(self.num_devices, slices_per_device)
+
+        # Reshape for distribution and define the sharding rule
+        batched_indices = padded_slice_indices.reshape(
+            self.num_devices, slices_per_device
         )
+        # Sharding rule: split the first axis (the one for devices) across the 'devices' mesh axis
+        self.sharding = NamedSharding(self.mesh, P("devices", None))
+        # Place the tensor on devices according to the rule
+        self.batched_slice_indices = jaxlib.device_put(batched_indices, self.sharding)
+
+        # self.batched_slice_indices = backend.convert_to_tensor(
+        #     padded_slice_indices.reshape(self.num_devices, slices_per_device)
+        # )
         print(
             f"Distributing across {self.num_devices} devices. Each device will sequentially process "
             f"up to {slices_per_device} slices."
@@ -600,6 +863,89 @@ class DistributedContractor:
 
         logger.info("Initialization complete.")
 
+    def _report_tree_info(self) -> None:
+        print("\n--- Contraction Path Info ---")
+        actual_num_slices = self.tree.nslices
+        stats = self.tree.contract_stats()
+        print(f"Path found with {actual_num_slices} slices.")
+        print(
+            f"Arithmetic Intensity (higher is better): {self.tree.arithmetic_intensity():.2f}"
+        )
+        print("flops (TFlops):", stats["flops"] / 2**40 / self.num_devices)
+        print("write (GB):", stats["write"] / 2**27 / actual_num_slices)
+        print("size (GB):", stats["size"] / 2**27)
+        print("-----------------------------\n")
+
+    @staticmethod
+    def _get_tree_data(
+        nodes_fn: Callable[[Tensor], List[Gate]],
+        params: Tensor,
+        cotengra_options: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        global ctg
+
+        import cotengra as ctg
+
+        local_cotengra_options = (cotengra_options or {}).copy()
+
+        nodes = nodes_fn(params)
+        tn_info, _ = get_tn_info(nodes)
+        default_cotengra_options = {
+            "slicing_reconf_opts": {"target_size": 2**28},
+            "max_repeats": 128,
+            "minimize": "write",
+            "parallel": "auto",
+            "progbar": True,
+        }
+        default_cotengra_options.update(local_cotengra_options)
+
+        opt = ctg.ReusableHyperOptimizer(**default_cotengra_options)
+        tree_object = opt.search(*tn_info)
+        tree_data = {
+            "inputs": tree_object.inputs,
+            "output": tree_object.output,
+            "size_dict": tree_object.size_dict,
+            "path": tree_object.get_path(),
+            "sliced_inds": tree_object.sliced_inds,
+        }
+        return tree_data
+
+    @staticmethod
+    def find_path(
+        nodes_fn: Callable[[Tensor], Tensor],
+        params: Tensor,
+        cotengra_options: Optional[Dict[str, Any]] = None,
+        filepath: Optional[str] = None,
+    ) -> None:
+        tree_data = DistributedContractor._get_tree_data(
+            nodes_fn, params, cotengra_options
+        )
+        if filepath is not None:
+            with open(filepath, "wb") as f:
+                pickle.dump(tree_data, f)
+            logger.info(f"Contraction path data successfully saved to '{filepath}'.")
+
+    @classmethod
+    def from_path(
+        cls,
+        filepath: str,
+        nodes_fn: Callable[[Tensor], List[Gate]],
+        devices: Optional[List[Any]] = None,  # backward compatibility
+        mesh: Optional[Any] = None,
+    ) -> "DistributedContractor":
+        with open(filepath, "rb") as f:
+            tree_data = pickle.load(f)
+
+        # Each process loads the file independently. No broadcast is needed.
+        # We pass the loaded `tree_data` directly to __init__ to trigger the second workflow.
+        return cls(
+            nodes_fn=nodes_fn,
+            params=None,
+            mesh=mesh,
+            devices=devices,
+            tree_data=tree_data,
+        )
+
 
     def _get_single_slice_contraction_fn(
         self, op: Optional[Callable[[Tensor], Tensor]] = None
    ) -> Callable[[Any, Tensor, int], Tensor]:
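
The two new methods enable an offline path-search workflow; a hedged sketch (file name and optimizer options are arbitrary):

    # once, e.g. on a CPU-only node: search for a contraction path and store it
    DistributedContractor.find_path(
        nodes_fn, params, cotengra_options={"max_repeats": 32}, filepath="path.pkl"
    )

    # later, on every process of the distributed job: load the path instead of
    # re-running the optimizer and broadcasting the result
    dc = DistributedContractor.from_path("path.pkl", nodes_fn=nodes_fn, mesh=mesh)
    value = dc.value(params)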
@@ -716,6 +1062,7 @@
         fn_getter: Callable[..., Any],
         op: Optional[Callable[[Tensor], Tensor]],
         output_dtype: Optional[str],
+        is_grad_fn: bool,
     ) -> Callable[[Any, Tensor, Tensor], Tensor]:
         """
         Gets a compiled pmap-ed function from cache or compiles and caches it.
@@ -728,15 +1075,64 @@
         cache_key = (op, output_dtype)
         if cache_key not in cache:
             device_fn = fn_getter(op=op, output_dtype=output_dtype)
-            compiled_fn = jaxlib.pmap(
-                device_fn,
-                in_axes=(
-                    None,
-                    None,
-                    0,
-                ),  # tree: broadcast, params: broadcast, indices: map
-                static_broadcasted_argnums=(0,),  # arg 0 (tree) is a static argument
-                devices=self.devices,
+
+            def global_aggregated_fn(
+                tree: Any, params: Any, batched_slice_indices: Tensor
+            ) -> Any:
+                # Use jax.vmap to apply the per-device function across the sharded data.
+                # vmap maps `device_fn` over the first axis (0) of `batched_slice_indices`.
+                # `tree` and `params` are broadcasted (in_axes=None) to each call.
+                vmapped_device_fn = jaxlib.vmap(
+                    device_fn, in_axes=(None, None, 0), out_axes=0
+                )
+                device_results = vmapped_device_fn(tree, params, batched_slice_indices)
+
+                # Now, `device_results` is a sharded PyTree (one result per device).
+                # We aggregate them using jnp.sum, which JAX automatically compiles
+                # into a cross-device AllReduce operation.
+
+                if is_grad_fn:
+                    # `device_results` is a (value, grad) tuple of sharded arrays
+                    device_values, device_grads = device_results
+
+                    # Replace psum with jnp.sum
+                    global_value = jaxlib.numpy.sum(device_values, axis=0)
+                    global_grad = jaxlib.tree_util.tree_map(
+                        lambda g: jaxlib.numpy.sum(g, axis=0), device_grads
+                    )
+                    return global_value, global_grad
+                else:
+                    # `device_results` is just the sharded values
+                    return jaxlib.numpy.sum(device_results, axis=0)
+
+            # Compile the global function with jax.jit and specify shardings.
+            # `params` are replicated (available everywhere).
+            params_sharding = jaxlib.tree_util.tree_map(
+                lambda x: NamedSharding(self.mesh, P(*((None,) * x.ndim))),
+                self._params_template,
+            )
+
+            in_shardings = (params_sharding, self.sharding)
+
+            if is_grad_fn:
+                # Returns (value, grad), so out_sharding must be a 2-tuple.
+                # `value` is a replicated scalar -> P()
+                sharding_for_value = NamedSharding(self.mesh, P())
+                # `grad` is a replicated PyTree with the same structure as params.
+                sharding_for_grad = params_sharding
+                out_shardings = (sharding_for_value, sharding_for_grad)
+            else:
+                # Returns a single scalar value -> P()
+                out_shardings = NamedSharding(self.mesh, P())
+
+            compiled_fn = jaxlib.jit(
+                global_aggregated_fn,
+                # `tree` is a static argument, its value is compiled into the function.
+                static_argnums=(0,),
+                # Specify how inputs are sharded.
+                in_shardings=in_shardings,
+                # Specify how the output should be sharded.
+                out_shardings=out_shardings,
             )
             cache[cache_key] = compiled_fn  # type: ignore
         return cache[cache_key]  # type: ignore
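
The replacement of `pmap` by `jit` above follows the standard jax sharding recipe (vmap over a sharded leading axis, then an ordinary sum that XLA lowers to a cross-device reduction). A self-contained toy version of the same pattern, independent of the contractor code and purely for illustration:

    import jax
    import jax.numpy as jnp
    import numpy as np
    from jax.sharding import Mesh, NamedSharding, PartitionSpec as P

    mesh = Mesh(np.array(jax.devices()), axis_names=("devices",))
    sharding = NamedSharding(mesh, P("devices", None))  # shard the first axis
    replicated = NamedSharding(mesh, P())

    def per_slice(params, idx):
        return jnp.sum(params) * idx  # stand-in for one sliced contraction

    def global_fn(params, batched_idx):
        per_device = jax.vmap(per_slice, in_axes=(None, 0))(params, batched_idx)
        return jnp.sum(per_device)  # lowered to an AllReduce across devices

    n_dev = len(jax.devices())
    batched_idx = jax.device_put(
        np.arange(n_dev * 4, dtype=np.float32).reshape(n_dev, 4), sharding
    )
    params = jax.device_put(jnp.ones(3), replicated)

    f = jax.jit(global_fn, in_shardings=(replicated, sharding), out_shardings=replicated)
    print(f(params, batched_idx))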
@@ -744,7 +1140,7 @@
     def value_and_grad(
         self,
         params: Tensor,
-        aggregate: bool = True,
+        # aggregate: bool = True,
         op: Optional[Callable[[Tensor], Tensor]] = None,
         output_dtype: Optional[str] = None,
     ) -> Tuple[Tensor, Tensor]:
@@ -753,8 +1149,6 @@
 
         :param params: Parameters for the `nodes_fn` input
         :type params: Tensor
-        :param aggregate: Whether to aggregate (sum) the results across devices, defaults to True
-        :type aggregate: bool, optional
         :param op: Optional post-processing function for the output, defaults to None (corresponding to `backend.real`)
            op is a cache key, so dont directly pass lambda function for op
         :type op: Optional[Callable[[Tensor], Tensor]], optional
@@ -766,24 +1160,18 @@
             fn_getter=self._get_device_sum_vg_fn,
             op=op,
             output_dtype=output_dtype,
+            is_grad_fn=True,
         )
 
-        device_values, device_grads = compiled_vg_fn(
+        total_value, total_grad = compiled_vg_fn(
             self.tree, params, self.batched_slice_indices
         )
-
-        if aggregate:
-            total_value = backend.sum(device_values)
-            total_grad = jaxlib.tree_util.tree_map(
-                lambda x: backend.sum(x, axis=0), device_grads
-            )
-            return total_value, total_grad
-        return device_values, device_grads
+        return total_value, total_grad
 
     def value(
         self,
         params: Tensor,
-        aggregate: bool = True,
+        # aggregate: bool = True,
         op: Optional[Callable[[Tensor], Tensor]] = None,
         output_dtype: Optional[str] = None,
     ) -> Tensor:
@@ -792,8 +1180,6 @@
 
         :param params: Parameters for the `nodes_fn` input
         :type params: Tensor
-        :param aggregate: Whether to aggregate (sum) the results across devices, defaults to True
-        :type aggregate: bool, optional
         :param op: Optional post-processing function for the output, defaults to None (corresponding to identity)
            op is a cache key, so dont directly pass lambda function for op
         :type op: Optional[Callable[[Tensor], Tensor]], optional
@@ -805,22 +1191,17 @@
             fn_getter=self._get_device_sum_v_fn,
             op=op,
             output_dtype=output_dtype,
+            is_grad_fn=False,
         )
 
-        device_values = compiled_v_fn(self.tree, params, self.batched_slice_indices)
-
-        if aggregate:
-            return backend.sum(device_values)
-        return device_values
+        total_value = compiled_v_fn(self.tree, params, self.batched_slice_indices)
+        return total_value
 
     def grad(
         self,
         params: Tensor,
-        aggregate: bool = True,
         op: Optional[Callable[[Tensor], Tensor]] = None,
         output_dtype: Optional[str] = None,
     ) -> Tensor:
-        _, grad = self.value_and_grad(
-            params, aggregate=aggregate, op=op, output_dtype=output_dtype
-        )
+        _, grad = self.value_and_grad(params, op=op, output_dtype=output_dtype)
         return grad
@@ -132,13 +132,17 @@ def general_args_to_backend(
         target_backend = backend
     elif isinstance(target_backend, str):
         target_backend = get_backend(target_backend)
+    try:
+        t = backend.tree_map(target_backend.from_dlpack, caps)
+    except TypeError:
+        t = backend.tree_map(target_backend.from_dlpack, args)
+
     if dtype is None:
-        return backend.tree_map(target_backend.from_dlpack, caps)
+        return t
     if isinstance(dtype, str):
         leaves, treedef = backend.tree_flatten(args)
         dtype = [dtype for _ in range(len(leaves))]
         dtype = backend.tree_unflatten(treedef, dtype)
-    t = backend.tree_map(target_backend.from_dlpack, caps)
     t = backend.tree_map(target_backend.cast, t, dtype)
     return t
 
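The try/except above covers the jax dlpack change noted in the changelog: older releases of `from_dlpack` expected an explicit DLPack capsule, while newer ones take the producing tensor itself and can raise `TypeError` on a capsule. A minimal illustration of the same compatibility shim outside tensorcircuit (torch producer, jax consumer; exact version behavior is an assumption):

    import torch
    import jax.dlpack

    x = torch.ones(3)
    cap = torch.utils.dlpack.to_dlpack(x)
    try:
        y = jax.dlpack.from_dlpack(cap)  # accepted by older jax releases
    except TypeError:
        y = jax.dlpack.from_dlpack(x)    # newer jax takes the tensor directly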
@@ -69,12 +69,14 @@ def torch_interface(
         @staticmethod
         def forward(ctx: Any, *x: Any) -> Any:  # type: ignore
             # ctx.xdtype = [xi.dtype for xi in x]
-            ctx.xdtype = backend.tree_map(lambda s: s.dtype, x)
+            ctx.save_for_backward(*x)
+            x_detached = backend.tree_map(lambda s: s.detach(), x)
+            ctx.xdtype = backend.tree_map(lambda s: s.dtype, x_detached)
             # (x, )
             if len(ctx.xdtype) == 1:
                 ctx.xdtype = ctx.xdtype[0]
-            ctx.device = (backend.tree_flatten(x)[0][0]).device
-            x = general_args_to_backend(x, enable_dlpack=enable_dlpack)
+            ctx.device = (backend.tree_flatten(x_detached)[0][0]).device
+            x = general_args_to_backend(x_detached, enable_dlpack=enable_dlpack)
             y = fun(*x)
             ctx.ydtype = backend.tree_map(lambda s: s.dtype, y)
             if len(x) == 1:
@@ -88,6 +90,9 @@
 
         @staticmethod
         def backward(ctx: Any, *grad_y: Any) -> Any:
+            x = ctx.saved_tensors
+            x_detached = backend.tree_map(lambda s: s.detach(), x)
+            x_backend = general_args_to_backend(x_detached, enable_dlpack=enable_dlpack)
             if len(grad_y) == 1:
                 grad_y = grad_y[0]
             grad_y = backend.tree_map(lambda s: s.contiguous(), grad_y)
@@ -96,7 +101,12 @@
             )
             # grad_y = general_args_to_numpy(grad_y)
             # grad_y = numpy_args_to_backend(grad_y, dtype=ctx.ydtype)  # backend.dtype
-            _, g = vjp_fun(ctx.x, grad_y)
+            if len(x_backend) == 1:
+                x_backend_for_vjp = x_backend[0]
+            else:
+                x_backend_for_vjp = x_backend
+
+            _, g = vjp_fun(x_backend_for_vjp, grad_y)
             # a redundency due to current vjp API
 
             r = general_args_to_backend(
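
For context, a sketch of the round trip these `forward`/`backward` changes affect: wrapping a jax-backend function as a torch autograd function and differentiating through it (the circuit and loss are illustrative, and the exact import path of `torch_interface` is assumed):

    import torch
    import tensorcircuit as tc

    tc.set_backend("jax")

    def f(params):
        c = tc.Circuit(2)
        c.rx(0, theta=params[0])
        c.ry(1, theta=params[1])
        c.cnot(0, 1)
        return tc.backend.real(c.expectation((tc.gates.z(), [1])))

    f_torch = tc.interfaces.torch_interface(f, jit=True)

    params = torch.ones(2, requires_grad=True)
    loss = f_torch(params)
    loss.backward()   # backward now reconstructs the inputs from ctx.saved_tensors
    print(params.grad)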
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tensorcircuit-nightly
-Version: 1.4.0.dev20251014
+Version: 1.4.0.dev20251125
 Summary: High performance unified quantum computing framework for the NISQ era
 Author-email: TensorCircuit Authors <znfesnpbh@gmail.com>
 License-Expression: Apache-2.0
@@ -70,7 +70,7 @@ TensorCircuit-NG is the only actively maintained official version and a [fully c
 
 Please begin with [Quick Start](/docs/source/quickstart.rst) in the [full documentation](https://tensorcircuit-ng.readthedocs.io/).
 
-For more information on software usage, sota algorithm implementation and engineer paradigm demonstration, please refer to 90+ [example scripts](/examples) and 40+ [tutorial notebooks](https://tensorcircuit-ng.readthedocs.io/en/latest/#tutorials). API docstrings and test cases in [tests](/tests) are also informative. One can also refer to AI-native docs for tensorcircuit-ng: [Devin Deepwiki](https://deepwiki.com/tensorcircuit/tensorcircuit-ng) and [Context7 MCP](https://context7.com/tensorcircuit/tensorcircuit-ng).
+For more information on software usage, sota algorithm implementation and engineer paradigm demonstration, please refer to 100+ [example scripts](/examples) and 40+ [tutorial notebooks](https://tensorcircuit-ng.readthedocs.io/en/latest/#tutorials). API docstrings and test cases in [tests](/tests) are also informative. One can also refer to AI-native docs for tensorcircuit-ng: [Devin Deepwiki](https://deepwiki.com/tensorcircuit/tensorcircuit-ng) and [Context7 MCP](https://context7.com/tensorcircuit/tensorcircuit-ng).
 
 For beginners, please refer to [quantum computing lectures with TC-NG](https://github.com/sxzgroup/qc_lecture) to learn both quantum computing basics and representative usage of TensorCircuit-NG.
 
@@ -347,6 +347,7 @@ TensorCircuit-NG is open source, released under the Apache License, Version 2.0.
 <td align="center" valign="top" width="16.66%"><a href="https://github.com/Charlespkuer"><img src="https://avatars.githubusercontent.com/u/112697147?v=4?s=100" width="100px;" alt="Huang"/><br /><sub><b>Huang</b></sub></a><br /><a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Charlespkuer" title="Code">💻</a> <a href="#example-Charlespkuer" title="Examples">💡</a> <a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Charlespkuer" title="Tests">⚠️</a></td>
 <td align="center" valign="top" width="16.66%"><a href="https://github.com/Huang-Xu-Yang"><img src="https://avatars.githubusercontent.com/u/227286661?v=4?s=100" width="100px;" alt="Huang-Xu-Yang"/><br /><sub><b>Huang-Xu-Yang</b></sub></a><br /><a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Huang-Xu-Yang" title="Code">💻</a> <a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=Huang-Xu-Yang" title="Tests">⚠️</a></td>
 <td align="center" valign="top" width="16.66%"><a href="https://github.com/WeiguoMa"><img src="https://avatars.githubusercontent.com/u/108172530?v=4?s=100" width="100px;" alt="Weiguo_M"/><br /><sub><b>Weiguo_M</b></sub></a><br /><a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=WeiguoMa" title="Code">💻</a> <a href="https://github.com/tensorcircuit/tensorcircuit-ng/commits?author=WeiguoMa" title="Tests">⚠️</a> <a href="#example-WeiguoMa" title="Examples">💡</a> <a href="#tutorial-WeiguoMa" title="Tutorials">✅</a></td>
+<td align="center" valign="top" width="16.66%"><a href="https://github.com/QuiXamii"><img src="https://avatars.githubusercontent.com/u/136054857?v=4?s=100" width="100px;" alt="Qixiang WANG"/><br /><sub><b>Qixiang WANG</b></sub></a><br /><a href="#example-QuiXamii" title="Examples">💡</a></td>
 </tr>
 </tbody>
 </table>