xpk 0.17.0__tar.gz → 0.17.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (293)
  1. {xpk-0.17.0/src/xpk.egg-info → xpk-0.17.2}/PKG-INFO +1 -1
  2. {xpk-0.17.0 → xpk-0.17.2}/goldens/Workload_create.txt +3 -2
  3. {xpk-0.17.0 → xpk-0.17.2}/goldens/Workload_create_sub-slicing.txt +3 -2
  4. {xpk-0.17.0 → xpk-0.17.2}/goldens/Workload_create_super-slicing.txt +3 -2
  5. {xpk-0.17.0 → xpk-0.17.2}/goldens/Workload_create_with_output-manifest-file.txt +3 -2
  6. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/storage.py +0 -25
  7. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/workload.py +1 -0
  8. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/cluster.py +1 -3
  9. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/common.py +0 -151
  10. {xpk-0.17.0 → xpk-0.17.2/src/xpk.egg-info}/PKG-INFO +1 -1
  11. {xpk-0.17.0 → xpk-0.17.2}/src/xpk.egg-info/SOURCES.txt +0 -2
  12. xpk-0.17.0/src/xpk/core/kjob.py +0 -473
  13. xpk-0.17.0/src/xpk/templates/volume_bundle.yaml +0 -7
  14. {xpk-0.17.0 → xpk-0.17.2}/.dockerignore +0 -0
  15. {xpk-0.17.0 → xpk-0.17.2}/.github/CODEOWNERS +0 -0
  16. {xpk-0.17.0 → xpk-0.17.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  17. {xpk-0.17.0 → xpk-0.17.2}/.github/actions/install-kjob/action.yml +0 -0
  18. {xpk-0.17.0 → xpk-0.17.2}/.github/actions/install-kueue/action.yml +0 -0
  19. {xpk-0.17.0 → xpk-0.17.2}/.github/actions/setup-test-env/action.yml +0 -0
  20. {xpk-0.17.0 → xpk-0.17.2}/.github/release.yaml +0 -0
  21. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/README.md +0 -0
  22. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/build_tests.yaml +0 -0
  23. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/build_wheels.yaml +0 -0
  24. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/cleanup.yaml +0 -0
  25. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/gemini-dispatch.yml +0 -0
  26. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/gemini-invoke.yml +0 -0
  27. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/gemini-review.yml +0 -0
  28. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/gemini-scheduled-triage.yml +0 -0
  29. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/gemini-triage.yml +0 -0
  30. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/integration_basic_cluster_create.yaml +0 -0
  31. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/integration_legacy_tests.yaml +0 -0
  32. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/integration_pathways_cluster_create.yaml +0 -0
  33. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/integration_ray_cluster_create.yaml +0 -0
  34. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/integration_storage_tests.yaml +0 -0
  35. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/label-validation.yaml +0 -0
  36. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/nightly_tests.yaml +0 -0
  37. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/periodic_release.yaml +0 -0
  38. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/release_branch_versioning.yaml +0 -0
  39. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/reusable_build_kjob.yaml +0 -0
  40. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/reusable_build_scripts.yaml +0 -0
  41. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/reusable_build_wheel.yaml +0 -0
  42. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/reusable_goldens.yaml +0 -0
  43. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/reusable_integration_tests.yaml +0 -0
  44. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/reusable_lint_and_format.yml +0 -0
  45. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/reusable_storage_create.yaml +0 -0
  46. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/reusable_storage_delete.yaml +0 -0
  47. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/reusable_unit_tests.yaml +0 -0
  48. {xpk-0.17.0 → xpk-0.17.2}/.github/workflows/stale.yaml +0 -0
  49. {xpk-0.17.0 → xpk-0.17.2}/.gitignore +0 -0
  50. {xpk-0.17.0 → xpk-0.17.2}/.pre-commit-config.yaml +0 -0
  51. {xpk-0.17.0 → xpk-0.17.2}/LICENSE +0 -0
  52. {xpk-0.17.0 → xpk-0.17.2}/Makefile +0 -0
  53. {xpk-0.17.0 → xpk-0.17.2}/README.md +0 -0
  54. {xpk-0.17.0 → xpk-0.17.2}/backoff_retry.sh +0 -0
  55. {xpk-0.17.0 → xpk-0.17.2}/data/Dockerfile +0 -0
  56. {xpk-0.17.0 → xpk-0.17.2}/docs/code-of-conduct.md +0 -0
  57. {xpk-0.17.0 → xpk-0.17.2}/docs/contributing.md +0 -0
  58. {xpk-0.17.0 → xpk-0.17.2}/docs/installation.md +0 -0
  59. {xpk-0.17.0 → xpk-0.17.2}/docs/local_testing.md +0 -0
  60. {xpk-0.17.0 → xpk-0.17.2}/docs/permissions.md +0 -0
  61. {xpk-0.17.0 → xpk-0.17.2}/docs/testing.md +0 -0
  62. {xpk-0.17.0 → xpk-0.17.2}/docs/troubleshooting.md +0 -0
  63. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/advanced.md +0 -0
  64. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/autoprovisioning.md +0 -0
  65. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/clusters.md +0 -0
  66. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/cpu.md +0 -0
  67. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/docker.md +0 -0
  68. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/gpu.md +0 -0
  69. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/inspector.md +0 -0
  70. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/job.md +0 -0
  71. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/run.md +0 -0
  72. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/storage.md +0 -0
  73. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/tpu7x/clusters.md +0 -0
  74. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/tpu7x/recipes/flex_filestore_recipe.md +0 -0
  75. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/tpu7x/recipes/flex_lustre_recipe.md +0 -0
  76. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/tpu7x/recipes/reservation_gcs_bucket_recipe.md +0 -0
  77. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/tpu7x/workloads.md +0 -0
  78. {xpk-0.17.0 → xpk-0.17.2}/docs/usage/workloads.md +0 -0
  79. {xpk-0.17.0 → xpk-0.17.2}/examples/batch.md +0 -0
  80. {xpk-0.17.0 → xpk-0.17.2}/examples/fake_training.py +0 -0
  81. {xpk-0.17.0 → xpk-0.17.2}/examples/job.sh +0 -0
  82. {xpk-0.17.0 → xpk-0.17.2}/examples/llama-3.1-finetuning/check_cuda.sh +0 -0
  83. {xpk-0.17.0 → xpk-0.17.2}/examples/llama-3.1-finetuning/requirements.txt +0 -0
  84. {xpk-0.17.0 → xpk-0.17.2}/examples/llama-3.1-finetuning/train.py +0 -0
  85. {xpk-0.17.0 → xpk-0.17.2}/examples/llama-3.1-finetuning/train.slurm +0 -0
  86. {xpk-0.17.0 → xpk-0.17.2}/examples/llama-3.1-finetuning/training_data.jsonl +0 -0
  87. {xpk-0.17.0 → xpk-0.17.2}/examples/nccl/nccl-a3mega.sh +0 -0
  88. {xpk-0.17.0 → xpk-0.17.2}/examples/nccl/nccl-a3ultra.sh +0 -0
  89. {xpk-0.17.0 → xpk-0.17.2}/examples/nccl/nccl.md +0 -0
  90. {xpk-0.17.0 → xpk-0.17.2}/examples/storage/filestore-manifest-attach.yaml +0 -0
  91. {xpk-0.17.0 → xpk-0.17.2}/examples/storage/gcsfuse-manifest.yaml +0 -0
  92. {xpk-0.17.0 → xpk-0.17.2}/examples/storage/lustre-manifest-attach.yaml +0 -0
  93. {xpk-0.17.0 → xpk-0.17.2}/examples/storage/parallelstore-manifest-attach.yaml +0 -0
  94. {xpk-0.17.0 → xpk-0.17.2}/examples/storage/pd-manifest-attach.yaml +0 -0
  95. {xpk-0.17.0 → xpk-0.17.2}/golden_buddy.sh +0 -0
  96. {xpk-0.17.0 → xpk-0.17.2}/goldens/Basic_cluster_create.txt +0 -0
  97. {xpk-0.17.0 → xpk-0.17.2}/goldens/Batch.txt +0 -0
  98. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_create_for_multi-host_nodepool.txt +0 -0
  99. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_create_for_single-host_single-slice_TPU.txt +0 -0
  100. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_create_private.txt +0 -0
  101. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_create_sub-slicing.txt +0 -0
  102. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_create_super-slicing.txt +0 -0
  103. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_create_with_CPU_and_memory_limits_above_capacity.txt +0 -0
  104. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_create_with_CPU_and_memory_limits_below_capacity.txt +0 -0
  105. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_create_with_Managed_Lustre_driver.txt +0 -0
  106. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_create_with_Managed_Lustre_driver_and_legacy_port.txt +0 -0
  107. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_create_with_gb200-4.txt +0 -0
  108. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_create_with_shared_reservation.txt +0 -0
  109. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_delete.txt +0 -0
  110. {xpk-0.17.0 → xpk-0.17.2}/goldens/Cluster_delete_force.txt +0 -0
  111. {xpk-0.17.0 → xpk-0.17.2}/goldens/Job_cancel.txt +0 -0
  112. {xpk-0.17.0 → xpk-0.17.2}/goldens/Job_info.txt +0 -0
  113. {xpk-0.17.0 → xpk-0.17.2}/goldens/Job_list.txt +0 -0
  114. {xpk-0.17.0 → xpk-0.17.2}/goldens/NAP_cluster-create.txt +0 -0
  115. {xpk-0.17.0 → xpk-0.17.2}/goldens/NAP_cluster-create_with_pathways.txt +0 -0
  116. {xpk-0.17.0 → xpk-0.17.2}/goldens/Storage_list.txt +0 -0
  117. {xpk-0.17.0 → xpk-0.17.2}/goldens/Workload_create_pathways.txt +0 -0
  118. {xpk-0.17.0 → xpk-0.17.2}/goldens/Workload_delete.txt +0 -0
  119. {xpk-0.17.0 → xpk-0.17.2}/goldens/Workload_list.txt +0 -0
  120. {xpk-0.17.0 → xpk-0.17.2}/goldens.yaml +0 -0
  121. {xpk-0.17.0 → xpk-0.17.2}/pylintrc +0 -0
  122. {xpk-0.17.0 → xpk-0.17.2}/pyproject.toml +0 -0
  123. {xpk-0.17.0 → xpk-0.17.2}/setup.cfg +0 -0
  124. {xpk-0.17.0 → xpk-0.17.2}/src/integration/README.md +0 -0
  125. {xpk-0.17.0 → xpk-0.17.2}/src/integration/__init__.py +0 -0
  126. {xpk-0.17.0 → xpk-0.17.2}/src/integration/docker_manager_test.py +0 -0
  127. {xpk-0.17.0 → xpk-0.17.2}/src/integration/gcluster_a3mega_test.py +0 -0
  128. {xpk-0.17.0 → xpk-0.17.2}/src/integration/gcluster_a3ultra_test.py +0 -0
  129. {xpk-0.17.0 → xpk-0.17.2}/src/integration/gcluster_a4_test.py +0 -0
  130. {xpk-0.17.0 → xpk-0.17.2}/src/integration/gcluster_test.py +0 -0
  131. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/__init__.py +0 -0
  132. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/api/__init__.py +0 -0
  133. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/api/storage_crd.yaml +0 -0
  134. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/blueprints/a3mega/config-map.yaml.tftpl +0 -0
  135. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/blueprints/a3mega/storage_crd.yaml +0 -0
  136. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/blueprints/a3ultra/config-map.yaml.tftpl +0 -0
  137. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/blueprints/a3ultra/mlgru-disable.yaml +0 -0
  138. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/blueprints/a3ultra/nccl-installer.yaml +0 -0
  139. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/blueprints/a3ultra/storage_crd.yaml +0 -0
  140. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/blueprints/a4/config-map.yaml.tftpl +0 -0
  141. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/blueprints/a4/nccl-rdma-installer-a4.yaml +0 -0
  142. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/blueprints/a4/storage_crd.yaml +0 -0
  143. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/__init__.py +0 -0
  144. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/batch.py +0 -0
  145. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/cluster.py +0 -0
  146. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/cluster_gcluster.py +0 -0
  147. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/cluster_gcluster_test.py +0 -0
  148. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/cluster_test.py +0 -0
  149. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/common.py +0 -0
  150. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/config.py +0 -0
  151. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/info.py +0 -0
  152. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/inspector.py +0 -0
  153. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/job.py +0 -0
  154. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/kind.py +0 -0
  155. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/kjob_common.py +0 -0
  156. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/managed_ml_diagnostics.py +0 -0
  157. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/managed_ml_diagnostics_test.py +0 -0
  158. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/run.py +0 -0
  159. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/shell.py +0 -0
  160. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/version.py +0 -0
  161. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/commands/workload_test.py +0 -0
  162. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/__init__.py +0 -0
  163. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/blueprint/__init__.py +0 -0
  164. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/blueprint/blueprint_definitions.py +0 -0
  165. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/blueprint/blueprint_generator.py +0 -0
  166. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/blueprint/blueprint_test.py +0 -0
  167. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/blueprint/testing/__init__.py +0 -0
  168. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/blueprint/testing/data/a3_mega.yaml +0 -0
  169. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/blueprint/testing/data/a3_mega_spot.yaml +0 -0
  170. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/blueprint/testing/data/a3_ultra.yaml +0 -0
  171. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/blueprint/testing/data/a4.yaml +0 -0
  172. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/capacity.py +0 -0
  173. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/capacity_test.py +0 -0
  174. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/cluster_private.py +0 -0
  175. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/cluster_test.py +0 -0
  176. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/commands.py +0 -0
  177. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/config.py +0 -0
  178. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/config_test.py +0 -0
  179. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/docker_container.py +0 -0
  180. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/docker_image.py +0 -0
  181. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/docker_manager.py +0 -0
  182. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/docker_resources.py +0 -0
  183. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/filestore.py +0 -0
  184. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/gcloud_context.py +0 -0
  185. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/gcloud_context_test.py +0 -0
  186. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/gcluster_manager.py +0 -0
  187. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/gcsfuse.py +0 -0
  188. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/jobset.py +0 -0
  189. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/kueue_manager.py +0 -0
  190. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/kueue_manager_test.py +0 -0
  191. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/monitoring.py +0 -0
  192. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/mtc.py +0 -0
  193. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/nap.py +0 -0
  194. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/network.py +0 -0
  195. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/nodepool.py +0 -0
  196. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/nodepool_test.py +0 -0
  197. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/pathways.py +0 -0
  198. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/pathways_test.py +0 -0
  199. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/ray.py +0 -0
  200. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/remote_state/__init__.py +0 -0
  201. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/remote_state/fuse_remote_state.py +0 -0
  202. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/remote_state/remote_state_client.py +0 -0
  203. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/resources.py +0 -0
  204. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/scheduling.py +0 -0
  205. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/scheduling_test.py +0 -0
  206. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/storage.py +0 -0
  207. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/system_characteristics.py +0 -0
  208. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/system_characteristics_test.py +0 -0
  209. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/telemetry.py +0 -0
  210. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/telemetry_test.py +0 -0
  211. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/testing/__init__.py +0 -0
  212. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/testing/commands_tester.py +0 -0
  213. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/testing/commands_tester_test.py +0 -0
  214. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/updates.py +0 -0
  215. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/updates_test.py +0 -0
  216. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/vertex.py +0 -0
  217. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/workload.py +0 -0
  218. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/workload_decorators/__init__.py +0 -0
  219. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/workload_decorators/rdma_decorator.py +0 -0
  220. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/workload_decorators/storage_decorator.py +0 -0
  221. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/workload_decorators/tcpx_decorator.py +0 -0
  222. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/workload_decorators/tcpx_decorator_test.py +0 -0
  223. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/workload_decorators/tcpxo_decorator.py +0 -0
  224. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/core/workload_test.py +0 -0
  225. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/main.py +0 -0
  226. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/__init__.py +0 -0
  227. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/batch.py +0 -0
  228. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/cluster.py +0 -0
  229. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/cluster_test.py +0 -0
  230. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/common_test.py +0 -0
  231. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/config.py +0 -0
  232. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/core.py +0 -0
  233. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/info.py +0 -0
  234. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/inspector.py +0 -0
  235. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/job.py +0 -0
  236. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/kind.py +0 -0
  237. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/run.py +0 -0
  238. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/shell.py +0 -0
  239. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/storage.py +0 -0
  240. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/storage_test.py +0 -0
  241. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/validators.py +0 -0
  242. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/version.py +0 -0
  243. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/workload.py +0 -0
  244. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/parser/workload_test.py +0 -0
  245. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/telemetry_uploader.py +0 -0
  246. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/__init__.py +0 -0
  247. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/arm_gpu_workload_crate.yaml.j2 +0 -0
  248. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/cluster_preheat.yaml.j2 +0 -0
  249. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/filestore-pv.yaml +0 -0
  250. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/filestore-pvc.yaml +0 -0
  251. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/filestore-sc.yaml +0 -0
  252. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/fuse-pv.yaml +0 -0
  253. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/fuse-pvc.yaml +0 -0
  254. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/kueue_config.yaml.j2 +0 -0
  255. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/kueue_gke_default_topology.yaml.j2 +0 -0
  256. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/kueue_sub_slicing_topology.yaml.j2 +0 -0
  257. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/kueue_super_slicing_topology.yaml.j2 +0 -0
  258. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/mtc-cpc.yaml +0 -0
  259. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/templates/storage.yaml +0 -0
  260. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/__init__.py +0 -0
  261. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/console.py +0 -0
  262. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/console_test.py +0 -0
  263. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/execution_context.py +0 -0
  264. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/feature_flags.py +0 -0
  265. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/file.py +0 -0
  266. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/gcs_utils.py +0 -0
  267. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/kubectl.py +0 -0
  268. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/kueue.py +0 -0
  269. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/network.py +0 -0
  270. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/objects.py +0 -0
  271. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/templates.py +0 -0
  272. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/topology.py +0 -0
  273. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/topology_test.py +0 -0
  274. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/user_agent.py +0 -0
  275. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/user_agent_test.py +0 -0
  276. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/user_input.py +0 -0
  277. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/user_input_test.py +0 -0
  278. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/validation.py +0 -0
  279. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/validation_test.py +0 -0
  280. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/versions.py +0 -0
  281. {xpk-0.17.0 → xpk-0.17.2}/src/xpk/utils/yaml.py +0 -0
  282. {xpk-0.17.0 → xpk-0.17.2}/src/xpk.egg-info/dependency_links.txt +0 -0
  283. {xpk-0.17.0 → xpk-0.17.2}/src/xpk.egg-info/entry_points.txt +0 -0
  284. {xpk-0.17.0 → xpk-0.17.2}/src/xpk.egg-info/requires.txt +0 -0
  285. {xpk-0.17.0 → xpk-0.17.2}/src/xpk.egg-info/top_level.txt +0 -0
  286. {xpk-0.17.0 → xpk-0.17.2}/tools/Dockerfile-kjob +0 -0
  287. {xpk-0.17.0 → xpk-0.17.2}/tools/build-kjob.sh +0 -0
  288. {xpk-0.17.0 → xpk-0.17.2}/tools/install-gke-auth-plugin.sh +0 -0
  289. {xpk-0.17.0 → xpk-0.17.2}/tools/install-xpk.sh +0 -0
  290. {xpk-0.17.0 → xpk-0.17.2}/xpk-large-scale-guide.sh +0 -0
  291. {xpk-0.17.0 → xpk-0.17.2}/xpk-notebooks.md +0 -0
  292. {xpk-0.17.0 → xpk-0.17.2}/xpk-slurm-commands.md +0 -0
  293. {xpk-0.17.0 → xpk-0.17.2}/xpk.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xpk
3
- Version: 0.17.0
3
+ Version: 0.17.2
4
4
  Summary: xpk helps Cloud developers to orchestrate training jobs on accelerators on GKE.
5
5
  Author-email: XPK team <xpk-code-reviewers@google.com>
6
6
  License: Apache-2.0
@@ -35,7 +35,7 @@ docker buildx build --platform=linux/amd64 -f 4b6736a12db8ea0f78ce793fd0d4ee0c94
35
35
  docker tag dry-run-runner gcr.io/golden-project/dry-run-runner:prefix-current
36
36
  [XPK] Task: `Upload Docker Image` is implemented by the following command not running since it is a dry run.
37
37
  docker push gcr.io/golden-project/dry-run-runner:prefix-current
38
- [XPK] Temp file (bd9665007cde96247406beaa13c9f40ba5636f1f75ea7561749c4472989fd14b) content:
38
+ [XPK] Temp file (9c33f52b12eab63cfb9ce1aba6ff74f4642cb9e40e2a0ad9517d189ee41f09a5) content:
39
39
  apiVersion: jobset.x-k8s.io/v1alpha2
40
40
  kind: JobSet
41
41
  metadata:
@@ -65,6 +65,7 @@ spec:
65
65
  podFailurePolicy:
66
66
  rules:
67
67
  - action: FailJob
68
+ onPodConditions: []
68
69
  onExitCodes:
69
70
  containerName: jax-tpu
70
71
  operator: NotIn
@@ -145,7 +146,7 @@ spec:
145
146
 
146
147
 
147
148
  [XPK] Task: `Creating Workload` is implemented by the following command not running since it is a dry run.
148
- kubectl apply -f bd9665007cde96247406beaa13c9f40ba5636f1f75ea7561749c4472989fd14b
149
+ kubectl apply -f 9c33f52b12eab63cfb9ce1aba6ff74f4642cb9e40e2a0ad9517d189ee41f09a5
149
150
  [XPK] Task: `GKE Dashboard List` is implemented by the following command not running since it is a dry run.
150
151
  gcloud monitoring dashboards list --project=golden-project --filter="displayName:'GKE - TPU Monitoring Dashboard'" --format="value(name)" --verbosity=error
151
152
  [XPK] Check statistics and outlier mode of GKE metrics here: https://console.cloud.google.com/monitoring/dashboards/builder/0?project=golden-project&f.rlabel.cluster_name.ClusterName=golden-cluster. To view the metric data for your workload, select golden-workload from the JobName filter on the dashboard.
@@ -39,7 +39,7 @@ docker buildx build --platform=linux/amd64 -f 4b6736a12db8ea0f78ce793fd0d4ee0c94
39
39
  docker tag dry-run-runner gcr.io/golden-project/dry-run-runner:prefix-current
40
40
  [XPK] Task: `Upload Docker Image` is implemented by the following command not running since it is a dry run.
41
41
  docker push gcr.io/golden-project/dry-run-runner:prefix-current
42
- [XPK] Temp file (816caa6f0023876e99f55515fda46cdbb648ff2c609f9d2af7c2354079d0e582) content:
42
+ [XPK] Temp file (8d5155a477cf99bc463104e0b22de0d21ee90548f51297fe429cdaa721d70a63) content:
43
43
  apiVersion: jobset.x-k8s.io/v1alpha2
44
44
  kind: JobSet
45
45
  metadata:
@@ -69,6 +69,7 @@ spec:
69
69
  podFailurePolicy:
70
70
  rules:
71
71
  - action: FailJob
72
+ onPodConditions: []
72
73
  onExitCodes:
73
74
  containerName: jax-tpu
74
75
  operator: NotIn
@@ -150,7 +151,7 @@ spec:
150
151
 
151
152
 
152
153
  [XPK] Task: `Creating Workload` is implemented by the following command not running since it is a dry run.
153
- kubectl apply -f 816caa6f0023876e99f55515fda46cdbb648ff2c609f9d2af7c2354079d0e582
154
+ kubectl apply -f 8d5155a477cf99bc463104e0b22de0d21ee90548f51297fe429cdaa721d70a63
154
155
  [XPK] Task: `GKE Dashboard List` is implemented by the following command not running since it is a dry run.
155
156
  gcloud monitoring dashboards list --project=golden-project --filter="displayName:'GKE - TPU Monitoring Dashboard'" --format="value(name)" --verbosity=error
156
157
  [XPK] Check statistics and outlier mode of GKE metrics here: https://console.cloud.google.com/monitoring/dashboards/builder/0?project=golden-project&f.rlabel.cluster_name.ClusterName=golden-cluster. To view the metric data for your workload, select golden-workload from the JobName filter on the dashboard.
@@ -39,7 +39,7 @@ docker buildx build --platform=linux/amd64 -f 4b6736a12db8ea0f78ce793fd0d4ee0c94
39
39
  docker tag dry-run-runner gcr.io/golden-project/dry-run-runner:prefix-current
40
40
  [XPK] Task: `Upload Docker Image` is implemented by the following command not running since it is a dry run.
41
41
  docker push gcr.io/golden-project/dry-run-runner:prefix-current
42
- [XPK] Temp file (91fb78adbd49f9e2d6c2fec62dc461e724e79d4189af07b99e8f731bf8e2e11d) content:
42
+ [XPK] Temp file (5c6c507500cfbde66c80baa4f3a642c49ec3501b383057e8b68595c4121e95aa) content:
43
43
  apiVersion: jobset.x-k8s.io/v1alpha2
44
44
  kind: JobSet
45
45
  metadata:
@@ -69,6 +69,7 @@ spec:
69
69
  podFailurePolicy:
70
70
  rules:
71
71
  - action: FailJob
72
+ onPodConditions: []
72
73
  onExitCodes:
73
74
  containerName: jax-tpu
74
75
  operator: NotIn
@@ -149,7 +150,7 @@ spec:
149
150
 
150
151
 
151
152
  [XPK] Task: `Creating Workload` is implemented by the following command not running since it is a dry run.
152
- kubectl apply -f 91fb78adbd49f9e2d6c2fec62dc461e724e79d4189af07b99e8f731bf8e2e11d
153
+ kubectl apply -f 5c6c507500cfbde66c80baa4f3a642c49ec3501b383057e8b68595c4121e95aa
153
154
  [XPK] Task: `GKE Dashboard List` is implemented by the following command not running since it is a dry run.
154
155
  gcloud monitoring dashboards list --project=golden-project --filter="displayName:'GKE - TPU Monitoring Dashboard'" --format="value(name)" --verbosity=error
155
156
  [XPK] Check statistics and outlier mode of GKE metrics here: https://console.cloud.google.com/monitoring/dashboards/builder/0?project=golden-project&f.rlabel.cluster_name.ClusterName=golden-cluster. To view the metric data for your workload, select golden-workload from the JobName filter on the dashboard.
@@ -36,7 +36,7 @@ docker tag dry-run-runner gcr.io/golden-project/dry-run-runner:prefix-current
36
36
  [XPK] Task: `Upload Docker Image` is implemented by the following command not running since it is a dry run.
37
37
  docker push gcr.io/golden-project/dry-run-runner:prefix-current
38
38
  [XPK] Workload golden-workload manifest written to /var/tmp/manifest.yaml
39
- [XPK] Temp file (bd9665007cde96247406beaa13c9f40ba5636f1f75ea7561749c4472989fd14b) content:
39
+ [XPK] Temp file (9c33f52b12eab63cfb9ce1aba6ff74f4642cb9e40e2a0ad9517d189ee41f09a5) content:
40
40
  apiVersion: jobset.x-k8s.io/v1alpha2
41
41
  kind: JobSet
42
42
  metadata:
@@ -66,6 +66,7 @@ spec:
66
66
  podFailurePolicy:
67
67
  rules:
68
68
  - action: FailJob
69
+ onPodConditions: []
69
70
  onExitCodes:
70
71
  containerName: jax-tpu
71
72
  operator: NotIn
@@ -146,7 +147,7 @@ spec:
146
147
 
147
148
 
148
149
  [XPK] Task: `Creating Workload` is implemented by the following command not running since it is a dry run.
149
- kubectl apply -f bd9665007cde96247406beaa13c9f40ba5636f1f75ea7561749c4472989fd14b
150
+ kubectl apply -f 9c33f52b12eab63cfb9ce1aba6ff74f4642cb9e40e2a0ad9517d189ee41f09a5
150
151
  [XPK] Task: `GKE Dashboard List` is implemented by the following command not running since it is a dry run.
151
152
  gcloud monitoring dashboards list --project=golden-project --filter="displayName:'GKE - TPU Monitoring Dashboard'" --format="value(name)" --verbosity=error
152
153
  [XPK] Check statistics and outlier mode of GKE metrics here: https://console.cloud.google.com/monitoring/dashboards/builder/0?project=golden-project&f.rlabel.cluster_name.ClusterName=golden-cluster. To view the metric data for your workload, select golden-workload from the JobName filter on the dashboard.
@@ -23,7 +23,6 @@ from kubernetes.client.rest import ApiException
23
23
 
24
24
  from ..core import gcsfuse
25
25
  from ..core.cluster import (
26
- DEFAULT_NAMESPACE,
27
26
  add_zone_and_project,
28
27
  get_cluster_network,
29
28
  setup_k8s_env,
@@ -35,12 +34,6 @@ from ..core.cluster import (
35
34
  update_cluster_with_workload_identity_if_necessary,
36
35
  )
37
36
  from ..core.filestore import FilestoreClient, get_storage_class_name
38
- from ..core.kjob import (
39
- KJOB_API_GROUP_NAME,
40
- KJOB_API_GROUP_VERSION,
41
- KJOB_API_VOLUME_BUNDLE_PLURAL,
42
- create_volume_bundle_instance,
43
- )
44
37
  from ..core.storage import (
45
38
  GCP_FILESTORE_TYPE,
46
39
  GCS_FUSE_TYPE,
@@ -98,9 +91,6 @@ def storage_create(args: Namespace) -> None:
98
91
 
99
92
  k8s_api_client = setup_k8s_env(args)
100
93
  create_storage_crds(k8s_api_client, args, manifest)
101
- create_volume_bundle_instance(
102
- k8s_api_client, args.name, manifest, args.readonly, args.mount_point
103
- )
104
94
  # Not required for Filestore. Will be uncommented when adding GCSFuse create
105
95
  # return_code = update_cluster_with_workload_identity_if_necessary(args)
106
96
  # if return_code > 0:
@@ -214,9 +204,6 @@ def storage_attach(args: Namespace) -> None:
214
204
 
215
205
  k8s_api_client = setup_k8s_env(args)
216
206
  create_storage_crds(k8s_api_client, args, manifest)
217
- create_volume_bundle_instance(
218
- k8s_api_client, args.name, manifest, args.readonly, args.mount_point
219
- )
220
207
 
221
208
  enable_csi_drivers_if_necessary(args)
222
209
 
@@ -332,18 +319,6 @@ def delete_storage_resources(k8s_api_client: ApiClient, storage: Storage):
332
319
  "Storage Class",
333
320
  )
334
321
 
335
- delete_resource(
336
- lambda name: api_instance.delete_namespaced_custom_object(
337
- namespace=DEFAULT_NAMESPACE,
338
- name=name,
339
- group=KJOB_API_GROUP_NAME,
340
- version=KJOB_API_GROUP_VERSION,
341
- plural=KJOB_API_VOLUME_BUNDLE_PLURAL,
342
- ),
343
- storage.name,
344
- "VolumeBundle",
345
- )
346
-
347
322
  delete_resource(
348
323
  lambda name: api_instance.delete_cluster_custom_object(
349
324
  name=name,
@@ -493,6 +493,7 @@ def workload_create(args) -> None:
493
493
  podFailurePolicy:
494
494
  rules:
495
495
  - action: FailJob
496
+ onPodConditions: []
496
497
  onExitCodes:
497
498
  containerName: {get_main_container_docker_image(args, workload_system)}
498
499
  operator: NotIn
@@ -717,10 +717,8 @@ def get_cluster_credentials(args) -> int:
717
717
  location=location,
718
718
  dns_endpoint=True,
719
719
  )
720
- if return_code != 0:
721
- return return_code
722
720
 
723
- if not _are_credentials_valid():
721
+ if return_code != 0 or not _are_credentials_valid():
724
722
  xpk_print('Detected error. Retrying without --dns-endpoint flag...')
725
723
  return_code = _get_credentials(
726
724
  project=args.project,
@@ -180,157 +180,6 @@ def add_global_arguments(custom_parser_or_group: ParserOrArgumentGroup):
180
180
  )
181
181
 
182
182
 
183
- def add_slurm_arguments(custom_parser_or_group: ParserOrArgumentGroup):
184
- """Add Slurm job arguments to the parser.
185
-
186
- Args:
187
- custom_parser_or_group: parser or argument group to add global arguments to.
188
- """
189
- custom_parser_or_group.add_argument(
190
- '--ignore-unknown-flags',
191
- type=bool,
192
- action=argparse.BooleanOptionalAction,
193
- default=False,
194
- help='Ignore all the unsupported flags in the bash script.',
195
- )
196
- custom_parser_or_group.add_argument(
197
- '-a',
198
- '--array',
199
- type=str,
200
- default=None,
201
- help=(
202
- 'Submit a job array, multiple jobs to be executed with identical'
203
- ' parameters. The indexes specification identifies what array index'
204
- ' values should be used. For example, "--array=0-15" or'
205
- ' "--array=0,6,16-32". Multiple values may be specified using a comma'
206
- ' separated list and/or a range of values with a "-" separator. For'
207
- ' example "--array=0-15%%4" will limit the number of simultaneously'
208
- ' running tasks from this job array to 4. The minimum index value is'
209
- ' 0. The maximum index value is 2147483647.'
210
- ),
211
- )
212
- custom_parser_or_group.add_argument(
213
- '-c',
214
- '--cpus-per-task',
215
- type=str,
216
- default=None,
217
- help='How much cpus a container inside a pod requires.',
218
- )
219
- custom_parser_or_group.add_argument(
220
- '--gpus-per-task',
221
- type=str,
222
- default=None,
223
- help='How much gpus a container inside a pod requires.',
224
- )
225
- custom_parser_or_group.add_argument(
226
- '--mem',
227
- type=str,
228
- default=None,
229
- help='How much memory a pod requires.',
230
- )
231
- custom_parser_or_group.add_argument(
232
- '--mem-per-task',
233
- type=str,
234
- default=None,
235
- help='How much memory a container requires.',
236
- )
237
- custom_parser_or_group.add_argument(
238
- '--mem-per-cpu',
239
- type=str,
240
- default=None,
241
- help=(
242
- 'How much memory a container requires, it multiplies the number '
243
- 'of requested cpus per task by mem-per-cpu.'
244
- ),
245
- )
246
- custom_parser_or_group.add_argument(
247
- '--mem-per-gpu',
248
- type=str,
249
- default=None,
250
- help=(
251
- 'How much memory a container requires, it multiplies the number '
252
- 'of requested gpus per task by mem-per-gpu.'
253
- ),
254
- )
255
- custom_parser_or_group.add_argument(
256
- '-N',
257
- '--nodes',
258
- type=int,
259
- default=None,
260
- help='Number of pods to be used at a time.',
261
- )
262
- custom_parser_or_group.add_argument(
263
- '-n',
264
- '--ntasks',
265
- type=int,
266
- default=None,
267
- help='Number of identical containers inside of a pod, usually 1.',
268
- )
269
- custom_parser_or_group.add_argument(
270
- '-o',
271
- '--output',
272
- type=str,
273
- default=None,
274
- help=(
275
- 'Where to redirect the standard output stream of a task. If not'
276
- ' passed it proceeds to stdout, and is available via kubectl logs.'
277
- ),
278
- )
279
- custom_parser_or_group.add_argument(
280
- '-e',
281
- '--error',
282
- type=str,
283
- default=None,
284
- help=(
285
- 'Where to redirect std error stream of a task. If not passed it'
286
- ' proceeds to stdout, and is available via kubectl logs.'
287
- ),
288
- )
289
- custom_parser_or_group.add_argument(
290
- '--input',
291
- type=str,
292
- default=None,
293
- help='What to pipe into the script.',
294
- )
295
- custom_parser_or_group.add_argument(
296
- '-J',
297
- '--job-name',
298
- type=str,
299
- default=None,
300
- help='What is the job name.',
301
- )
302
- custom_parser_or_group.add_argument(
303
- '-D',
304
- '--chdir',
305
- type=str,
306
- default=None,
307
- help='Change directory before executing the script.',
308
- )
309
- custom_parser_or_group.add_argument(
310
- '-t',
311
- '--time',
312
- type=str,
313
- default=None,
314
- help=(
315
- 'Set a limit on the total run time of the job. '
316
- 'A time limit of zero requests that no time limit be imposed. '
317
- 'Acceptable time formats include "minutes", "minutes:seconds", '
318
- '"hours:minutes:seconds", "days-hours", "days-hours:minutes" '
319
- 'and "days-hours:minutes:seconds".'
320
- ),
321
- )
322
- custom_parser_or_group.add_argument(
323
- '--priority',
324
- type=str,
325
- default='medium',
326
- choices=['very-low', 'low', 'medium', 'high', 'very-high'],
327
- help=(
328
- 'A priority, one of `very-low`, `low`, `medium`, `high` or'
329
- ' `very-high`. Defaults to `medium`.'
330
- ),
331
- )
332
-
333
-
334
183
  def add_tpu_type_argument(
335
184
  custom_parser_or_group: ParserOrArgumentGroup,
336
185
  required: bool = False,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xpk
3
- Version: 0.17.0
3
+ Version: 0.17.2
4
4
  Summary: xpk helps Cloud developers to orchestrate training jobs on accelerators on GKE.
5
5
  Author-email: XPK team <xpk-code-reviewers@google.com>
6
6
  License: Apache-2.0
@@ -182,7 +182,6 @@ src/xpk/core/gcloud_context_test.py
182
182
  src/xpk/core/gcluster_manager.py
183
183
  src/xpk/core/gcsfuse.py
184
184
  src/xpk/core/jobset.py
185
- src/xpk/core/kjob.py
186
185
  src/xpk/core/kueue_manager.py
187
186
  src/xpk/core/kueue_manager_test.py
188
187
  src/xpk/core/monitoring.py
@@ -262,7 +261,6 @@ src/xpk/templates/kueue_sub_slicing_topology.yaml.j2
262
261
  src/xpk/templates/kueue_super_slicing_topology.yaml.j2
263
262
  src/xpk/templates/mtc-cpc.yaml
264
263
  src/xpk/templates/storage.yaml
265
- src/xpk/templates/volume_bundle.yaml
266
264
  src/xpk/utils/__init__.py
267
265
  src/xpk/utils/console.py
268
266
  src/xpk/utils/console_test.py