xpk 0.17.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. xpk/commands/cluster.py +4 -35
  2. xpk/commands/cluster_gcluster.py +1 -13
  3. xpk/commands/cluster_gcluster_test.py +2 -10
  4. xpk/commands/cluster_test.py +0 -4
  5. xpk/commands/workload.py +10 -3
  6. xpk/commands/workload_test.py +1 -0
  7. xpk/core/cluster.py +10 -9
  8. xpk/core/config.py +5 -17
  9. xpk/core/kueue_manager_test.py +2 -0
  10. xpk/core/nodepool.py +6 -0
  11. xpk/core/nodepool_test.py +4 -0
  12. xpk/core/scheduling.py +28 -3
  13. xpk/core/scheduling_test.py +38 -1
  14. xpk/core/system_characteristics.py +39 -16
  15. xpk/core/system_characteristics_test.py +11 -0
  16. xpk/core/workload_decorators/rdma_decorator.py +0 -15
  17. xpk/core/workload_decorators/tcpx_decorator.py +0 -8
  18. xpk/core/workload_decorators/tcpx_decorator_test.py +0 -78
  19. xpk/core/workload_decorators/tcpxo_decorator.py +0 -16
  20. xpk/parser/common.py +0 -17
  21. xpk/parser/core.py +0 -39
  22. xpk/parser/storage.py +0 -11
  23. xpk/utils/feature_flags.py +1 -1
  24. xpk/utils/validation.py +0 -8
  25. {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/METADATA +15 -4
  26. {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/RECORD +30 -41
  27. xpk/commands/batch.py +0 -144
  28. xpk/commands/job.py +0 -244
  29. xpk/commands/kind.py +0 -286
  30. xpk/commands/kjob_common.py +0 -60
  31. xpk/commands/run.py +0 -140
  32. xpk/commands/shell.py +0 -142
  33. xpk/parser/batch.py +0 -43
  34. xpk/parser/job.py +0 -147
  35. xpk/parser/kind.py +0 -95
  36. xpk/parser/run.py +0 -47
  37. xpk/parser/shell.py +0 -59
  38. {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/WHEEL +0 -0
  39. {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/entry_points.txt +0 -0
  40. {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/licenses/LICENSE +0 -0
  41. {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/top_level.txt +0 -0
xpk/parser/storage.py CHANGED
@@ -25,7 +25,6 @@ from ..commands.storage import (
 )
 from .common import (
     add_cluster_arguments,
-    add_kind_cluster_arguments,
     add_shared_arguments,
 )
 from typing import Protocol, Any
@@ -185,7 +184,6 @@ def add_storage_attach_parser(
       help='Comma-separated list of mountOptions for PersistentVolume',
       default='implicit-dirs',
   )
-  add_kind_cluster_arguments(opt_args)
 
 
 def add_storage_create_parser(storage_subcommands_parser: Subcommands) -> None:
@@ -284,8 +282,6 @@ def add_storage_create_parser(storage_subcommands_parser: Subcommands) -> None:
       default='',
   )
 
-  add_kind_cluster_arguments(opt_args)
-
 
 def add_storage_list_parser(storage_subcommands_parser: Subcommands) -> None:
   storage_list_parser: argparse.ArgumentParser = (
@@ -319,12 +315,6 @@ def add_storage_detach_parser(storage_subcommands_parser: Subcommands) -> None:
   req_args.add_argument('name', type=str)
   add_cluster_arguments(req_args, required=True)
 
-  opt_args = storage_detach_parser.add_argument_group(
-      'Optional Arguments',
-      'Optional arguments for storage delete.',
-  )
-  add_kind_cluster_arguments(opt_args)
-
 
 def add_storage_delete_parser(storage_subcommands_parser: Subcommands) -> None:
   storage_delete_parser: argparse.ArgumentParser = (
@@ -352,4 +342,3 @@ def add_storage_delete_parser(storage_subcommands_parser: Subcommands) -> None:
       action='store_true',
       help='Force filestore instance deletion even if it has attached storages',
   )
-  add_kind_cluster_arguments(opt_args)
xpk/utils/feature_flags.py CHANGED
@@ -28,7 +28,7 @@ def _get_boolean_flag(flag: str, default: bool) -> bool:
 
 class _FeatureFlags:
   SUB_SLICING_ENABLED = _get_boolean_flag("SUB_SLICING_ENABLED", default=False)
-  TELEMETRY_ENABLED = _get_boolean_flag("TELEMETRY_ENABLED", default=False)
+  TELEMETRY_ENABLED = _get_boolean_flag("TELEMETRY_ENABLED", default=True)
   SUPER_SLICING_ENABLED = _get_boolean_flag(
       "SUPER_SLICING_ENABLED", default=False
   )
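For context on the hunk above: 1.0.0 flips the `TELEMETRY_ENABLED` feature-flag default from `False` to `True`. The flag is read through `_get_boolean_flag(flag: str, default: bool) -> bool` (only the signature is visible in the hunk header). The sketch below shows one way such an environment-backed boolean flag could be read; the parsing rules and the `os.environ` lookup are assumptions for illustration, not the packaged implementation, and the documented opt-out remains the `xpk config set send-telemetry <true/false>` command described in the METADATA privacy notice further down.

```python
import os


def _get_boolean_flag(flag: str, default: bool) -> bool:
  """Sketch of an env-var-backed boolean flag reader (assumed behavior)."""
  raw = os.environ.get(flag)
  if raw is None:
    return default
  # Treat common truthy spellings as True; everything else as False.
  return raw.strip().lower() in ('1', 'true', 'yes', 'on')


class _FeatureFlags:
  # Matches the 1.0.0 default shown in the diff: telemetry is on by default.
  TELEMETRY_ENABLED = _get_boolean_flag('TELEMETRY_ENABLED', default=True)
```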
xpk/utils/validation.py CHANGED
@@ -37,14 +37,6 @@ class SystemDependency(Enum):
           ' to install xpk prerequisites.'
       ),
   )
-  KJOB = _SystemDependency(
-      command='kubectl kjob --help',
-      message=(
-          '`kjobctl` not installed. Please follow'
-          ' https://github.com/AI-Hypercomputer/xpk?tab=readme-ov-file#prerequisites'
-          ' to install xpk prerequisites.'
-      ),
-  )
   GCLOUD = _SystemDependency(
       command='gcloud version',
       message=(
{xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xpk
-Version: 0.17.2
+Version: 1.0.0
 Summary: xpk helps Cloud developers to orchestrate training jobs on accelerators on GKE.
 Author-email: XPK team <xpk-code-reviewers@google.com>
 License: Apache-2.0
@@ -114,10 +114,21 @@ XPK also supports the following [Google Cloud Storage solutions](./docs/usage/st
 * [Storage](./docs/usage/storage.md)
 * [Advanced](./docs/usage/advanced.md)
 * [Inspector](./docs/usage/inspector.md)
-* [Run](./docs/usage/run.md)
-* [Job](./docs/usage/job.md)
 * [Troubleshooting](./docs/troubleshooting.md)
-* [Local Testing](./docs/local_testing.md)
+
+# Privacy notice
+
+To help improve XPK, feature usage statistics are collected and sent to Google. You can opt-out at any time by executing
+the following shell command:
+
+```shell
+xpk config set send-telemetry <true/false>
+```
+
+XPK telemetry overall is handled in accordance with the [Google Privacy Policy](https://policies.google.com/privacy). When
+you use XPK to interact with or utilize GCP Services, your information is handled in accordance with the
+[Google Cloud Privacy Notice](https://cloud.google.com/terms/cloud-privacy-notice).
+
 
 # Contributing
 
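The privacy notice added above gives the opt-out command in template form; filled in with a concrete value, disabling telemetry would look like the following (the command form is taken verbatim from the notice, only the `<true/false>` placeholder is substituted):

```shell
xpk config set send-telemetry false
```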
{xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/RECORD CHANGED
@@ -20,34 +20,28 @@ xpk/blueprints/a4/config-map.yaml.tftpl,sha256=o6LeGIYUfFGyj3vj-8ztV5ildQ46QZVl7
 xpk/blueprints/a4/nccl-rdma-installer-a4.yaml,sha256=if3WOmNLVGTJIJHU76EWC1FyiIXDTRIXcwo4OsBxarQ,2113
 xpk/blueprints/a4/storage_crd.yaml,sha256=r4WFXnSJJ25EUF-t4Ljfbl-cJoSaiFiZkP8451eTub4,1260
 xpk/commands/__init__.py,sha256=YPwWBbgLAu7L-YlTVGB2r8ZV4TzypURMRBcehSHHlLY,561
-xpk/commands/batch.py,sha256=Cj1bDpzPMoPdhaKKrOJJLJ3JzRvJrCMn8huQoHHIZJI,4192
-xpk/commands/cluster.py,sha256=DtMiIYdYsciXldoWqAfxPIxl9Hc9kbYIj2LsdBER0PI,46172
-xpk/commands/cluster_gcluster.py,sha256=x26UqoT8RFX5T9ftQXPEL12HMnMFTi8lret16dnZCms,13970
-xpk/commands/cluster_gcluster_test.py,sha256=UcqTTkrQv-R753AtsQvinwgI2vqI6lMHPPEfHPS5e-4,6655
-xpk/commands/cluster_test.py,sha256=-7EjuOoGSZhdnLBNBNCMKM6laDYy02aPncbSfUYcrUs,24147
+xpk/commands/cluster.py,sha256=d5L8Kqfk93SZCxbPCZ5oePkqruWYZKi49rJ6yGgZSMg,45479
+xpk/commands/cluster_gcluster.py,sha256=Ig8jLjsiyFgw9U4BBEzDK2diA9m0STKQgz-uUTG_vYE,13731
+xpk/commands/cluster_gcluster_test.py,sha256=zdxz5gAMu3HRVNsj7F-VYRf4TYSPMjuOG7DolQN2Pb4,6263
+xpk/commands/cluster_test.py,sha256=aMkwKrhoEuqElME16ztx5lwv4zT0z_xV0L3in1RaW6M,24017
 xpk/commands/common.py,sha256=p43sspD5RfYRj3Se_b-X0s0dbBs1PMI1qtySg6zZKKg,2706
 xpk/commands/config.py,sha256=L_zRpQTxMcSh6rxOT8gG263V6YGqzVoz4UxdWywTFdA,850
 xpk/commands/info.py,sha256=uhv5mPfgg9N-5JhQw4dT2jujL9ZC5kzGA18h9NFfm5A,7429
 xpk/commands/inspector.py,sha256=FPasKtGuEZKNXIQin4AG49clfD4b53NxXpWqBPZIIoE,12955
-xpk/commands/job.py,sha256=rPIfWvgm5mLz7K7YDLK721ZcUcg5OEmYVAPAtRtB5Ag,6718
-xpk/commands/kind.py,sha256=GNqsaoLInifFQ_ZGpbN_3xA8ExyeyOqBMdnoPV-PqYI,7813
-xpk/commands/kjob_common.py,sha256=bRaORiGVjPAdN0T3aRmbcQgXYe-EtjoVKePdWzQ5xU4,1928
 xpk/commands/managed_ml_diagnostics.py,sha256=87wmFbnYQY-kEpJfPo1Up53xM5P_P5wOlXczxHzxJjQ,6984
 xpk/commands/managed_ml_diagnostics_test.py,sha256=pQ1YUGMGRQFJYTS_1o9YyGUzYdLaBdA84LjbnncaeEo,3828
-xpk/commands/run.py,sha256=D0zgmnGeBLATphYhzQj29EScxrMmAKqPRhP6nfWuYcY,4085
-xpk/commands/shell.py,sha256=mRHMwm3Izzsue4bocekm82Rg_cPUaGMClSlvNzNXQ-o,4467
 xpk/commands/storage.py,sha256=cSTJN9Mjvdsvk_Nk43kVdQFhp89nxWbanDsTOGZCkpQ,10708
 xpk/commands/version.py,sha256=k30rdLP9clUM8eeSwRFhpfzSb1qwcQImTfuC59Ed6CA,771
-xpk/commands/workload.py,sha256=l99NRFLs7pXuaLdn5d-Pid-cZulKpB3FNus-HdNDtZw,31513
-xpk/commands/workload_test.py,sha256=iXTY7VR1KrlPZZyh1Zm0N946kIP1iV2Fnqx1NtOYDJU,7274
+xpk/commands/workload.py,sha256=HV54qdjxR4fceQjae9Qpgdk9BY2C5Wh8a9aOS_kFY4E,31725
+xpk/commands/workload_test.py,sha256=KLSI4L01fWmPm7xNPXEpjABU5FC9P3CXVto1ifSiqKY,7322
 xpk/core/__init__.py,sha256=YPwWBbgLAu7L-YlTVGB2r8ZV4TzypURMRBcehSHHlLY,561
 xpk/core/capacity.py,sha256=_TyWayBkNU8fBpz1LTbCddEFZiZW5Qz-xmJnQMsXh0c,10534
 xpk/core/capacity_test.py,sha256=jZjMHTYlFLdAmBN1t9k29iABCSE5hlW0--q7QLDQpfQ,4330
-xpk/core/cluster.py,sha256=3nl77I_MgQpBZsZSzsiQ_7IyFRzfLrYNRUL1gsSNhKU,24036
+xpk/core/cluster.py,sha256=_h82ZmYufvBOezEUvy-DWOJv9h3SQsy5isL9qbYdq44,24001
 xpk/core/cluster_private.py,sha256=RLi0C7bV0NEUXl6QKQzvUT0weN9EdqPvjuuOQsNO0DY,6868
 xpk/core/cluster_test.py,sha256=J4Wk7E--ik_IsWWzL_iWGWbx99Ih03m-0bs-uU7gGDg,5853
 xpk/core/commands.py,sha256=at73VJHdZ4rVA8uvW997tNrvnCjP9v6zaw96bU0kd74,10841
-xpk/core/config.py,sha256=L3iPFvzFCpW8IEAvlbkuEHYBYXmRTC0BAaR7I_5_Peo,5146
+xpk/core/config.py,sha256=U2JDXx-XBuqQpZJf2iUDoww5--E8ejZfgmIxKeGu-gU,4668
 xpk/core/config_test.py,sha256=POSuofK0LFbNNygDAo2fjtKY4NMrRjUFeGcpBh9JOS4,3569
 xpk/core/docker_container.py,sha256=8hqWWNKtjf6dqCFRpfndTMGvN_NS6zhfBr7YuKfh7qo,7626
 xpk/core/docker_image.py,sha256=9vwqbb6Mc3C5ZEOph03WS-EWI5hxMYGGigqzIMkDTjE,6909
@@ -60,22 +54,22 @@ xpk/core/gcluster_manager.py,sha256=lyv_MvdnkByy9_PEBj_ugAEBwnCbFNiWTSrEFjrMlPc,
 xpk/core/gcsfuse.py,sha256=kg5pgxdTjgiqquuGjev9fXzJPb8oiWPTK6wzCddzheQ,2125
 xpk/core/jobset.py,sha256=PJ4Fd8TNNLuYKNOMehoMYRIUEXyc5jsbHctJGqfW_8Y,4037
 xpk/core/kueue_manager.py,sha256=JB8DcD-RFvBdC9Mk_DDCAkI2Km8W5-KMTRMVec06LlM,20010
-xpk/core/kueue_manager_test.py,sha256=FfBd1vninU_fcJ9wZev45-vpEsH12a9-XKysk_h4auo,22008
+xpk/core/kueue_manager_test.py,sha256=ZYnIOFN2ZgnrxBhoBkh9JUXP5YbFsoFI11iQnHspafI,22109
 xpk/core/monitoring.py,sha256=__bzTq_DIDAK8yIaN4F3MJh-yjYw5X1OlxmRgYOpf1g,4332
 xpk/core/mtc.py,sha256=pO7p3l-EzLFdTE8MdwWV8i0Zu-7epGql_kPoksVofIU,6259
 xpk/core/nap.py,sha256=7haJtWVfe9csfK-LmmIcDnmXCPIJFnoSviHaZ4y4i6s,12556
 xpk/core/network.py,sha256=Oulb7U69lWkpOKxOC1C7ekJDpC51TLwd7XdZA3NQ7E0,10505
-xpk/core/nodepool.py,sha256=X8sANyzpDoYhV5uWfEYV0RnHfvj_o1G84hArqWaNDu8,24301
-xpk/core/nodepool_test.py,sha256=k29tg-lDNEZP6KUNJQE0sHIKFLD6LJ1PSLVtWHzqh5E,13161
+xpk/core/nodepool.py,sha256=ehncCzvtDZ5lhyejRkYQCBjlcKHwKh3PXuGSLY1eWz8,24504
+xpk/core/nodepool_test.py,sha256=qx0VQ_YpgjB8Sf1KJvQCKQuCd3NmWvjBQuKdnT8NWKU,13358
 xpk/core/pathways.py,sha256=32GxCIPiEBqSpK6g2gMmB7Nxj_HlG4I30u1C9UyWl1A,11594
 xpk/core/pathways_test.py,sha256=UeuSo_g9BNI27to-wflQwc6dJFVSA5-kOK_cjmY5qgU,1809
 xpk/core/ray.py,sha256=JWhc_ToRHpF4_URGnuE_47FMgamaRsA4KVUMpqThWzw,6145
 xpk/core/resources.py,sha256=dDsG_LOtcU17p1UKgOYyjdPxbMfqcb7pJ4SjfLDA6Os,9389
-xpk/core/scheduling.py,sha256=RMoei_HUs03rfrEC-HYk7ONzg9BRKwr59-KljCR2TMo,11560
-xpk/core/scheduling_test.py,sha256=iYnzXv_MjN743pa4zYAgRqb-6dB9nVPpLI7JP5S8M2I,14463
+xpk/core/scheduling.py,sha256=ucNOidEO_QQekIL44zG6Yhpr8gErPjJa6B2JJHb7diY,12404
+xpk/core/scheduling_test.py,sha256=dzdrUCc61Twu7WC86t1XNDg-7bWLYYz4hqBDWIVHKL4,15883
 xpk/core/storage.py,sha256=NILvVAcLNMLmp4wKx_TEKbMMF5X1oL-FrQV46PT0_ds,16902
-xpk/core/system_characteristics.py,sha256=Tam8wjUz77E6jAJib-r0GsTBmdjo9uaEkXmIdWuzGO8,32844
-xpk/core/system_characteristics_test.py,sha256=sREN8u8bC0ze_q9hY3v-ZxC7so-_Ox1mt_DkIbUgHJ4,7477
+xpk/core/system_characteristics.py,sha256=y9v4WRN-u9yvo990MKbWTUOWlzn-f6vfaMNeikQlhHY,34115
+xpk/core/system_characteristics_test.py,sha256=XVaKJ5wYdNwwwUKBnuK3zd1u-Qj3VnJR7MHlOeCa-K0,8029
 xpk/core/telemetry.py,sha256=R7IONNl5heMoNcOurfT3I34XJrBEODKVY88ONiDGuqE,7512
 xpk/core/telemetry_test.py,sha256=ll-B1ut9X-por17fpQnNb6hKrfyoZanMWRPbvqWrXss,8261
 xpk/core/updates.py,sha256=FxLDgEL2O-qnslhT9U60NG5gzXmSv8Fn2wPUf3YZLM8,1734
@@ -99,26 +93,21 @@ xpk/core/testing/__init__.py,sha256=PkV8D9WOtlJHH5AIxsQaKeIBcmupT_Ol_bwJgN6G2I8,
 xpk/core/testing/commands_tester.py,sha256=mQOSFggESeTdzqG4srAPV9ezmoeT90r22K58yAty9sE,4445
 xpk/core/testing/commands_tester_test.py,sha256=NnLWh7TJ9rKtb-DtB-vwkxvCe5wNtvUJ0f6sOa87Ht4,4023
 xpk/core/workload_decorators/__init__.py,sha256=YPwWBbgLAu7L-YlTVGB2r8ZV4TzypURMRBcehSHHlLY,561
-xpk/core/workload_decorators/rdma_decorator.py,sha256=isbgPnjdu2AT_Da1nVUIRoGE_qZ7jMDOKCgZOLq5r2A,4006
+xpk/core/workload_decorators/rdma_decorator.py,sha256=02HVA_jSyzlVtSQnQj7aPdK03h7v5YyioBqEen6pbj0,3636
 xpk/core/workload_decorators/storage_decorator.py,sha256=DDYQVO1OKTLhveDOA4V6b2RWr4n0fbwHdnoFFmW7iaQ,2000
-xpk/core/workload_decorators/tcpx_decorator.py,sha256=6yvofTv6_XmRfI-nESZjGYeLmGrza1rWxeJGET0TqXU,6182
-xpk/core/workload_decorators/tcpx_decorator_test.py,sha256=iTBS3X_-VwA2oveNDjscduLtll0VOJyFRCp4xmsjg7w,8515
-xpk/core/workload_decorators/tcpxo_decorator.py,sha256=_nLX7tbnxhnS-xv4Jijd1JOP76V4LpNCfW3Np404Cqw,6537
+xpk/core/workload_decorators/tcpx_decorator.py,sha256=cLOntH2ekBcPeiPW0sU3TRozSCpcTxgxpzncrMbRj44,5962
+xpk/core/workload_decorators/tcpx_decorator_test.py,sha256=BmTWsFoBeLb9xhQh3kpqSiarkYax4bj2wLeZ9GrQzag,6089
+xpk/core/workload_decorators/tcpxo_decorator.py,sha256=5SgL-7aTHclN7rvCGvEOjZoUixBmyjfuhVIUBFmneug,6124
 xpk/parser/__init__.py,sha256=YPwWBbgLAu7L-YlTVGB2r8ZV4TzypURMRBcehSHHlLY,561
-xpk/parser/batch.py,sha256=mJU-Cp1yTLje59vD-B1IiBcUeD-ZmEsoeB4xhj9cflc,1406
 xpk/parser/cluster.py,sha256=U2T-Q4yS86PWeFLNfknYWDDzZfubCKqIhqasxKLmErI,31342
 xpk/parser/cluster_test.py,sha256=xzQEC3IeAMpwsbNbHLuaNKxR3iaZcm3z4m3i61G62d4,6581
-xpk/parser/common.py,sha256=sJYGjrn2YgFxelDCYB18s1R8Md8GpDcMQNoAezxDDIs,7257
+xpk/parser/common.py,sha256=DBj0MQHbxcquPWJ3WcwdiaKhGJZgjdppNJrb9iUFQsE,6797
 xpk/parser/common_test.py,sha256=_6Fm2pUF7h4K0G5qxGabXSYr4ng9ihOzlViE6oLQwQs,1557
 xpk/parser/config.py,sha256=-XnWx9aFsBW4Uzo_hpOMD2ZQ0bdZLvq1ksv83_5jqSM,1633
-xpk/parser/core.py,sha256=VRJerlS92ufoQbG1mZv7B04DAP4qGkBHa4pRXgcbAs0,4761
+xpk/parser/core.py,sha256=rUgPYrqBgqoeuVVwcu3qMABs5KZ3jZI-lfkKywwpGYo,3340
 xpk/parser/info.py,sha256=UJohxVVWdt9IgUXoPsrVae2DN1BjAVGWrSN2ajrB8RQ,1860
 xpk/parser/inspector.py,sha256=hAPAZ2k9iSJgC1mjnz3rMleInsAQ8PmkyyUKFyBmsgY,1997
-xpk/parser/job.py,sha256=5RdE70rucGfrsn65l7Ho6RmO06mag1S0AO-3saVuXyw,4328
-xpk/parser/kind.py,sha256=sgPCqNVrgmFLcOBEbhlaphwVXxMh_opP9ntCq4KPePE,2682
-xpk/parser/run.py,sha256=oi_ksSyJ8Ooffe2EgoV_ecpmXEmNGVotjpIQH-HjufE,1481
-xpk/parser/shell.py,sha256=VC8p-kz9XjJZW9DXZ-rnv41XnRDRpQRFywHpB5j7tfc,1970
-xpk/parser/storage.py,sha256=0V1d1htsjoa-SuxOX_vNxz2Lg4Nue9CBe_H0bNS2Hv0,10270
+xpk/parser/storage.py,sha256=VnMWSGW1xNIMz_cU-dk1CtpGSZEtZ9ecz1KI3bpQPqw,9945
 xpk/parser/storage_test.py,sha256=i_F9cuQXHRvUy4RJwbfuuI8ZVpTpkkY96sZ1GZ4dLPw,1494
 xpk/parser/validators.py,sha256=-NBZelvfwZRzjz-YUCreD8EzMLHll8PZM-d-MVm2PG4,1192
 xpk/parser/version.py,sha256=eJo4PAbbmRQZulgKBs_ytbVgV9zAaaXeNzMMxmgFMVY,769
@@ -142,7 +131,7 @@ xpk/utils/__init__.py,sha256=YPwWBbgLAu7L-YlTVGB2r8ZV4TzypURMRBcehSHHlLY,561
 xpk/utils/console.py,sha256=AJWSyjuWyLjb7SYt8kPb0gw9N84EN9LbLxYCXjC-6Ds,2464
 xpk/utils/console_test.py,sha256=x1v7v9VrIZwAKH-eOzj1lAY4EsHxJ6ruhfEOzpssO6o,2944
 xpk/utils/execution_context.py,sha256=hONGz1hQSKE-puah2rE_uN9YUeEC4oW82VOryw5_Vgo,1181
-xpk/utils/feature_flags.py,sha256=lRNjJIKyMUvUkF35MjG0iQYuoxSZarf-W98gWOW8r6M,1161
+xpk/utils/feature_flags.py,sha256=9QSMpdxcGR84unhOGJyCteT9R92-h5K6tOcjn5YHgDw,1160
 xpk/utils/file.py,sha256=yB1-k3FahoxkBpojB59vQNeZYOXB3pmktnjU4Ceah7M,2605
 xpk/utils/gcs_utils.py,sha256=zg-XSTv4G4TFjeT2bNBm2WLdDXPrOZi0rNv_JdppNg4,4113
 xpk/utils/kubectl.py,sha256=WKB9UhpouPN9G4n2ejRi_PgsYLI0R01gzkS1WGU6mJA,1828
@@ -156,13 +145,13 @@ xpk/utils/user_agent.py,sha256=1NMtixC1RIr_MwM5pJ0THQ0x1-fCQA92TFHjWAVZldw,1083
 xpk/utils/user_agent_test.py,sha256=lkv8LqzhlA1gXFVeBzoLwE1_iGnm8G9LzkkElMrIrx0,1774
 xpk/utils/user_input.py,sha256=kMdCcPWdkI31f1mJcMsNGda-xKyKxEerpSLpCqIWYPc,1503
 xpk/utils/user_input_test.py,sha256=xO34jkMoTAk5Cmw7yHTk-7YexzC2UZ6ajihV8lnlAyI,2666
-xpk/utils/validation.py,sha256=irL9579RbvwxiGn1t3zhhPo-0oHgdUPOSYsUuFqsDSM,3039
+xpk/utils/validation.py,sha256=rE9LTkXJT7jIesodFb9pONL7ixhLqiQleyoaz7N39Dw,2765
 xpk/utils/validation_test.py,sha256=PEDSMUqZdt_Lx1FSR-LOTXKKtsJ47JH1fxugM0Gfz6Y,1168
 xpk/utils/versions.py,sha256=_Ep68W70a9605XjiaOOpBa9Is9jXlsoOiwL8v5Xt-WA,897
 xpk/utils/yaml.py,sha256=j8xuAJ9yAAwnQi6ozwZ-nMnDyDnc3xWkeBZMtSuP4RU,844
-xpk-0.17.2.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-xpk-0.17.2.dist-info/METADATA,sha256=_G5EPL08DVbtGWPXVmHAg_HxH_-op5be3Fx1rWRJiwI,7930
-xpk-0.17.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-xpk-0.17.2.dist-info/entry_points.txt,sha256=mzEtiIesFkT1kmcTUVDA1o3uOhiniX6tIz2wmOlMu1M,38
-xpk-0.17.2.dist-info/top_level.txt,sha256=TQKZWgV7LSElvmunYT9V_627qOMoxq3qYzWAFzKudB8,16
-xpk-0.17.2.dist-info/RECORD,,
+xpk-1.0.0.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+xpk-1.0.0.dist-info/METADATA,sha256=9woUe1dyR3fKZPXpoJeLwWTSNKCPZO--_LWee7UPRNc,8364
+xpk-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+xpk-1.0.0.dist-info/entry_points.txt,sha256=mzEtiIesFkT1kmcTUVDA1o3uOhiniX6tIz2wmOlMu1M,38
+xpk-1.0.0.dist-info/top_level.txt,sha256=TQKZWgV7LSElvmunYT9V_627qOMoxq3qYzWAFzKudB8,16
+xpk-1.0.0.dist-info/RECORD,,
xpk/commands/batch.py DELETED
@@ -1,144 +0,0 @@
-"""
-Copyright 2024 Google LLC
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-     https://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import re
-from argparse import Namespace
-
-from ..core.cluster import (
-    setup_k8s_service_accounts,
-    get_cluster_credentials,
-)
-from ..core.commands import run_command_for_value
-from ..core.gcloud_context import add_zone_and_project
-from ..core.kjob import (
-    AppProfileDefaults,
-    JobTemplateDefaults,
-    get_storage_annotations,
-    prepare_kjob,
-)
-from ..core.kueue_manager import LOCAL_QUEUE_NAME
-from ..utils.console import xpk_exit, xpk_print
-from ..utils.execution_context import is_dry_run
-from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies
-from .kind import set_local_cluster_command
-from .kjob_common import add_gpu_networking_annotations_to_command, add_TAS_annotations_to_command
-
-
-def batch(args: Namespace) -> None:
-  """Run batch task.
-  This function runs passed script in non-blocking manner.
-  Args:
-    args: user provided arguments for running the command.
-  Returns:
-    None
-  """
-  if should_validate_dependencies(args):
-    validate_dependencies_list([
-        SystemDependency.KUBECTL,
-        SystemDependency.KJOB,
-        SystemDependency.GCLOUD,
-    ])
-  if not args.kind_cluster:
-    add_zone_and_project(args)
-    get_cluster_credentials(args)
-  else:
-    set_cluster_command_code = set_local_cluster_command(args)
-    if set_cluster_command_code != 0:
-      xpk_exit(set_cluster_command_code)
-
-  if not is_dry_run():
-    err_code = prepare_kjob(args)
-    if err_code > 0:
-      xpk_exit(err_code)
-    setup_k8s_service_accounts()
-
-  submit_job(args)
-
-
-def submit_job(args: Namespace) -> None:
-  cmd = (
-      'kubectl kjob create slurm'
-      f' --profile {AppProfileDefaults.NAME.value}'
-      f' --localqueue {LOCAL_QUEUE_NAME}'
-      f' --worker-container {JobTemplateDefaults.CONTAINER_NAME.value}'
-      ' --first-node-ip'
-  )
-  cmd = add_gpu_networking_annotations_to_command(args, cmd)
-  cmd = add_TAS_annotations_to_command(args, cmd)
-
-  annotations = [] if is_dry_run() else get_storage_annotations(args)
-  for annotation in annotations:
-    cmd += f' --pod-template-annotation {annotation}'
-
-  if args.ignore_unknown_flags:
-    cmd += ' --ignore-unknown-flags'
-
-  cmd += f' -- {args.script} --partition {LOCAL_QUEUE_NAME}'
-
-  if args.array is not None:
-    cmd += f' --array {args.array}'
-
-  if args.cpus_per_task is not None:
-    cmd += f' --cpus-per-task {args.cpus_per_task}'
-
-  if args.gpus_per_task is not None:
-    cmd += f' --gpus-per-task {args.gpus_per_task}'
-
-  if args.mem is not None:
-    cmd += f' --mem {args.mem}'
-
-  if args.mem_per_task is not None:
-    cmd += f' --mem-per-task {args.mem_per_task}'
-
-  if args.mem_per_cpu is not None:
-    cmd += f' --mem-per-cpu {args.mem_per_cpu}'
-
-  if args.mem_per_gpu is not None:
-    cmd += f' --mem-per-gpu {args.mem_per_gpu}'
-
-  if args.nodes is not None:
-    cmd += f' --nodes {args.nodes}'
-
-  if args.ntasks is not None:
-    cmd += f' --ntasks {args.ntasks}'
-
-  if args.output is not None:
-    cmd += f' --output {args.output}'
-
-  if args.error is not None:
-    cmd += f' --error {args.error}'
-
-  if args.input is not None:
-    cmd += f' --input {args.input}'
-
-  if args.job_name is not None:
-    cmd += f' --job-name {args.job_name}'
-
-  if args.chdir is not None:
-    cmd += f' --chdir {args.chdir}'
-
-  if args.time is not None:
-    cmd += f' --time {args.time}'
-
-  return_code, return_value = run_command_for_value(cmd, 'submit job')
-
-  if return_code != 0:
-    xpk_print(f'Running batch job returned ERROR {return_code}')
-    xpk_exit(return_code)
-
-  m = re.match(r'job\.batch/([-a-z0-9]+)', return_value)
-  if m:
-    xpk_print(f'Job name: {m.group(1)}')
xpk/commands/job.py DELETED
@@ -1,244 +0,0 @@
-"""
-Copyright 2024 Google LLC
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-     https://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import re
-import sys
-
-from ruamel.yaml import YAML
-from typing import cast
-
-from ..core.commands import run_command_for_value, run_command_with_updates
-from ..core.cluster import get_cluster_credentials
-from ..core.gcloud_context import add_zone_and_project
-from ..core.kjob import AppProfileDefaults
-from ..utils.console import xpk_exit, xpk_print
-from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies
-from .kind import set_local_cluster_command
-
-
-JOBS_DRY_RUN_YAML = """
-items:
-- apiVersion: slurm.k8s.io/v1alpha1
-  kind: SlurmJob
-  metadata:
-    annotations:
-      kjobctl.x-k8s.io/script: echo hello
-    creationTimestamp: '2024-04-29T12:00:00Z'
-    labels:
-      kjobctl.x-k8s.io/app-profile: default
-    name: golden-job
-    namespace: default
-  spec:
-    script: echo hello
-"""
-
-PODS_DRY_RUN_RESULT = """
-foo-pod 2/2 Running 0 2d
-bar-pod 1/1 Evicted 0 1d
-"""
-
-
-def job_info(args):
-  """Run commands obtaining information about a job given by name.
-
-  Args:
-    args: user provided arguments for running the command.
-
-  Returns:
-    None
-  """
-  if should_validate_dependencies(args):
-    validate_dependencies_list([
-        SystemDependency.KUBECTL,
-        SystemDependency.KJOB,
-        SystemDependency.GCLOUD,
-    ])
-  job_name = args.name
-
-  desc_command = f'kubectl-kjob describe slurm {job_name}'
-  desc_code, desc_text = run_command_for_value(desc_command, 'Getting job data')
-  if desc_code != 0:
-    xpk_print(f'Data info request returned ERROR {desc_code}')
-    xpk_exit(desc_code)
-
-  job_command = (
-      'kubectl-kjob list slurm -o yaml --field-selector'
-      f' metadata.name=={job_name}'
-  )
-  job_code, job_text = run_command_for_value(
-      job_command,
-      'Getting job info',
-      dry_run_return_val=JOBS_DRY_RUN_YAML,
-  )
-  if job_code != 0:
-    xpk_print(f'Job info request returned ERROR {job_code}')
-    xpk_exit(job_code)
-
-  pods_command = f'kubectl get pods -l=job-name={job_name} --no-headers'
-  pods_code, pods_text = run_command_for_value(
-      pods_command,
-      'Getting pods list',
-      dry_run_return_val=PODS_DRY_RUN_RESULT,
-  )
-  if pods_code != 0:
-    xpk_print(f'Pods list request returned ERROR {pods_code}')
-    xpk_exit(pods_code)
-
-  yaml = YAML(typ='safe')
-  job_yaml = yaml.load(job_text)['items'][0]
-
-  output = {
-      'Job name': job_name,
-      'Script name': get_script_name(job_yaml),
-      'Profile': get_profile(job_yaml),
-      'Labels': job_yaml.get('metadata').get('labels', []),
-      'Mounts': get_mounts(job_yaml),
-      'Pods': get_pods(pods_text),
-      'Entrypoint environment variables template': get_kjob_env_vars(desc_text),
-  }
-
-  yaml.default_flow_style = False
-  yaml.sort_base_mapping_type_on_output = False
-  yaml.dump(output, sys.stdout)
-
-
-def get_profile(job_yaml: dict) -> str:
-  containers: list[dict] = (
-      job_yaml.get('spec', {})
-      .get('template', {})
-      .get('spec', {})
-      .get('containers', [])
-  )
-  env_vars = next(iter(containers), {}).get('env', [])
-  profile = next((x['value'] for x in env_vars if x['name'] == 'PROFILE'), '')
-  return profile
-
-
-def get_mounts(job_yaml: dict) -> list[dict]:
-  containers: list[dict] = (
-      job_yaml.get('spec', {})
-      .get('template', {})
-      .get('spec', {})
-      .get('containers', [])
-  )
-  mounts: list[dict] = next(iter(containers), {}).get('volumeMounts', [])
-  return mounts
-
-
-def get_kjob_env_vars(job_desc_text: str) -> list[tuple[str, str]]:
-  regex = r'(SLURM_[A-Z_]*=.*)'
-  search_res = re.findall(regex, job_desc_text)
-  return search_res
-
-
-def get_pods(pods_text: str) -> list[dict[str, str]]:
-  pods_lines = pods_text.strip().split('\n')
-  pods_lines_tokenized = [line.split() for line in pods_lines]
-  return [
-      {
-          'Name': tokens[0],
-          'Status': tokens[2],
-      }
-      for tokens in pods_lines_tokenized
-  ]
-
-
-def get_script_name(job_yaml: dict) -> str | None:
-  return cast(
-      str | None,
-      job_yaml.get('metadata', {})
-      .get('annotations', {})
-      .get('kjobctl.x-k8s.io/script', ''),
-  )
-
-
-def job_list(args) -> None:
-  """Function around job list.
-
-  Args:
-    args: user provided arguments for running the command.
-
-  Returns:
-    None
-  """
-  if should_validate_dependencies(args):
-    validate_dependencies_list([
-        SystemDependency.KUBECTL,
-        SystemDependency.KJOB,
-        SystemDependency.GCLOUD,
-    ])
-  if not args.kind_cluster:
-    add_zone_and_project(args)
-    get_cluster_credentials(args)
-    msg = f'Listing jobs for project {args.project} and zone {args.zone}:'
-  else:
-    set_cluster_command_code = set_local_cluster_command(args)
-    msg = 'Listing jobs:'
-    if set_cluster_command_code != 0:
-      xpk_exit(set_cluster_command_code)
-
-  xpk_print(msg, flush=True)
-
-  return_code = run_slurm_job_list_command()
-  xpk_exit(return_code)
-
-
-def run_slurm_job_list_command() -> int:
-  cmd = f'kubectl-kjob list slurm --profile {AppProfileDefaults.NAME.value}'
-
-  return_code = run_command_with_updates(cmd, 'list jobs')
-  if return_code != 0:
-    xpk_print(f'Listing jobs returned ERROR {return_code}')
-  return return_code
-
-
-def job_cancel(args) -> None:
-  """Function around job cancel.
-
-  Args:
-    args: user provided arguments for running the command.
-
-  Returns:
-    None
-  """
-  if should_validate_dependencies(args):
-    validate_dependencies_list([
-        SystemDependency.KUBECTL,
-        SystemDependency.KJOB,
-        SystemDependency.GCLOUD,
-    ])
-
-  xpk_print(f'Starting job cancel for job: {args.name}', flush=True)
-  if not args.kind_cluster:
-    add_zone_and_project(args)
-    get_cluster_credentials(args)
-  else:
-    set_cluster_command_code = set_local_cluster_command(args)
-    if set_cluster_command_code != 0:
-      xpk_exit(set_cluster_command_code)
-
-  return_code = run_slurm_job_delete_command(args)
-  xpk_exit(return_code)
-
-
-def run_slurm_job_delete_command(args) -> int:
-  list_of_jobs = ' '.join(args.name)
-  cmd = f'kubectl-kjob delete slurm {list_of_jobs}'
-
-  return_code = run_command_with_updates(cmd, 'delete job')
-  if return_code != 0:
-    xpk_print(f'Delete job request returned ERROR {return_code}')
-  return return_code