xpk 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. xpk/__init__.py +15 -0
  2. xpk/api/__init__.py +15 -0
  3. xpk/api/storage_crd.yaml +52 -0
  4. xpk/commands/__init__.py +15 -0
  5. xpk/commands/batch.py +131 -0
  6. xpk/commands/cluster.py +808 -0
  7. xpk/commands/cluster_gcluster.py +269 -0
  8. xpk/commands/common.py +44 -0
  9. xpk/commands/config.py +29 -0
  10. xpk/commands/info.py +243 -0
  11. xpk/commands/inspector.py +357 -0
  12. xpk/commands/job.py +199 -0
  13. xpk/commands/kind.py +283 -0
  14. xpk/commands/kjob_common.py +44 -0
  15. xpk/commands/run.py +128 -0
  16. xpk/commands/shell.py +140 -0
  17. xpk/commands/storage.py +267 -0
  18. xpk/commands/version.py +27 -0
  19. xpk/commands/workload.py +889 -0
  20. xpk/core/__init__.py +15 -0
  21. xpk/core/blueprint/__init__.py +15 -0
  22. xpk/core/blueprint/blueprint_definitions.py +62 -0
  23. xpk/core/blueprint/blueprint_generator.py +708 -0
  24. xpk/core/capacity.py +185 -0
  25. xpk/core/cluster.py +564 -0
  26. xpk/core/cluster_private.py +200 -0
  27. xpk/core/commands.py +356 -0
  28. xpk/core/config.py +179 -0
  29. xpk/core/docker_container.py +225 -0
  30. xpk/core/docker_image.py +210 -0
  31. xpk/core/docker_manager.py +308 -0
  32. xpk/core/docker_resources.py +350 -0
  33. xpk/core/filestore.py +251 -0
  34. xpk/core/gcloud_context.py +196 -0
  35. xpk/core/gcluster_manager.py +176 -0
  36. xpk/core/gcsfuse.py +50 -0
  37. xpk/core/kjob.py +444 -0
  38. xpk/core/kueue.py +358 -0
  39. xpk/core/monitoring.py +134 -0
  40. xpk/core/nap.py +361 -0
  41. xpk/core/network.py +377 -0
  42. xpk/core/nodepool.py +581 -0
  43. xpk/core/pathways.py +377 -0
  44. xpk/core/ray.py +222 -0
  45. xpk/core/remote_state/__init__.py +15 -0
  46. xpk/core/remote_state/fuse_remote_state.py +99 -0
  47. xpk/core/remote_state/remote_state_client.py +38 -0
  48. xpk/core/resources.py +238 -0
  49. xpk/core/scheduling.py +253 -0
  50. xpk/core/storage.py +581 -0
  51. xpk/core/system_characteristics.py +1432 -0
  52. xpk/core/vertex.py +105 -0
  53. xpk/core/workload.py +341 -0
  54. xpk/core/workload_decorators/__init__.py +15 -0
  55. xpk/core/workload_decorators/rdma_decorator.py +129 -0
  56. xpk/core/workload_decorators/storage_decorator.py +52 -0
  57. xpk/core/workload_decorators/tcpxo_decorator.py +190 -0
  58. xpk/main.py +75 -0
  59. xpk/parser/__init__.py +15 -0
  60. xpk/parser/batch.py +43 -0
  61. xpk/parser/cluster.py +662 -0
  62. xpk/parser/common.py +259 -0
  63. xpk/parser/config.py +49 -0
  64. xpk/parser/core.py +135 -0
  65. xpk/parser/info.py +64 -0
  66. xpk/parser/inspector.py +65 -0
  67. xpk/parser/job.py +147 -0
  68. xpk/parser/kind.py +95 -0
  69. xpk/parser/run.py +47 -0
  70. xpk/parser/shell.py +59 -0
  71. xpk/parser/storage.py +316 -0
  72. xpk/parser/validators.py +39 -0
  73. xpk/parser/version.py +23 -0
  74. xpk/parser/workload.py +726 -0
  75. xpk/templates/__init__.py +15 -0
  76. xpk/templates/storage.yaml +13 -0
  77. xpk/utils/__init__.py +15 -0
  78. xpk/utils/console.py +55 -0
  79. xpk/utils/file.py +82 -0
  80. xpk/utils/gcs_utils.py +125 -0
  81. xpk/utils/kubectl.py +57 -0
  82. xpk/utils/network.py +168 -0
  83. xpk/utils/objects.py +88 -0
  84. xpk/utils/templates.py +28 -0
  85. xpk/utils/validation.py +80 -0
  86. xpk/utils/yaml.py +30 -0
  87. {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/METADATA +456 -32
  88. xpk-0.7.0.dist-info/RECORD +92 -0
  89. {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/WHEEL +1 -1
  90. xpk-0.7.0.dist-info/entry_points.txt +2 -0
  91. xpk-0.5.0.dist-info/RECORD +0 -7
  92. xpk-0.5.0.dist-info/entry_points.txt +0 -2
  93. xpk.py +0 -7282
  94. {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/LICENSE +0 -0
  95. {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/top_level.txt +0 -0
xpk/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """
2
+ Copyright 2023 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
xpk/api/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
xpk/api/storage_crd.yaml ADDED
@@ -0,0 +1,52 @@
1
+ apiVersion: apiextensions.k8s.io/v1
2
+ kind: CustomResourceDefinition
3
+ metadata:
4
+ name: storages.xpk.x-k8s.io
5
+ spec:
6
+ group: xpk.x-k8s.io
7
+ versions:
8
+ - name: v1
9
+ served: true
10
+ storage: true
11
+ schema:
12
+ openAPIV3Schema:
13
+ type: object
14
+ properties:
15
+ spec:
16
+ type: object
17
+ properties:
18
+ type:
19
+ type: string
20
+ cluster:
21
+ type: string
22
+ auto_mount:
23
+ type: boolean
24
+ mount_point:
25
+ type: string
26
+ readonly:
27
+ type: boolean
28
+ manifest:
29
+ type: string
30
+ pv:
31
+ type: string
32
+ pvc:
33
+ type: string
34
+ required:
35
+ - type
36
+ - cluster
37
+ - auto_mount
38
+ - mount_point
39
+ - readonly
40
+ - manifest
41
+ - pvc
42
+ - pv
43
+ x-kubernetes-validations:
44
+ - message: Value is immutable
45
+ rule: self == oldSelf
46
+ scope: Cluster
47
+ names:
48
+ plural: storages
49
+ singular: storage
50
+ kind: Storage
51
+ shortNames:
52
+ - stg
xpk/commands/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
xpk/commands/batch.py ADDED
@@ -0,0 +1,131 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from argparse import Namespace
18
+
19
+ from ..core.cluster import create_xpk_k8s_service_account
20
+ from ..core.commands import run_command_for_value
21
+ from ..core.gcloud_context import add_zone_and_project
22
+ from ..core.kueue import LOCAL_QUEUE_NAME
23
+ from ..utils.console import xpk_exit, xpk_print
24
+ from .common import set_cluster_command
25
+ from ..core.kjob import AppProfileDefaults, JobTemplateDefaults, prepare_kjob, Kueue_TAS_annotation, get_gcsfuse_annotation
26
+ from .kjob_common import add_gpu_networking_annotations_to_command
27
+ from .kind import set_local_cluster_command
28
+ import re
29
+
30
+
31
def batch(args: Namespace) -> None:
  """Entry point of the batch command: set up the cluster, then submit.

  The passed script is submitted in a non-blocking manner. Steps, in order:
    1. Select the target cluster. When `args.kind_cluster` is truthy the
       local cluster command is used; otherwise zone and project are added
       to `args` first and the regular cluster command is used.
    2. If that step reports a non-zero code, call `xpk_exit` with it.
    3. Run `prepare_kjob`; a positive code is passed to `xpk_exit`.
    4. Call `create_xpk_k8s_service_account` and hand over to `submit_job`.

  Args:
    args: user provided arguments for running the command.

  Returns:
    None
  """
  if args.kind_cluster:
    cluster_status = set_local_cluster_command(args)
  else:
    add_zone_and_project(args)
    cluster_status = set_cluster_command(args)

  if cluster_status != 0:
    xpk_exit(cluster_status)

  kjob_status = prepare_kjob(args)
  if kjob_status > 0:
    xpk_exit(kjob_status)

  create_xpk_k8s_service_account()
  submit_job(args)
54
+
55
+
56
def submit_job(args: Namespace) -> None:
  """Assemble and run the `kubectl kjob create slurm` command for a script.

  The command line is built in three parts:
    * kjob options placed before the `--` separator (profile, local queue,
      pod template annotations, worker container, optional
      `--ignore-unknown-flags`),
    * the script itself followed by `--partition`,
    * one slurm-style option for every optional argument that is not None,
      always in the same fixed order.

  If the command returns a non-zero code, an error is printed and `xpk_exit`
  is called with that code. If the command output starts with
  `job.batch/<name>`, the job name is printed.

  Args:
    args: user provided arguments for running the command.

  Returns:
    None
  """
  create_xpk_k8s_service_account()

  # Options understood by kjob itself; they must precede the `--` separator.
  cmd = ''.join([
      'kubectl kjob create slurm',
      f' --profile {AppProfileDefaults.NAME.value}',
      f' --localqueue {LOCAL_QUEUE_NAME}',
      f' --pod-template-annotation {Kueue_TAS_annotation}',
      f' --worker-container {JobTemplateDefaults.CONTAINER_NAME.value}',
      ' --first-node-ip',
  ])
  cmd = add_gpu_networking_annotations_to_command(args, cmd)

  fuse_annotation = get_gcsfuse_annotation(args)
  if fuse_annotation is not None:
    cmd += f' --pod-template-annotation {fuse_annotation}'

  if args.ignore_unknown_flags:
    cmd += ' --ignore-unknown-flags'

  cmd += f' -- {args.script} --partition {LOCAL_QUEUE_NAME}'

  # (attribute on args, command-line flag) pairs; the order here is the order
  # in which the flags are appended to the command.
  optional_flags = (
      ('array', '--array'),
      ('cpus_per_task', '--cpus-per-task'),
      ('gpus_per_task', '--gpus-per-task'),
      ('mem', '--mem'),
      ('mem_per_task', '--mem-per-task'),
      ('mem_per_cpu', '--mem-per-cpu'),
      ('mem_per_gpu', '--mem-per-gpu'),
      ('nodes', '--nodes'),
      ('ntasks', '--ntasks'),
      ('output', '--output'),
      ('error', '--error'),
      ('input', '--input'),
      ('job_name', '--job-name'),
      ('chdir', '--chdir'),
      ('time', '--time'),
  )
  for attr_name, flag in optional_flags:
    value = getattr(args, attr_name)
    if value is not None:
      cmd += f' {flag} {value}'

  return_code, return_value = run_command_for_value(cmd, 'submit job', args)

  if return_code != 0:
    xpk_print(f'Running batch job returned ERROR {return_code}')
    xpk_exit(return_code)

  # re.match only matches at the very start of the command output.
  job_match = re.match(r'job\.batch/([-a-z0-9]+)', return_value)
  if job_match is not None:
    xpk_print(f'Job name: {job_match.group(1)}')