wandb 0.18.2__py3-none-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package_readme.md +89 -0
- wandb/__init__.py +245 -0
- wandb/__init__.pyi +1139 -0
- wandb/__main__.py +3 -0
- wandb/_globals.py +19 -0
- wandb/agents/__init__.py +0 -0
- wandb/agents/pyagent.py +363 -0
- wandb/analytics/__init__.py +3 -0
- wandb/analytics/sentry.py +266 -0
- wandb/apis/__init__.py +48 -0
- wandb/apis/attrs.py +40 -0
- wandb/apis/importers/__init__.py +1 -0
- wandb/apis/importers/internals/internal.py +385 -0
- wandb/apis/importers/internals/protocols.py +99 -0
- wandb/apis/importers/internals/util.py +78 -0
- wandb/apis/importers/mlflow.py +254 -0
- wandb/apis/importers/validation.py +108 -0
- wandb/apis/importers/wandb.py +1603 -0
- wandb/apis/internal.py +232 -0
- wandb/apis/normalize.py +89 -0
- wandb/apis/paginator.py +81 -0
- wandb/apis/public/__init__.py +34 -0
- wandb/apis/public/api.py +1305 -0
- wandb/apis/public/artifacts.py +1090 -0
- wandb/apis/public/const.py +4 -0
- wandb/apis/public/files.py +195 -0
- wandb/apis/public/history.py +149 -0
- wandb/apis/public/jobs.py +659 -0
- wandb/apis/public/projects.py +154 -0
- wandb/apis/public/query_generator.py +166 -0
- wandb/apis/public/reports.py +469 -0
- wandb/apis/public/runs.py +914 -0
- wandb/apis/public/sweeps.py +240 -0
- wandb/apis/public/teams.py +198 -0
- wandb/apis/public/users.py +136 -0
- wandb/apis/reports/__init__.py +1 -0
- wandb/apis/reports/v1/__init__.py +8 -0
- wandb/apis/reports/v2/__init__.py +8 -0
- wandb/apis/workspaces/__init__.py +8 -0
- wandb/beta/workflows.py +288 -0
- wandb/bin/nvidia_gpu_stats +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/__init__.py +0 -0
- wandb/cli/cli.py +3004 -0
- wandb/data_types.py +63 -0
- wandb/docker/__init__.py +342 -0
- wandb/docker/auth.py +436 -0
- wandb/docker/wandb-entrypoint.sh +33 -0
- wandb/docker/www_authenticate.py +94 -0
- wandb/env.py +514 -0
- wandb/errors/__init__.py +17 -0
- wandb/errors/errors.py +37 -0
- wandb/errors/term.py +103 -0
- wandb/errors/util.py +57 -0
- wandb/errors/warnings.py +2 -0
- wandb/filesync/__init__.py +0 -0
- wandb/filesync/dir_watcher.py +403 -0
- wandb/filesync/stats.py +100 -0
- wandb/filesync/step_checksum.py +142 -0
- wandb/filesync/step_prepare.py +179 -0
- wandb/filesync/step_upload.py +290 -0
- wandb/filesync/upload_job.py +142 -0
- wandb/integration/__init__.py +0 -0
- wandb/integration/catboost/__init__.py +5 -0
- wandb/integration/catboost/catboost.py +178 -0
- wandb/integration/cohere/__init__.py +3 -0
- wandb/integration/cohere/cohere.py +21 -0
- wandb/integration/cohere/resolver.py +347 -0
- wandb/integration/diffusers/__init__.py +3 -0
- wandb/integration/diffusers/autologger.py +76 -0
- wandb/integration/diffusers/pipeline_resolver.py +50 -0
- wandb/integration/diffusers/resolvers/__init__.py +9 -0
- wandb/integration/diffusers/resolvers/multimodal.py +882 -0
- wandb/integration/diffusers/resolvers/utils.py +102 -0
- wandb/integration/fastai/__init__.py +249 -0
- wandb/integration/gym/__init__.py +105 -0
- wandb/integration/huggingface/__init__.py +3 -0
- wandb/integration/huggingface/huggingface.py +18 -0
- wandb/integration/huggingface/resolver.py +213 -0
- wandb/integration/keras/__init__.py +11 -0
- wandb/integration/keras/callbacks/__init__.py +5 -0
- wandb/integration/keras/callbacks/metrics_logger.py +136 -0
- wandb/integration/keras/callbacks/model_checkpoint.py +195 -0
- wandb/integration/keras/callbacks/tables_builder.py +226 -0
- wandb/integration/keras/keras.py +1091 -0
- wandb/integration/kfp/__init__.py +6 -0
- wandb/integration/kfp/helpers.py +28 -0
- wandb/integration/kfp/kfp_patch.py +324 -0
- wandb/integration/kfp/wandb_logging.py +182 -0
- wandb/integration/langchain/__init__.py +3 -0
- wandb/integration/langchain/wandb_tracer.py +48 -0
- wandb/integration/lightgbm/__init__.py +239 -0
- wandb/integration/lightning/__init__.py +0 -0
- wandb/integration/lightning/fabric/__init__.py +3 -0
- wandb/integration/lightning/fabric/logger.py +762 -0
- wandb/integration/magic.py +556 -0
- wandb/integration/metaflow/__init__.py +3 -0
- wandb/integration/metaflow/metaflow.py +383 -0
- wandb/integration/openai/__init__.py +3 -0
- wandb/integration/openai/fine_tuning.py +480 -0
- wandb/integration/openai/openai.py +22 -0
- wandb/integration/openai/resolver.py +240 -0
- wandb/integration/prodigy/__init__.py +3 -0
- wandb/integration/prodigy/prodigy.py +299 -0
- wandb/integration/sacred/__init__.py +117 -0
- wandb/integration/sagemaker/__init__.py +12 -0
- wandb/integration/sagemaker/auth.py +28 -0
- wandb/integration/sagemaker/config.py +49 -0
- wandb/integration/sagemaker/files.py +3 -0
- wandb/integration/sagemaker/resources.py +34 -0
- wandb/integration/sb3/__init__.py +3 -0
- wandb/integration/sb3/sb3.py +153 -0
- wandb/integration/sklearn/__init__.py +37 -0
- wandb/integration/sklearn/calculate/__init__.py +32 -0
- wandb/integration/sklearn/calculate/calibration_curves.py +125 -0
- wandb/integration/sklearn/calculate/class_proportions.py +68 -0
- wandb/integration/sklearn/calculate/confusion_matrix.py +93 -0
- wandb/integration/sklearn/calculate/decision_boundaries.py +40 -0
- wandb/integration/sklearn/calculate/elbow_curve.py +55 -0
- wandb/integration/sklearn/calculate/feature_importances.py +67 -0
- wandb/integration/sklearn/calculate/learning_curve.py +64 -0
- wandb/integration/sklearn/calculate/outlier_candidates.py +69 -0
- wandb/integration/sklearn/calculate/residuals.py +86 -0
- wandb/integration/sklearn/calculate/silhouette.py +118 -0
- wandb/integration/sklearn/calculate/summary_metrics.py +62 -0
- wandb/integration/sklearn/plot/__init__.py +35 -0
- wandb/integration/sklearn/plot/classifier.py +329 -0
- wandb/integration/sklearn/plot/clusterer.py +146 -0
- wandb/integration/sklearn/plot/regressor.py +121 -0
- wandb/integration/sklearn/plot/shared.py +91 -0
- wandb/integration/sklearn/utils.py +183 -0
- wandb/integration/tensorboard/__init__.py +10 -0
- wandb/integration/tensorboard/log.py +355 -0
- wandb/integration/tensorboard/monkeypatch.py +185 -0
- wandb/integration/tensorflow/__init__.py +5 -0
- wandb/integration/tensorflow/estimator_hook.py +54 -0
- wandb/integration/torch/__init__.py +0 -0
- wandb/integration/torch/wandb_torch.py +554 -0
- wandb/integration/ultralytics/__init__.py +11 -0
- wandb/integration/ultralytics/bbox_utils.py +208 -0
- wandb/integration/ultralytics/callback.py +524 -0
- wandb/integration/ultralytics/classification_utils.py +83 -0
- wandb/integration/ultralytics/mask_utils.py +202 -0
- wandb/integration/ultralytics/pose_utils.py +103 -0
- wandb/integration/xgboost/__init__.py +11 -0
- wandb/integration/xgboost/xgboost.py +189 -0
- wandb/integration/yolov8/__init__.py +0 -0
- wandb/integration/yolov8/yolov8.py +284 -0
- wandb/jupyter.py +515 -0
- wandb/magic.py +3 -0
- wandb/mpmain/__init__.py +0 -0
- wandb/mpmain/__main__.py +1 -0
- wandb/old/__init__.py +0 -0
- wandb/old/core.py +53 -0
- wandb/old/settings.py +173 -0
- wandb/old/summary.py +440 -0
- wandb/plot/__init__.py +19 -0
- wandb/plot/bar.py +45 -0
- wandb/plot/confusion_matrix.py +100 -0
- wandb/plot/histogram.py +39 -0
- wandb/plot/line.py +43 -0
- wandb/plot/line_series.py +88 -0
- wandb/plot/pr_curve.py +136 -0
- wandb/plot/roc_curve.py +118 -0
- wandb/plot/scatter.py +32 -0
- wandb/plot/utils.py +183 -0
- wandb/plot/viz.py +123 -0
- wandb/proto/__init__.py +0 -0
- wandb/proto/v3/__init__.py +0 -0
- wandb/proto/v3/wandb_base_pb2.py +55 -0
- wandb/proto/v3/wandb_internal_pb2.py +1608 -0
- wandb/proto/v3/wandb_server_pb2.py +208 -0
- wandb/proto/v3/wandb_settings_pb2.py +112 -0
- wandb/proto/v3/wandb_telemetry_pb2.py +106 -0
- wandb/proto/v4/__init__.py +0 -0
- wandb/proto/v4/wandb_base_pb2.py +30 -0
- wandb/proto/v4/wandb_internal_pb2.py +360 -0
- wandb/proto/v4/wandb_server_pb2.py +63 -0
- wandb/proto/v4/wandb_settings_pb2.py +45 -0
- wandb/proto/v4/wandb_telemetry_pb2.py +41 -0
- wandb/proto/v5/wandb_base_pb2.py +31 -0
- wandb/proto/v5/wandb_internal_pb2.py +361 -0
- wandb/proto/v5/wandb_server_pb2.py +64 -0
- wandb/proto/v5/wandb_settings_pb2.py +46 -0
- wandb/proto/v5/wandb_telemetry_pb2.py +42 -0
- wandb/proto/wandb_base_pb2.py +10 -0
- wandb/proto/wandb_deprecated.py +53 -0
- wandb/proto/wandb_generate_deprecated.py +34 -0
- wandb/proto/wandb_generate_proto.py +49 -0
- wandb/proto/wandb_internal_pb2.py +16 -0
- wandb/proto/wandb_server_pb2.py +10 -0
- wandb/proto/wandb_settings_pb2.py +10 -0
- wandb/proto/wandb_telemetry_pb2.py +10 -0
- wandb/py.typed +0 -0
- wandb/sdk/__init__.py +37 -0
- wandb/sdk/artifacts/__init__.py +0 -0
- wandb/sdk/artifacts/_validators.py +90 -0
- wandb/sdk/artifacts/artifact.py +2389 -0
- wandb/sdk/artifacts/artifact_download_logger.py +43 -0
- wandb/sdk/artifacts/artifact_file_cache.py +253 -0
- wandb/sdk/artifacts/artifact_instance_cache.py +17 -0
- wandb/sdk/artifacts/artifact_manifest.py +74 -0
- wandb/sdk/artifacts/artifact_manifest_entry.py +249 -0
- wandb/sdk/artifacts/artifact_manifests/__init__.py +0 -0
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +92 -0
- wandb/sdk/artifacts/artifact_saver.py +269 -0
- wandb/sdk/artifacts/artifact_state.py +11 -0
- wandb/sdk/artifacts/artifact_ttl.py +7 -0
- wandb/sdk/artifacts/exceptions.py +57 -0
- wandb/sdk/artifacts/staging.py +25 -0
- wandb/sdk/artifacts/storage_handler.py +62 -0
- wandb/sdk/artifacts/storage_handlers/__init__.py +0 -0
- wandb/sdk/artifacts/storage_handlers/azure_handler.py +208 -0
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +228 -0
- wandb/sdk/artifacts/storage_handlers/http_handler.py +114 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +141 -0
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +56 -0
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +300 -0
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +72 -0
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +135 -0
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +74 -0
- wandb/sdk/artifacts/storage_layout.py +6 -0
- wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
- wandb/sdk/artifacts/storage_policies/register.py +1 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +378 -0
- wandb/sdk/artifacts/storage_policy.py +72 -0
- wandb/sdk/backend/__init__.py +0 -0
- wandb/sdk/backend/backend.py +222 -0
- wandb/sdk/data_types/__init__.py +0 -0
- wandb/sdk/data_types/_dtypes.py +914 -0
- wandb/sdk/data_types/_private.py +10 -0
- wandb/sdk/data_types/audio.py +165 -0
- wandb/sdk/data_types/base_types/__init__.py +0 -0
- wandb/sdk/data_types/base_types/json_metadata.py +55 -0
- wandb/sdk/data_types/base_types/media.py +315 -0
- wandb/sdk/data_types/base_types/wb_value.py +272 -0
- wandb/sdk/data_types/bokeh.py +70 -0
- wandb/sdk/data_types/graph.py +405 -0
- wandb/sdk/data_types/helper_types/__init__.py +0 -0
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +295 -0
- wandb/sdk/data_types/helper_types/classes.py +159 -0
- wandb/sdk/data_types/helper_types/image_mask.py +235 -0
- wandb/sdk/data_types/histogram.py +96 -0
- wandb/sdk/data_types/html.py +115 -0
- wandb/sdk/data_types/image.py +845 -0
- wandb/sdk/data_types/molecule.py +241 -0
- wandb/sdk/data_types/object_3d.py +474 -0
- wandb/sdk/data_types/plotly.py +82 -0
- wandb/sdk/data_types/saved_model.py +446 -0
- wandb/sdk/data_types/table.py +1204 -0
- wandb/sdk/data_types/trace_tree.py +438 -0
- wandb/sdk/data_types/utils.py +229 -0
- wandb/sdk/data_types/video.py +247 -0
- wandb/sdk/integration_utils/__init__.py +0 -0
- wandb/sdk/integration_utils/auto_logging.py +239 -0
- wandb/sdk/integration_utils/data_logging.py +475 -0
- wandb/sdk/interface/__init__.py +0 -0
- wandb/sdk/interface/constants.py +4 -0
- wandb/sdk/interface/interface.py +972 -0
- wandb/sdk/interface/interface_queue.py +59 -0
- wandb/sdk/interface/interface_relay.py +53 -0
- wandb/sdk/interface/interface_shared.py +537 -0
- wandb/sdk/interface/interface_sock.py +61 -0
- wandb/sdk/interface/message_future.py +27 -0
- wandb/sdk/interface/message_future_poll.py +50 -0
- wandb/sdk/interface/router.py +118 -0
- wandb/sdk/interface/router_queue.py +44 -0
- wandb/sdk/interface/router_relay.py +39 -0
- wandb/sdk/interface/router_sock.py +36 -0
- wandb/sdk/interface/summary_record.py +67 -0
- wandb/sdk/internal/__init__.py +0 -0
- wandb/sdk/internal/context.py +89 -0
- wandb/sdk/internal/datastore.py +297 -0
- wandb/sdk/internal/file_pusher.py +181 -0
- wandb/sdk/internal/file_stream.py +695 -0
- wandb/sdk/internal/flow_control.py +263 -0
- wandb/sdk/internal/handler.py +901 -0
- wandb/sdk/internal/internal.py +417 -0
- wandb/sdk/internal/internal_api.py +4358 -0
- wandb/sdk/internal/internal_util.py +100 -0
- wandb/sdk/internal/job_builder.py +629 -0
- wandb/sdk/internal/profiler.py +78 -0
- wandb/sdk/internal/progress.py +83 -0
- wandb/sdk/internal/run.py +25 -0
- wandb/sdk/internal/sample.py +70 -0
- wandb/sdk/internal/sender.py +1686 -0
- wandb/sdk/internal/sender_config.py +197 -0
- wandb/sdk/internal/settings_static.py +90 -0
- wandb/sdk/internal/system/__init__.py +0 -0
- wandb/sdk/internal/system/assets/__init__.py +27 -0
- wandb/sdk/internal/system/assets/aggregators.py +37 -0
- wandb/sdk/internal/system/assets/asset_registry.py +20 -0
- wandb/sdk/internal/system/assets/cpu.py +163 -0
- wandb/sdk/internal/system/assets/disk.py +210 -0
- wandb/sdk/internal/system/assets/gpu.py +416 -0
- wandb/sdk/internal/system/assets/gpu_amd.py +239 -0
- wandb/sdk/internal/system/assets/gpu_apple.py +177 -0
- wandb/sdk/internal/system/assets/interfaces.py +207 -0
- wandb/sdk/internal/system/assets/ipu.py +177 -0
- wandb/sdk/internal/system/assets/memory.py +166 -0
- wandb/sdk/internal/system/assets/network.py +125 -0
- wandb/sdk/internal/system/assets/open_metrics.py +299 -0
- wandb/sdk/internal/system/assets/tpu.py +154 -0
- wandb/sdk/internal/system/assets/trainium.py +399 -0
- wandb/sdk/internal/system/env_probe_helpers.py +13 -0
- wandb/sdk/internal/system/system_info.py +249 -0
- wandb/sdk/internal/system/system_monitor.py +229 -0
- wandb/sdk/internal/tb_watcher.py +518 -0
- wandb/sdk/internal/thread_local_settings.py +18 -0
- wandb/sdk/internal/writer.py +206 -0
- wandb/sdk/launch/__init__.py +14 -0
- wandb/sdk/launch/_launch.py +330 -0
- wandb/sdk/launch/_launch_add.py +255 -0
- wandb/sdk/launch/_project_spec.py +566 -0
- wandb/sdk/launch/agent/__init__.py +5 -0
- wandb/sdk/launch/agent/agent.py +924 -0
- wandb/sdk/launch/agent/config.py +296 -0
- wandb/sdk/launch/agent/job_status_tracker.py +53 -0
- wandb/sdk/launch/agent/run_queue_item_file_saver.py +45 -0
- wandb/sdk/launch/builder/__init__.py +0 -0
- wandb/sdk/launch/builder/abstract.py +156 -0
- wandb/sdk/launch/builder/build.py +297 -0
- wandb/sdk/launch/builder/context_manager.py +235 -0
- wandb/sdk/launch/builder/docker_builder.py +177 -0
- wandb/sdk/launch/builder/kaniko_builder.py +595 -0
- wandb/sdk/launch/builder/noop.py +58 -0
- wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +188 -0
- wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
- wandb/sdk/launch/create_job.py +528 -0
- wandb/sdk/launch/environment/abstract.py +29 -0
- wandb/sdk/launch/environment/aws_environment.py +322 -0
- wandb/sdk/launch/environment/azure_environment.py +105 -0
- wandb/sdk/launch/environment/gcp_environment.py +335 -0
- wandb/sdk/launch/environment/local_environment.py +66 -0
- wandb/sdk/launch/errors.py +19 -0
- wandb/sdk/launch/git_reference.py +109 -0
- wandb/sdk/launch/inputs/files.py +148 -0
- wandb/sdk/launch/inputs/internal.py +315 -0
- wandb/sdk/launch/inputs/manage.py +113 -0
- wandb/sdk/launch/inputs/schema.py +39 -0
- wandb/sdk/launch/loader.py +249 -0
- wandb/sdk/launch/registry/abstract.py +48 -0
- wandb/sdk/launch/registry/anon.py +29 -0
- wandb/sdk/launch/registry/azure_container_registry.py +124 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +192 -0
- wandb/sdk/launch/registry/google_artifact_registry.py +219 -0
- wandb/sdk/launch/registry/local_registry.py +67 -0
- wandb/sdk/launch/runner/__init__.py +0 -0
- wandb/sdk/launch/runner/abstract.py +195 -0
- wandb/sdk/launch/runner/kubernetes_monitor.py +474 -0
- wandb/sdk/launch/runner/kubernetes_runner.py +963 -0
- wandb/sdk/launch/runner/local_container.py +301 -0
- wandb/sdk/launch/runner/local_process.py +78 -0
- wandb/sdk/launch/runner/sagemaker_runner.py +426 -0
- wandb/sdk/launch/runner/vertex_runner.py +230 -0
- wandb/sdk/launch/sweeps/__init__.py +39 -0
- wandb/sdk/launch/sweeps/scheduler.py +742 -0
- wandb/sdk/launch/sweeps/scheduler_sweep.py +91 -0
- wandb/sdk/launch/sweeps/utils.py +316 -0
- wandb/sdk/launch/utils.py +746 -0
- wandb/sdk/launch/wandb_reference.py +138 -0
- wandb/sdk/lib/__init__.py +5 -0
- wandb/sdk/lib/_settings_toposort_generate.py +159 -0
- wandb/sdk/lib/_settings_toposort_generated.py +250 -0
- wandb/sdk/lib/_wburls_generate.py +25 -0
- wandb/sdk/lib/_wburls_generated.py +22 -0
- wandb/sdk/lib/apikey.py +273 -0
- wandb/sdk/lib/capped_dict.py +26 -0
- wandb/sdk/lib/config_util.py +101 -0
- wandb/sdk/lib/credentials.py +141 -0
- wandb/sdk/lib/deprecate.py +42 -0
- wandb/sdk/lib/disabled.py +29 -0
- wandb/sdk/lib/exit_hooks.py +54 -0
- wandb/sdk/lib/file_stream_utils.py +118 -0
- wandb/sdk/lib/filenames.py +64 -0
- wandb/sdk/lib/filesystem.py +372 -0
- wandb/sdk/lib/fsm.py +174 -0
- wandb/sdk/lib/gitlib.py +239 -0
- wandb/sdk/lib/gql_request.py +65 -0
- wandb/sdk/lib/handler_util.py +21 -0
- wandb/sdk/lib/hashutil.py +84 -0
- wandb/sdk/lib/import_hooks.py +275 -0
- wandb/sdk/lib/ipython.py +146 -0
- wandb/sdk/lib/json_util.py +80 -0
- wandb/sdk/lib/lazyloader.py +63 -0
- wandb/sdk/lib/mailbox.py +460 -0
- wandb/sdk/lib/module.py +69 -0
- wandb/sdk/lib/paths.py +106 -0
- wandb/sdk/lib/preinit.py +42 -0
- wandb/sdk/lib/printer.py +313 -0
- wandb/sdk/lib/proto_util.py +90 -0
- wandb/sdk/lib/redirect.py +845 -0
- wandb/sdk/lib/reporting.py +99 -0
- wandb/sdk/lib/retry.py +289 -0
- wandb/sdk/lib/run_moment.py +78 -0
- wandb/sdk/lib/runid.py +12 -0
- wandb/sdk/lib/server.py +52 -0
- wandb/sdk/lib/service_connection.py +216 -0
- wandb/sdk/lib/service_token.py +94 -0
- wandb/sdk/lib/sock_client.py +295 -0
- wandb/sdk/lib/sparkline.py +45 -0
- wandb/sdk/lib/telemetry.py +100 -0
- wandb/sdk/lib/timed_input.py +133 -0
- wandb/sdk/lib/timer.py +19 -0
- wandb/sdk/lib/tracelog.py +255 -0
- wandb/sdk/lib/wburls.py +46 -0
- wandb/sdk/service/__init__.py +0 -0
- wandb/sdk/service/_startup_debug.py +22 -0
- wandb/sdk/service/port_file.py +53 -0
- wandb/sdk/service/server.py +116 -0
- wandb/sdk/service/server_sock.py +276 -0
- wandb/sdk/service/service.py +242 -0
- wandb/sdk/service/streams.py +417 -0
- wandb/sdk/verify/__init__.py +0 -0
- wandb/sdk/verify/verify.py +501 -0
- wandb/sdk/wandb_alerts.py +12 -0
- wandb/sdk/wandb_config.py +322 -0
- wandb/sdk/wandb_helper.py +54 -0
- wandb/sdk/wandb_init.py +1266 -0
- wandb/sdk/wandb_login.py +349 -0
- wandb/sdk/wandb_metric.py +110 -0
- wandb/sdk/wandb_require.py +97 -0
- wandb/sdk/wandb_require_helpers.py +44 -0
- wandb/sdk/wandb_run.py +4236 -0
- wandb/sdk/wandb_settings.py +2001 -0
- wandb/sdk/wandb_setup.py +409 -0
- wandb/sdk/wandb_summary.py +150 -0
- wandb/sdk/wandb_sweep.py +119 -0
- wandb/sdk/wandb_sync.py +81 -0
- wandb/sdk/wandb_watch.py +144 -0
- wandb/sklearn.py +35 -0
- wandb/sync/__init__.py +3 -0
- wandb/sync/sync.py +443 -0
- wandb/trigger.py +29 -0
- wandb/util.py +1956 -0
- wandb/vendor/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/setup.py +40 -0
- wandb/vendor/gql-0.2.0/tests/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/tests/starwars/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/tests/starwars/fixtures.py +96 -0
- wandb/vendor/gql-0.2.0/tests/starwars/schema.py +146 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_dsl.py +293 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_query.py +355 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_validation.py +171 -0
- wandb/vendor/gql-0.2.0/tests/test_client.py +31 -0
- wandb/vendor/gql-0.2.0/tests/test_transport.py +89 -0
- wandb/vendor/gql-0.2.0/wandb_gql/__init__.py +4 -0
- wandb/vendor/gql-0.2.0/wandb_gql/client.py +75 -0
- wandb/vendor/gql-0.2.0/wandb_gql/dsl.py +152 -0
- wandb/vendor/gql-0.2.0/wandb_gql/gql.py +10 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/http.py +6 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/local_schema.py +15 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/requests.py +46 -0
- wandb/vendor/gql-0.2.0/wandb_gql/utils.py +21 -0
- wandb/vendor/graphql-core-1.1/setup.py +86 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/__init__.py +287 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/__init__.py +6 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/base.py +42 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/format_error.py +11 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/located_error.py +29 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/syntax_error.py +36 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/__init__.py +26 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/base.py +311 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executor.py +398 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/asyncio.py +53 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/gevent.py +22 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/process.py +32 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/sync.py +7 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/thread.py +35 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/utils.py +6 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/executor.py +66 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/fragment.py +252 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/resolver.py +151 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/utils.py +7 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/middleware.py +57 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/values.py +145 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/graphql.py +60 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/ast.py +1349 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/base.py +19 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/lexer.py +435 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/location.py +30 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/parser.py +779 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/printer.py +193 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/source.py +18 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor.py +222 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor_meta.py +82 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/cached_property.py +17 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/contain_subset.py +28 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/default_ordered_dict.py +40 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/ordereddict.py +8 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/pair_set.py +43 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/version.py +78 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/__init__.py +67 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/definition.py +619 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/directives.py +132 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/introspection.py +440 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/scalars.py +131 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/schema.py +100 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/typemap.py +145 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/assert_valid_name.py +9 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_from_value.py +65 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_code.py +49 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_dict.py +24 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/base.py +75 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_ast_schema.py +291 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_client_schema.py +250 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/concat_ast.py +9 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/extend_schema.py +357 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_field_def.py +27 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_operation_ast.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/introspection_query.py +90 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_literal_value.py +67 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_value.py +66 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/quoted_or_list.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/schema_printer.py +168 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/suggestion_list.py +56 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_comparators.py +69 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_from_ast.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_info.py +149 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/value_from_ast.py +69 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/__init__.py +4 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/__init__.py +79 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/arguments_of_correct_type.py +24 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/base.py +8 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/default_values_of_correct_type.py +44 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fields_on_correct_type.py +113 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fragments_on_composite_types.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_argument_names.py +70 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_directives.py +97 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_fragment_names.py +19 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_type_names.py +43 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/lone_anonymous_operation.py +23 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_fragment_cycles.py +59 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_undefined_variables.py +36 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_fragments.py +38 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_variables.py +37 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/overlapping_fields_can_be_merged.py +529 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/possible_fragment_spreads.py +44 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/provided_non_null_arguments.py +46 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/scalar_leafs.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_argument_names.py +32 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_fragment_names.py +28 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_input_field_names.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_operation_names.py +31 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_variable_names.py +27 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_are_input_types.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_in_allowed_position.py +53 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/validation.py +158 -0
- wandb/vendor/promise-2.3.0/conftest.py +30 -0
- wandb/vendor/promise-2.3.0/setup.py +64 -0
- wandb/vendor/promise-2.3.0/tests/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/tests/conftest.py +8 -0
- wandb/vendor/promise-2.3.0/tests/test_awaitable.py +32 -0
- wandb/vendor/promise-2.3.0/tests/test_awaitable_35.py +47 -0
- wandb/vendor/promise-2.3.0/tests/test_benchmark.py +116 -0
- wandb/vendor/promise-2.3.0/tests/test_complex_threads.py +23 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader.py +452 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader_awaitable_35.py +99 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader_extra.py +65 -0
- wandb/vendor/promise-2.3.0/tests/test_extra.py +670 -0
- wandb/vendor/promise-2.3.0/tests/test_issues.py +132 -0
- wandb/vendor/promise-2.3.0/tests/test_promise_list.py +70 -0
- wandb/vendor/promise-2.3.0/tests/test_spec.py +584 -0
- wandb/vendor/promise-2.3.0/tests/test_thread_safety.py +115 -0
- wandb/vendor/promise-2.3.0/tests/utils.py +3 -0
- wandb/vendor/promise-2.3.0/wandb_promise/__init__.py +38 -0
- wandb/vendor/promise-2.3.0/wandb_promise/async_.py +135 -0
- wandb/vendor/promise-2.3.0/wandb_promise/compat.py +32 -0
- wandb/vendor/promise-2.3.0/wandb_promise/dataloader.py +326 -0
- wandb/vendor/promise-2.3.0/wandb_promise/iterate_promise.py +12 -0
- wandb/vendor/promise-2.3.0/wandb_promise/promise.py +848 -0
- wandb/vendor/promise-2.3.0/wandb_promise/promise_list.py +151 -0
- wandb/vendor/promise-2.3.0/wandb_promise/pyutils/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/wandb_promise/pyutils/version.py +83 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/asyncio.py +22 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/gevent.py +21 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/immediate.py +27 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/thread.py +18 -0
- wandb/vendor/promise-2.3.0/wandb_promise/utils.py +56 -0
- wandb/vendor/pygments/__init__.py +90 -0
- wandb/vendor/pygments/cmdline.py +568 -0
- wandb/vendor/pygments/console.py +74 -0
- wandb/vendor/pygments/filter.py +74 -0
- wandb/vendor/pygments/filters/__init__.py +350 -0
- wandb/vendor/pygments/formatter.py +95 -0
- wandb/vendor/pygments/formatters/__init__.py +153 -0
- wandb/vendor/pygments/formatters/_mapping.py +85 -0
- wandb/vendor/pygments/formatters/bbcode.py +109 -0
- wandb/vendor/pygments/formatters/html.py +851 -0
- wandb/vendor/pygments/formatters/img.py +600 -0
- wandb/vendor/pygments/formatters/irc.py +182 -0
- wandb/vendor/pygments/formatters/latex.py +482 -0
- wandb/vendor/pygments/formatters/other.py +160 -0
- wandb/vendor/pygments/formatters/rtf.py +147 -0
- wandb/vendor/pygments/formatters/svg.py +153 -0
- wandb/vendor/pygments/formatters/terminal.py +136 -0
- wandb/vendor/pygments/formatters/terminal256.py +309 -0
- wandb/vendor/pygments/lexer.py +871 -0
- wandb/vendor/pygments/lexers/__init__.py +329 -0
- wandb/vendor/pygments/lexers/_asy_builtins.py +1645 -0
- wandb/vendor/pygments/lexers/_cl_builtins.py +232 -0
- wandb/vendor/pygments/lexers/_cocoa_builtins.py +72 -0
- wandb/vendor/pygments/lexers/_csound_builtins.py +1346 -0
- wandb/vendor/pygments/lexers/_lasso_builtins.py +5327 -0
- wandb/vendor/pygments/lexers/_lua_builtins.py +295 -0
- wandb/vendor/pygments/lexers/_mapping.py +500 -0
- wandb/vendor/pygments/lexers/_mql_builtins.py +1172 -0
- wandb/vendor/pygments/lexers/_openedge_builtins.py +2547 -0
- wandb/vendor/pygments/lexers/_php_builtins.py +4756 -0
- wandb/vendor/pygments/lexers/_postgres_builtins.py +621 -0
- wandb/vendor/pygments/lexers/_scilab_builtins.py +3094 -0
- wandb/vendor/pygments/lexers/_sourcemod_builtins.py +1163 -0
- wandb/vendor/pygments/lexers/_stan_builtins.py +532 -0
- wandb/vendor/pygments/lexers/_stata_builtins.py +419 -0
- wandb/vendor/pygments/lexers/_tsql_builtins.py +1004 -0
- wandb/vendor/pygments/lexers/_vim_builtins.py +1939 -0
- wandb/vendor/pygments/lexers/actionscript.py +240 -0
- wandb/vendor/pygments/lexers/agile.py +24 -0
- wandb/vendor/pygments/lexers/algebra.py +221 -0
- wandb/vendor/pygments/lexers/ambient.py +76 -0
- wandb/vendor/pygments/lexers/ampl.py +87 -0
- wandb/vendor/pygments/lexers/apl.py +101 -0
- wandb/vendor/pygments/lexers/archetype.py +318 -0
- wandb/vendor/pygments/lexers/asm.py +641 -0
- wandb/vendor/pygments/lexers/automation.py +374 -0
- wandb/vendor/pygments/lexers/basic.py +500 -0
- wandb/vendor/pygments/lexers/bibtex.py +160 -0
- wandb/vendor/pygments/lexers/business.py +612 -0
- wandb/vendor/pygments/lexers/c_cpp.py +252 -0
- wandb/vendor/pygments/lexers/c_like.py +541 -0
- wandb/vendor/pygments/lexers/capnproto.py +78 -0
- wandb/vendor/pygments/lexers/chapel.py +102 -0
- wandb/vendor/pygments/lexers/clean.py +288 -0
- wandb/vendor/pygments/lexers/compiled.py +34 -0
- wandb/vendor/pygments/lexers/configs.py +833 -0
- wandb/vendor/pygments/lexers/console.py +114 -0
- wandb/vendor/pygments/lexers/crystal.py +393 -0
- wandb/vendor/pygments/lexers/csound.py +366 -0
- wandb/vendor/pygments/lexers/css.py +689 -0
- wandb/vendor/pygments/lexers/d.py +251 -0
- wandb/vendor/pygments/lexers/dalvik.py +125 -0
- wandb/vendor/pygments/lexers/data.py +555 -0
- wandb/vendor/pygments/lexers/diff.py +165 -0
- wandb/vendor/pygments/lexers/dotnet.py +691 -0
- wandb/vendor/pygments/lexers/dsls.py +878 -0
- wandb/vendor/pygments/lexers/dylan.py +289 -0
- wandb/vendor/pygments/lexers/ecl.py +125 -0
- wandb/vendor/pygments/lexers/eiffel.py +65 -0
- wandb/vendor/pygments/lexers/elm.py +121 -0
- wandb/vendor/pygments/lexers/erlang.py +533 -0
- wandb/vendor/pygments/lexers/esoteric.py +277 -0
- wandb/vendor/pygments/lexers/ezhil.py +69 -0
- wandb/vendor/pygments/lexers/factor.py +344 -0
- wandb/vendor/pygments/lexers/fantom.py +250 -0
- wandb/vendor/pygments/lexers/felix.py +273 -0
- wandb/vendor/pygments/lexers/forth.py +177 -0
- wandb/vendor/pygments/lexers/fortran.py +205 -0
- wandb/vendor/pygments/lexers/foxpro.py +428 -0
- wandb/vendor/pygments/lexers/functional.py +21 -0
- wandb/vendor/pygments/lexers/go.py +101 -0
- wandb/vendor/pygments/lexers/grammar_notation.py +213 -0
- wandb/vendor/pygments/lexers/graph.py +80 -0
- wandb/vendor/pygments/lexers/graphics.py +553 -0
- wandb/vendor/pygments/lexers/haskell.py +843 -0
- wandb/vendor/pygments/lexers/haxe.py +936 -0
- wandb/vendor/pygments/lexers/hdl.py +382 -0
- wandb/vendor/pygments/lexers/hexdump.py +103 -0
- wandb/vendor/pygments/lexers/html.py +602 -0
- wandb/vendor/pygments/lexers/idl.py +270 -0
- wandb/vendor/pygments/lexers/igor.py +288 -0
- wandb/vendor/pygments/lexers/inferno.py +96 -0
- wandb/vendor/pygments/lexers/installers.py +322 -0
- wandb/vendor/pygments/lexers/int_fiction.py +1343 -0
- wandb/vendor/pygments/lexers/iolang.py +63 -0
- wandb/vendor/pygments/lexers/j.py +146 -0
- wandb/vendor/pygments/lexers/javascript.py +1525 -0
- wandb/vendor/pygments/lexers/julia.py +333 -0
- wandb/vendor/pygments/lexers/jvm.py +1573 -0
- wandb/vendor/pygments/lexers/lisp.py +2621 -0
- wandb/vendor/pygments/lexers/make.py +202 -0
- wandb/vendor/pygments/lexers/markup.py +595 -0
- wandb/vendor/pygments/lexers/math.py +21 -0
- wandb/vendor/pygments/lexers/matlab.py +663 -0
- wandb/vendor/pygments/lexers/ml.py +769 -0
- wandb/vendor/pygments/lexers/modeling.py +358 -0
- wandb/vendor/pygments/lexers/modula2.py +1561 -0
- wandb/vendor/pygments/lexers/monte.py +204 -0
- wandb/vendor/pygments/lexers/ncl.py +894 -0
- wandb/vendor/pygments/lexers/nimrod.py +159 -0
- wandb/vendor/pygments/lexers/nit.py +64 -0
- wandb/vendor/pygments/lexers/nix.py +136 -0
- wandb/vendor/pygments/lexers/oberon.py +105 -0
- wandb/vendor/pygments/lexers/objective.py +504 -0
- wandb/vendor/pygments/lexers/ooc.py +85 -0
- wandb/vendor/pygments/lexers/other.py +41 -0
- wandb/vendor/pygments/lexers/parasail.py +79 -0
- wandb/vendor/pygments/lexers/parsers.py +835 -0
- wandb/vendor/pygments/lexers/pascal.py +644 -0
- wandb/vendor/pygments/lexers/pawn.py +199 -0
- wandb/vendor/pygments/lexers/perl.py +620 -0
- wandb/vendor/pygments/lexers/php.py +267 -0
- wandb/vendor/pygments/lexers/praat.py +294 -0
- wandb/vendor/pygments/lexers/prolog.py +306 -0
- wandb/vendor/pygments/lexers/python.py +939 -0
- wandb/vendor/pygments/lexers/qvt.py +152 -0
- wandb/vendor/pygments/lexers/r.py +453 -0
- wandb/vendor/pygments/lexers/rdf.py +270 -0
- wandb/vendor/pygments/lexers/rebol.py +431 -0
- wandb/vendor/pygments/lexers/resource.py +85 -0
- wandb/vendor/pygments/lexers/rnc.py +67 -0
- wandb/vendor/pygments/lexers/roboconf.py +82 -0
- wandb/vendor/pygments/lexers/robotframework.py +560 -0
- wandb/vendor/pygments/lexers/ruby.py +519 -0
- wandb/vendor/pygments/lexers/rust.py +220 -0
- wandb/vendor/pygments/lexers/sas.py +228 -0
- wandb/vendor/pygments/lexers/scripting.py +1222 -0
- wandb/vendor/pygments/lexers/shell.py +794 -0
- wandb/vendor/pygments/lexers/smalltalk.py +195 -0
- wandb/vendor/pygments/lexers/smv.py +79 -0
- wandb/vendor/pygments/lexers/snobol.py +83 -0
- wandb/vendor/pygments/lexers/special.py +103 -0
- wandb/vendor/pygments/lexers/sql.py +681 -0
- wandb/vendor/pygments/lexers/stata.py +108 -0
- wandb/vendor/pygments/lexers/supercollider.py +90 -0
- wandb/vendor/pygments/lexers/tcl.py +145 -0
- wandb/vendor/pygments/lexers/templates.py +2283 -0
- wandb/vendor/pygments/lexers/testing.py +207 -0
- wandb/vendor/pygments/lexers/text.py +25 -0
- wandb/vendor/pygments/lexers/textedit.py +169 -0
- wandb/vendor/pygments/lexers/textfmts.py +297 -0
- wandb/vendor/pygments/lexers/theorem.py +458 -0
- wandb/vendor/pygments/lexers/trafficscript.py +54 -0
- wandb/vendor/pygments/lexers/typoscript.py +226 -0
- wandb/vendor/pygments/lexers/urbi.py +133 -0
- wandb/vendor/pygments/lexers/varnish.py +190 -0
- wandb/vendor/pygments/lexers/verification.py +111 -0
- wandb/vendor/pygments/lexers/web.py +24 -0
- wandb/vendor/pygments/lexers/webmisc.py +988 -0
- wandb/vendor/pygments/lexers/whiley.py +116 -0
- wandb/vendor/pygments/lexers/x10.py +69 -0
- wandb/vendor/pygments/modeline.py +44 -0
- wandb/vendor/pygments/plugin.py +68 -0
- wandb/vendor/pygments/regexopt.py +92 -0
- wandb/vendor/pygments/scanner.py +105 -0
- wandb/vendor/pygments/sphinxext.py +158 -0
- wandb/vendor/pygments/style.py +155 -0
- wandb/vendor/pygments/styles/__init__.py +80 -0
- wandb/vendor/pygments/styles/abap.py +29 -0
- wandb/vendor/pygments/styles/algol.py +63 -0
- wandb/vendor/pygments/styles/algol_nu.py +63 -0
- wandb/vendor/pygments/styles/arduino.py +98 -0
- wandb/vendor/pygments/styles/autumn.py +65 -0
- wandb/vendor/pygments/styles/borland.py +51 -0
- wandb/vendor/pygments/styles/bw.py +49 -0
- wandb/vendor/pygments/styles/colorful.py +81 -0
- wandb/vendor/pygments/styles/default.py +73 -0
- wandb/vendor/pygments/styles/emacs.py +72 -0
- wandb/vendor/pygments/styles/friendly.py +72 -0
- wandb/vendor/pygments/styles/fruity.py +42 -0
- wandb/vendor/pygments/styles/igor.py +29 -0
- wandb/vendor/pygments/styles/lovelace.py +97 -0
- wandb/vendor/pygments/styles/manni.py +75 -0
- wandb/vendor/pygments/styles/monokai.py +106 -0
- wandb/vendor/pygments/styles/murphy.py +80 -0
- wandb/vendor/pygments/styles/native.py +65 -0
- wandb/vendor/pygments/styles/paraiso_dark.py +125 -0
- wandb/vendor/pygments/styles/paraiso_light.py +125 -0
- wandb/vendor/pygments/styles/pastie.py +75 -0
- wandb/vendor/pygments/styles/perldoc.py +69 -0
- wandb/vendor/pygments/styles/rainbow_dash.py +89 -0
- wandb/vendor/pygments/styles/rrt.py +33 -0
- wandb/vendor/pygments/styles/sas.py +44 -0
- wandb/vendor/pygments/styles/stata.py +40 -0
- wandb/vendor/pygments/styles/tango.py +141 -0
- wandb/vendor/pygments/styles/trac.py +63 -0
- wandb/vendor/pygments/styles/vim.py +63 -0
- wandb/vendor/pygments/styles/vs.py +38 -0
- wandb/vendor/pygments/styles/xcode.py +51 -0
- wandb/vendor/pygments/token.py +213 -0
- wandb/vendor/pygments/unistring.py +217 -0
- wandb/vendor/pygments/util.py +388 -0
- wandb/vendor/pynvml/__init__.py +0 -0
- wandb/vendor/pynvml/pynvml.py +4779 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/__init__.py +17 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/events.py +615 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/__init__.py +98 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/api.py +369 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents.py +172 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents2.py +239 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify.py +218 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_buffer.py +81 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_c.py +575 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/kqueue.py +730 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/polling.py +145 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/read_directory_changes.py +133 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/winapi.py +348 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/patterns.py +265 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/tricks/__init__.py +174 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/__init__.py +151 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/bricks.py +249 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/compat.py +29 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/decorators.py +198 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/delayed_queue.py +88 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/dirsnapshot.py +293 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/echo.py +157 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/event_backport.py +41 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/importlib2.py +40 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/platform.py +57 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/unicode_paths.py +64 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/win32stat.py +123 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/version.py +28 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/watchmedo.py +577 -0
- wandb/wandb_agent.py +588 -0
- wandb/wandb_controller.py +721 -0
- wandb/wandb_run.py +9 -0
- wandb-0.18.2.dist-info/METADATA +213 -0
- wandb-0.18.2.dist-info/RECORD +827 -0
- wandb-0.18.2.dist-info/WHEEL +5 -0
- wandb-0.18.2.dist-info/entry_points.txt +3 -0
- wandb-0.18.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,963 @@
|
|
1
|
+
"""Implementation of KubernetesRunner class for wandb launch."""
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
import base64
|
5
|
+
import datetime
|
6
|
+
import json
|
7
|
+
import logging
|
8
|
+
import os
|
9
|
+
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
10
|
+
|
11
|
+
import yaml
|
12
|
+
|
13
|
+
import wandb
|
14
|
+
from wandb.apis.internal import Api
|
15
|
+
from wandb.sdk.launch.agent.agent import LaunchAgent
|
16
|
+
from wandb.sdk.launch.environment.abstract import AbstractEnvironment
|
17
|
+
from wandb.sdk.launch.registry.abstract import AbstractRegistry
|
18
|
+
from wandb.sdk.launch.registry.azure_container_registry import AzureContainerRegistry
|
19
|
+
from wandb.sdk.launch.registry.local_registry import LocalRegistry
|
20
|
+
from wandb.sdk.launch.runner.abstract import Status
|
21
|
+
from wandb.sdk.launch.runner.kubernetes_monitor import (
|
22
|
+
WANDB_K8S_LABEL_AGENT,
|
23
|
+
WANDB_K8S_LABEL_MONITOR,
|
24
|
+
WANDB_K8S_RUN_ID,
|
25
|
+
CustomResource,
|
26
|
+
LaunchKubernetesMonitor,
|
27
|
+
)
|
28
|
+
from wandb.sdk.lib.retry import ExponentialBackoff, retry_async
|
29
|
+
from wandb.util import get_module
|
30
|
+
|
31
|
+
from .._project_spec import EntryPoint, LaunchProject
|
32
|
+
from ..errors import LaunchError
|
33
|
+
from ..utils import (
|
34
|
+
CODE_MOUNT_DIR,
|
35
|
+
LOG_PREFIX,
|
36
|
+
MAX_ENV_LENGTHS,
|
37
|
+
PROJECT_SYNCHRONOUS,
|
38
|
+
get_kube_context_and_api_client,
|
39
|
+
make_name_dns_safe,
|
40
|
+
)
|
41
|
+
from .abstract import AbstractRun, AbstractRunner
|
42
|
+
|
43
|
+
get_module(
|
44
|
+
"kubernetes_asyncio",
|
45
|
+
required="Kubernetes runner requires the kubernetes package. Please install it with `pip install wandb[launch]`.",
|
46
|
+
)
|
47
|
+
|
48
|
+
import kubernetes_asyncio # type: ignore # noqa: E402
|
49
|
+
from kubernetes_asyncio import client # noqa: E402
|
50
|
+
from kubernetes_asyncio.client.api.batch_v1_api import ( # type: ignore # noqa: E402
|
51
|
+
BatchV1Api,
|
52
|
+
)
|
53
|
+
from kubernetes_asyncio.client.api.core_v1_api import ( # type: ignore # noqa: E402
|
54
|
+
CoreV1Api,
|
55
|
+
)
|
56
|
+
from kubernetes_asyncio.client.api.custom_objects_api import ( # type: ignore # noqa: E402
|
57
|
+
CustomObjectsApi,
|
58
|
+
)
|
59
|
+
from kubernetes_asyncio.client.models.v1_secret import ( # type: ignore # noqa: E402
|
60
|
+
V1Secret,
|
61
|
+
)
|
62
|
+
from kubernetes_asyncio.client.rest import ApiException # type: ignore # noqa: E402
|
63
|
+
|
64
|
+
TIMEOUT = 5
|
65
|
+
API_KEY_SECRET_MAX_RETRIES = 5
|
66
|
+
|
67
|
+
_logger = logging.getLogger(__name__)
|
68
|
+
|
69
|
+
|
70
|
+
SOURCE_CODE_PVC_MOUNT_PATH = os.environ.get("WANDB_LAUNCH_CODE_PVC_MOUNT_PATH")
|
71
|
+
SOURCE_CODE_PVC_NAME = os.environ.get("WANDB_LAUNCH_CODE_PVC_NAME")
|
72
|
+
|
73
|
+
|
74
|
+
class KubernetesSubmittedRun(AbstractRun):
    """Wrapper for a launched run on Kubernetes."""

    def __init__(
        self,
        batch_api: "BatchV1Api",
        core_api: "CoreV1Api",
        name: str,
        namespace: Optional[str] = "default",
        secret: Optional["V1Secret"] = None,
    ) -> None:
        """Initialize a KubernetesSubmittedRun.

        This object does not query the job itself to learn its state:
        `get_status` returns whatever `LaunchKubernetesMonitor` currently
        reports for the job name.

        Arguments:
            batch_api: Kubernetes BatchV1Api object, used to delete the job.
            core_api: Kubernetes CoreV1Api object, used to list pods, read
                pod logs and delete the secret.
            name: Name of the job.
            namespace: Kubernetes namespace.
            secret: Kubernetes secret that is deleted once the run reaches a
                terminal state or is cancelled (see `_delete_secret`).

        Returns:
            None.
        """
        self.batch_api = batch_api
        self.core_api = core_api
        self.name = name
        self.namespace = namespace
        self._fail_count = 0
        self.secret = secret

    @property
    def id(self) -> str:
        """Return the run id."""
        return self.name

    async def get_logs(self) -> Optional[str]:
        """Return the logs of the first pod found for this job.

        Returns:
            The pod logs as a string, or None if no pod was found, the pod
            produced no logs, or any error occurred while fetching them.
        """
        try:
            pods = await self.core_api.list_namespaced_pod(
                label_selector=f"job-name={self.name}", namespace=self.namespace
            )
            pod_names = [pi.metadata.name for pi in pods.items]
            if not pod_names:
                wandb.termwarn(f"Found no pods for kubernetes job: {self.name}")
                return None
            logs = await self.core_api.read_namespaced_pod_log(
                name=pod_names[0], namespace=self.namespace
            )
            if logs:
                return str(logs)
            else:
                wandb.termwarn(f"No logs for kubernetes pod(s): {pod_names}")
                return None
        except Exception as e:
            # Log retrieval is best-effort: report the problem and carry on.
            wandb.termerror(f"{LOG_PREFIX}Failed to get pod logs: {e}")
            return None

    async def wait(self) -> bool:
        """Wait for the run to finish.

        Polls `get_status` every 5 seconds until the state is one of
        "finished", "failed" or "preempted", then deletes the secret.

        Returns:
            True if the run finished successfully, False otherwise.
        """
        while True:
            status = await self.get_status()
            wandb.termlog(f"{LOG_PREFIX}Job {self.name} status: {status.state}")
            if status.state in ["finished", "failed", "preempted"]:
                break
            await asyncio.sleep(5)

        await self._delete_secret()
        return (
            status.state == "finished"
        )  # todo: not sure if this (copied from aws runner) is the right approach? should we return false on failure

    async def get_status(self) -> Status:
        """Return the monitor's status for this job, cleaning up when done.

        The secret is deleted as soon as the reported state is terminal
        ("stopped", "failed", "finished" or "preempted").
        """
        status = LaunchKubernetesMonitor.get_status(self.name)
        # Compare the state string explicitly, as `wait` does, rather than
        # relying on the Status object comparing equal to a plain string.
        if status.state in ["stopped", "failed", "finished", "preempted"]:
            await self._delete_secret()
        return status

    async def cancel(self) -> None:
        """Cancel the run.

        Deletes the Kubernetes job and then the secret.

        Raises:
            LaunchError: If either deletion fails with an ApiException.
        """
        try:
            await self.batch_api.delete_namespaced_job(
                namespace=self.namespace,
                name=self.name,
            )
            await self._delete_secret()
        except ApiException as e:
            raise LaunchError(
                f"Failed to delete Kubernetes Job {self.name} in namespace {self.namespace}: {str(e)}"
            ) from e

    async def _delete_secret(self) -> None:
        """Delete the secret held by this run, at most once.

        Nothing happens when WANDB_RELEASE_NAME is set or when there is no
        secret; after a successful deletion `self.secret` is reset to None.
        """
        # Cleanup secret if not running in a helm-managed context
        if not os.environ.get("WANDB_RELEASE_NAME") and self.secret:
            await self.core_api.delete_namespaced_secret(
                name=self.secret.metadata.name,
                namespace=self.secret.metadata.namespace,
            )
            self.secret = None
|
183
|
+
|
184
|
+
|
185
|
+
class CrdSubmittedRun(AbstractRun):
    """Run submitted to a CRD backend, e.g. Volcano."""

    def __init__(
        self,
        group: str,
        version: str,
        plural: str,
        name: str,
        namespace: str,
        core_api: CoreV1Api,
        custom_api: CustomObjectsApi,
    ) -> None:
        """Record everything needed to track and cancel a custom object.

        Arguments:
            group: The API group of the CRD.
            version: The API version of the CRD.
            plural: The plural name of the CRD.
            name: The name of the CRD instance.
            namespace: The namespace of the CRD instance.
            core_api: The Kubernetes core API client (pods and pod logs).
            custom_api: The Kubernetes custom object API client (deletion).
        """
        # Coordinates of the custom object.
        self.group = group
        self.version = version
        self.plural = plural
        self.name = name
        self.namespace = namespace
        # API clients.
        self.core_api = core_api
        self.custom_api = custom_api
        self._fail_count = 0

    @property
    def id(self) -> str:
        """Get the name of the custom object."""
        return self.name

    async def get_logs(self) -> Optional[str]:
        """Collect the logs of every pod labelled with this run's name.

        Returns:
            One "Pod <name>:" section per pod joined by newlines, or None if
            no pods were found or the Kubernetes API raised an ApiException.
        """
        # TODO: test more carefully once we release multi-node support
        logs_by_pod: Dict[str, Optional[str]] = {}
        try:
            pod_list = await self.core_api.list_namespaced_pod(
                label_selector=f"wandb/run-id={self.name}", namespace=self.namespace
            )
            names = [pod.metadata.name for pod in pod_list.items]
            for current in names:
                logs_by_pod[current] = await self.core_api.read_namespaced_pod_log(
                    name=current, namespace=self.namespace
                )
        except ApiException as e:
            wandb.termwarn(f"Failed to get logs for {self.name}: {str(e)}")
            return None
        if logs_by_pod:
            return "\n".join(
                f"Pod {pod_name}:\n{log}" for pod_name, log in logs_by_pod.items()
            )
        return None

    async def get_status(self) -> Status:
        """Return the status the Kubernetes monitor reports for this object."""
        return LaunchKubernetesMonitor.get_status(self.name)

    async def cancel(self) -> None:
        """Delete the custom object.

        Raises:
            LaunchError: If the deletion fails with an ApiException.
        """
        target = {
            "group": self.group,
            "version": self.version,
            "namespace": self.namespace,
            "plural": self.plural,
            "name": self.name,
        }
        try:
            await self.custom_api.delete_namespaced_custom_object(**target)
        except ApiException as e:
            raise LaunchError(
                f"Failed to delete CRD {self.name} in namespace {self.namespace}: {str(e)}"
            ) from e

    async def wait(self) -> bool:
        """Poll every 5 seconds until the custom object reaches a final state.

        Returns:
            True if the final state is "finished", False for "failed" or
            "preempted".
        """
        final_states = ("finished", "failed", "preempted")
        while True:
            status = await self.get_status()
            wandb.termlog(f"{LOG_PREFIX}Job {self.name} status: {status}")
            if status.state not in final_states:
                await asyncio.sleep(5)
                continue
            return status.state == "finished"
|
274
|
+
|
275
|
+
|
276
|
+
class KubernetesRunner(AbstractRunner):
|
277
|
+
"""Launches runs onto kubernetes."""
|
278
|
+
|
279
|
+
def __init__(
    self,
    api: Api,
    backend_config: Dict[str, Any],
    environment: AbstractEnvironment,
    registry: AbstractRegistry,
) -> None:
    """Set up a runner that launches runs onto Kubernetes.

    Arguments:
        api: The API client object, forwarded to the base runner.
        backend_config: The backend configuration, forwarded to the base
            runner.
        environment: The environment to launch runs into.
        registry: The registry object kept on the runner as `self.registry`.
    """
    super().__init__(api, backend_config)
    self.environment, self.registry = environment, registry
|
299
|
+
|
300
|
+
def get_namespace(
    self, resource_args: Dict[str, Any], context: Dict[str, Any]
) -> str:
    """Pick the namespace a run should be launched into.

    The first non-empty value wins, in this order:
    `resource_args["metadata"]["namespace"]`, `resource_args["namespace"]`,
    `backend_config["runner"]["namespace"]`, the namespace of the k8s
    config context, and finally "default".

    Arguments:
        resource_args: The resource args to launch.
        context: The k8s config context (may be empty or None).

    Returns:
        The namespace to launch into.
    """
    # Resolve the context fallback up front so it is always evaluated,
    # whichever candidate ends up being selected.
    if context:
        fallback = context["context"].get("namespace", "default")
    else:
        fallback = "default"

    from_metadata = resource_args.get("metadata", {}).get("namespace")
    if from_metadata:
        return from_metadata  # type: ignore[no-any-return]

    # continue support for malformed namespace
    top_level = resource_args.get("namespace")
    if top_level:
        return top_level  # type: ignore[no-any-return]

    from_runner = self.backend_config.get("runner", {}).get("namespace")
    if from_runner:
        return from_runner  # type: ignore[no-any-return]

    return fallback  # type: ignore[no-any-return]
|
323
|
+
|
324
|
+
async def _inject_defaults(
    self,
    resource_args: Dict[str, Any],
    launch_project: LaunchProject,
    image_uri: str,
    namespace: str,
    core_api: "CoreV1Api",
) -> Tuple[Dict[str, Any], Optional["V1Secret"]]:
    """Apply our default values, return job dict and api key secret.

    Starting from `resource_args`, this fills in the job/pod defaults,
    the wandb labels, container names and security contexts, the image,
    the entrypoint/args and the wandb environment variables.

    Arguments:
        resource_args (Dict[str, Any]): The resource args to launch.
        launch_project (LaunchProject): The launch project.
        image_uri (str): The image assigned to the first container when the
            project supplies a docker image or no container names one.
        namespace (str): The namespace.
        core_api (CoreV1Api): The core api.

    Returns:
        Tuple[Dict[str, Any], Optional["V1Secret"]]: The resource args and api key secret.
    """
    job: Dict[str, Any] = {
        "apiVersion": "batch/v1",
        "kind": "Job",
    }
    job.update(resource_args)

    job_metadata: Dict[str, Any] = job.get("metadata", {})
    job_spec: Dict[str, Any] = {"backoffLimit": 0, "ttlSecondsAfterFinished": 60}
    job_spec.update(job.get("spec", {}))
    pod_template: Dict[str, Any] = job_spec.get("template", {})
    pod_spec: Dict[str, Any] = {"restartPolicy": "Never"}
    pod_spec.update(pod_template.get("spec", {}))
    # An explicit empty (or null) container list gets the same single
    # default container as a missing key; `containers[0]` is used below.
    containers: List[Dict[str, Any]] = pod_spec.get("containers") or [{}]

    # Add labels to job metadata
    job_metadata.setdefault("labels", {})
    job_metadata["labels"][WANDB_K8S_RUN_ID] = launch_project.run_id
    job_metadata["labels"][WANDB_K8S_LABEL_MONITOR] = "true"
    if LaunchAgent.initialized():
        job_metadata["labels"][WANDB_K8S_LABEL_AGENT] = LaunchAgent.name()
    # name precedence: name in spec > generated name
    if not job_metadata.get("name"):
        job_metadata["generateName"] = make_name_dns_safe(
            f"launch-{launch_project.target_entity}-{launch_project.target_project}-"
        )

    # User-supplied names and security contexts are left untouched.
    for i, cont in enumerate(containers):
        cont.setdefault("name", "launch" + str(i))
        cont.setdefault(
            "securityContext",
            {
                "allowPrivilegeEscalation": False,
                "capabilities": {"drop": ["ALL"]},
                "seccompProfile": {"type": "RuntimeDefault"},
            },
        )

    entry_point = (
        launch_project.override_entrypoint or launch_project.get_job_entry_point()
    )
    if launch_project.docker_image:
        # dont specify run id if user provided image, could have multiple runs
        containers[0]["image"] = image_uri
        # TODO: handle secret pulling image from registry
    elif not any("image" in cont for cont in containers):
        assert entry_point is not None
        # in the non instance case we need to make an imagePullSecret
        # so the new job can pull the image
        containers[0]["image"] = image_uri
        secret = await maybe_create_imagepull_secret(
            core_api, self.registry, launch_project.run_id, namespace
        )
        if secret is not None:
            pod_spec["imagePullSecrets"] = [
                {"name": f"regcred-{launch_project.run_id}"}
            ]

    inject_entrypoint_and_args(
        containers,
        entry_point,
        launch_project.override_args,
        launch_project.override_entrypoint is not None,
    )

    env_vars = launch_project.get_env_vars_dict(
        self._api, MAX_ENV_LENGTHS[self.__class__.__name__]
    )

    # The secret name does not depend on the container, so compute it once.
    release_name = os.environ.get("WANDB_RELEASE_NAME")
    secret_name = "wandb-api-key"
    if release_name:
        secret_name += f"-{release_name}"
    else:
        secret_name += f"-{launch_project.run_id}"

    def handle_exception(e):
        wandb.termwarn(
            f"Exception when ensuring Kubernetes API key secret: {e}. Retrying..."
        )

    api_key_secret = None
    for cont in containers:
        # Add our env vars to user supplied env vars
        env = cont.get("env") or []
        for key, value in env_vars.items():
            if (
                key == "WANDB_API_KEY"
                and value
                and (
                    LaunchAgent.initialized()
                    or self.backend_config[PROJECT_SYNCHRONOUS]
                )
            ):
                # Override API key with secret. TODO: Do the same for other runners
                api_key_secret = await retry_async(
                    backoff=ExponentialBackoff(
                        initial_sleep=datetime.timedelta(seconds=1),
                        max_sleep=datetime.timedelta(minutes=1),
                        max_retries=API_KEY_SECRET_MAX_RETRIES,
                    ),
                    fn=ensure_api_key_secret,
                    on_exc=handle_exception,
                    core_api=core_api,
                    secret_name=secret_name,
                    namespace=namespace,
                    api_key=value,
                )
                # Reference the secret instead of inlining the key value.
                env.append(
                    {
                        "name": key,
                        "valueFrom": {
                            "secretKeyRef": {
                                "name": secret_name,
                                "key": "password",
                            }
                        },
                    }
                )
            else:
                env.append({"name": key, "value": value})
        cont["env"] = env

    # Reassemble the job from the (possibly newly created) nested pieces.
    pod_spec["containers"] = containers
    pod_template["spec"] = pod_spec
    job_spec["template"] = pod_template
    job["spec"] = job_spec
    job["metadata"] = job_metadata

    add_label_to_pods(
        job,
        WANDB_K8S_LABEL_MONITOR,
        "true",
    )

    if launch_project.job_base_image:
        apply_code_mount_configuration(
            job,
            launch_project,
        )

    # Add wandb.ai/agent: current agent label on all pods
    if LaunchAgent.initialized():
        add_label_to_pods(
            job,
            WANDB_K8S_LABEL_AGENT,
            LaunchAgent.name(),
        )

    return job, api_key_secret
|
491
|
+
|
492
|
+
async def run(
|
493
|
+
self, launch_project: LaunchProject, image_uri: str
|
494
|
+
) -> Optional[AbstractRun]: # noqa: C901
|
495
|
+
"""Execute a launch project on Kubernetes.
|
496
|
+
|
497
|
+
Arguments:
|
498
|
+
launch_project: The launch project to execute.
|
499
|
+
image_uri: The URI of the container image to run.
|
500
|
+
|
501
|
+
Returns:
|
502
|
+
The run object if the run was successful, otherwise None.
|
503
|
+
"""
|
504
|
+
await LaunchKubernetesMonitor.ensure_initialized()
|
505
|
+
resource_args = launch_project.fill_macros(image_uri).get("kubernetes", {})
|
506
|
+
if not resource_args:
|
507
|
+
wandb.termlog(
|
508
|
+
f"{LOG_PREFIX}Note: no resource args specified. Add a "
|
509
|
+
"Kubernetes yaml spec or other options in a json file "
|
510
|
+
"with --resource-args <json>."
|
511
|
+
)
|
512
|
+
_logger.info(f"Running Kubernetes job with resource args: {resource_args}")
|
513
|
+
|
514
|
+
context, api_client = await get_kube_context_and_api_client(
|
515
|
+
kubernetes_asyncio, resource_args
|
516
|
+
)
|
517
|
+
|
518
|
+
# If using pvc for code mount, move code there.
|
519
|
+
if launch_project.job_base_image is not None:
|
520
|
+
if SOURCE_CODE_PVC_NAME is None or SOURCE_CODE_PVC_MOUNT_PATH is None:
|
521
|
+
raise LaunchError(
|
522
|
+
"WANDB_LAUNCH_SOURCE_CODE_PVC_ environment variables not set. "
|
523
|
+
"Unable to mount source code PVC into base image. "
|
524
|
+
"Use the `codeMountPvcName` variable in the agent helm chart "
|
525
|
+
"to enable base image jobs for this agent. See "
|
526
|
+
"https://github.com/wandb/helm-charts/tree/main/charts/launch-agent "
|
527
|
+
"for more information."
|
528
|
+
)
|
529
|
+
code_subdir = launch_project.get_image_source_string()
|
530
|
+
launch_project.change_project_dir(
|
531
|
+
os.path.join(SOURCE_CODE_PVC_MOUNT_PATH, code_subdir)
|
532
|
+
)
|
533
|
+
|
534
|
+
# If the user specified an alternate api, we will execute this
|
535
|
+
# run by creating a custom object.
|
536
|
+
api_version = resource_args.get("apiVersion", "batch/v1")
|
537
|
+
|
538
|
+
if api_version not in ["batch/v1", "batch/v1beta1"]:
|
539
|
+
env_vars = launch_project.get_env_vars_dict(
|
540
|
+
self._api, MAX_ENV_LENGTHS[self.__class__.__name__]
|
541
|
+
)
|
542
|
+
# Crawl the resource args and add our env vars to the containers.
|
543
|
+
add_wandb_env(resource_args, env_vars)
|
544
|
+
|
545
|
+
# Add our labels to the resource args. This is necessary for the
|
546
|
+
# agent to find the custom object later on.
|
547
|
+
resource_args["metadata"] = resource_args.get("metadata", {})
|
548
|
+
resource_args["metadata"]["labels"] = resource_args["metadata"].get(
|
549
|
+
"labels", {}
|
550
|
+
)
|
551
|
+
resource_args["metadata"]["labels"][WANDB_K8S_LABEL_MONITOR] = "true"
|
552
|
+
|
553
|
+
# Crawl the resource args and add our labels to the pods. This is
|
554
|
+
# necessary for the agent to find the pods later on.
|
555
|
+
add_label_to_pods(
|
556
|
+
resource_args,
|
557
|
+
WANDB_K8S_LABEL_MONITOR,
|
558
|
+
"true",
|
559
|
+
)
|
560
|
+
|
561
|
+
# Add wandb.ai/agent: current agent label on all pods
|
562
|
+
if LaunchAgent.initialized():
|
563
|
+
add_label_to_pods(
|
564
|
+
resource_args,
|
565
|
+
WANDB_K8S_LABEL_AGENT,
|
566
|
+
LaunchAgent.name(),
|
567
|
+
)
|
568
|
+
resource_args["metadata"]["labels"][WANDB_K8S_LABEL_AGENT] = (
|
569
|
+
LaunchAgent.name()
|
570
|
+
)
|
571
|
+
|
572
|
+
if launch_project.job_base_image:
|
573
|
+
apply_code_mount_configuration(resource_args, launch_project)
|
574
|
+
|
575
|
+
overrides = {}
|
576
|
+
if launch_project.override_args:
|
577
|
+
overrides["args"] = launch_project.override_args
|
578
|
+
if launch_project.override_entrypoint:
|
579
|
+
overrides["command"] = launch_project.override_entrypoint.command
|
580
|
+
add_entrypoint_args_overrides(
|
581
|
+
resource_args,
|
582
|
+
overrides,
|
583
|
+
)
|
584
|
+
api = client.CustomObjectsApi(api_client)
|
585
|
+
# Infer the attributes of a custom object from the apiVersion and/or
|
586
|
+
# a kind: attribute in the resource args.
|
587
|
+
namespace = self.get_namespace(resource_args, context)
|
588
|
+
group, version, *_ = api_version.split("/")
|
589
|
+
group = resource_args.get("group", group)
|
590
|
+
version = resource_args.get("version", version)
|
591
|
+
kind = resource_args.get("kind", version)
|
592
|
+
plural = f"{kind.lower()}s"
|
593
|
+
custom_resource = CustomResource(
|
594
|
+
group=group,
|
595
|
+
version=version,
|
596
|
+
plural=plural,
|
597
|
+
)
|
598
|
+
LaunchKubernetesMonitor.monitor_namespace(
|
599
|
+
namespace, custom_resource=custom_resource
|
600
|
+
)
|
601
|
+
|
602
|
+
try:
|
603
|
+
response = await api.create_namespaced_custom_object(
|
604
|
+
group=group,
|
605
|
+
version=version,
|
606
|
+
namespace=namespace,
|
607
|
+
plural=plural,
|
608
|
+
body=resource_args,
|
609
|
+
)
|
610
|
+
except ApiException as e:
|
611
|
+
body = json.loads(e.body)
|
612
|
+
body_yaml = yaml.dump(body)
|
613
|
+
raise LaunchError(
|
614
|
+
f"Error creating CRD of kind {kind}: {e.status} {e.reason}\n{body_yaml}"
|
615
|
+
) from e
|
616
|
+
name = response.get("metadata", {}).get("name")
|
617
|
+
_logger.info(f"Created {kind} {response['metadata']['name']}")
|
618
|
+
submitted_run = CrdSubmittedRun(
|
619
|
+
name=name,
|
620
|
+
group=group,
|
621
|
+
version=version,
|
622
|
+
namespace=namespace,
|
623
|
+
plural=plural,
|
624
|
+
core_api=client.CoreV1Api(api_client),
|
625
|
+
custom_api=api,
|
626
|
+
)
|
627
|
+
if self.backend_config[PROJECT_SYNCHRONOUS]:
|
628
|
+
await submitted_run.wait()
|
629
|
+
return submitted_run
|
630
|
+
|
631
|
+
batch_api = kubernetes_asyncio.client.BatchV1Api(api_client)
|
632
|
+
core_api = kubernetes_asyncio.client.CoreV1Api(api_client)
|
633
|
+
namespace = self.get_namespace(resource_args, context)
|
634
|
+
job, secret = await self._inject_defaults(
|
635
|
+
resource_args, launch_project, image_uri, namespace, core_api
|
636
|
+
)
|
637
|
+
msg = "Creating Kubernetes job"
|
638
|
+
if "name" in resource_args:
|
639
|
+
msg += f": {resource_args['name']}"
|
640
|
+
_logger.info(msg)
|
641
|
+
try:
|
642
|
+
response = await kubernetes_asyncio.utils.create_from_dict(
|
643
|
+
api_client, job, namespace=namespace
|
644
|
+
)
|
645
|
+
except kubernetes_asyncio.utils.FailToCreateError as e:
|
646
|
+
for exc in e.api_exceptions:
|
647
|
+
resp = json.loads(exc.body)
|
648
|
+
msg = resp.get("message")
|
649
|
+
code = resp.get("code")
|
650
|
+
raise LaunchError(
|
651
|
+
f"Failed to create Kubernetes job for run {launch_project.run_id} ({code} {exc.reason}): {msg}"
|
652
|
+
)
|
653
|
+
except Exception as e:
|
654
|
+
raise LaunchError(
|
655
|
+
f"Unexpected exception when creating Kubernetes job: {str(e)}\n"
|
656
|
+
)
|
657
|
+
job_response = response[0]
|
658
|
+
job_name = job_response.metadata.name
|
659
|
+
LaunchKubernetesMonitor.monitor_namespace(namespace)
|
660
|
+
submitted_job = KubernetesSubmittedRun(
|
661
|
+
batch_api, core_api, job_name, namespace, secret
|
662
|
+
)
|
663
|
+
if self.backend_config[PROJECT_SYNCHRONOUS]:
|
664
|
+
await submitted_job.wait()
|
665
|
+
|
666
|
+
return submitted_job
|
667
|
+
|
668
|
+
|
669
|
+
def inject_entrypoint_and_args(
    containers: List[dict],
    entry_point: Optional[EntryPoint],
    override_args: List[str],
    should_override_entrypoint: bool,
) -> None:
    """Apply launch-time command and argument settings to container specs.

    Every container dict in ``containers`` is updated in place.

    Arguments:
        containers: The container specs to update.
        entry_point: Entrypoint whose ``command`` is written to a container
            that has no (or an empty) "command", or to every container when
            ``should_override_entrypoint`` is true. Ignored when falsy.
        override_args: Value stored under each container's "args" key.
            Nothing is written when it is empty.
        should_override_entrypoint: Whether to replace a command that is
            already set on a container.

    Returns:
        None
    """
    for spec in containers:
        if override_args:
            spec["args"] = override_args
        if not entry_point:
            continue
        needs_command = not spec.get("command") or should_override_entrypoint
        if needs_command:
            spec["command"] = entry_point.command
|
693
|
+
|
694
|
+
|
695
|
+
async def ensure_api_key_secret(
    core_api: "CoreV1Api",
    secret_name: str,
    namespace: str,
    api_key: str,
) -> "V1Secret":
    """Create a secret containing a user's wandb API key.

    If creation fails with a 409 conflict, the existing secret is read back.
    When its data matches, it is returned unchanged. When its data differs, it
    is deleted and recreated only if it carries the
    ``wandb.ai/created-by: launch-agent`` label; otherwise an error is raised.

    Arguments:
        core_api: The Kubernetes CoreV1Api object.
        secret_name: The name to use for the secret.
        namespace: The namespace to create the secret in.
        api_key: The user's wandb API key

    Returns:
        The created secret, or the existing secret when its data already
        matches.

    Raises:
        LaunchError: If any step fails, including the case where a secret with
            the same name but different data exists and is not labelled as
            created by the launch agent.
    """
    secret_data = {"password": base64.b64encode(api_key.encode()).decode()}
    labels = {"wandb.ai/created-by": "launch-agent"}
    secret = client.V1Secret(
        data=secret_data,
        metadata=client.V1ObjectMeta(
            name=secret_name, namespace=namespace, labels=labels
        ),
        kind="Secret",
        type="kubernetes.io/basic-auth",
    )

    try:
        try:
            return await core_api.create_namespaced_secret(namespace, secret)
        except ApiException as e:
            # 409 = conflict = secret already exists
            if e.status != 409:
                raise
            existing_secret = await core_api.read_namespaced_secret(
                name=secret_name, namespace=namespace
            )
            if existing_secret.data == secret_data:
                return existing_secret
            # A secret without any labels may report ``labels`` as None, so
            # fall back to an empty mapping before looking up the marker.
            existing_labels = existing_secret.metadata.labels or {}
            if existing_labels.get("wandb.ai/created-by") != "launch-agent":
                raise LaunchError(
                    f"Kubernetes secret already exists in namespace {namespace} with incorrect data: {secret_name}"
                )
            # If it's a previous secret made by launch agent, clean it up
            await core_api.delete_namespaced_secret(
                name=secret_name, namespace=namespace
            )
            return await core_api.create_namespaced_secret(namespace, secret)
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise LaunchError(
            f"Exception when ensuring Kubernetes API key secret: {str(e)}\n"
        ) from e
|
754
|
+
|
755
|
+
|
756
|
+
async def maybe_create_imagepull_secret(
    core_api: "CoreV1Api",
    registry: AbstractRegistry,
    run_id: str,
    namespace: str,
) -> Optional["V1Secret"]:
    """Create an image pull secret for a private registry when one is needed.

    The secret is named ``regcred-<run_id>`` and holds a docker config JSON
    built from the username and password returned by the registry. If a
    secret with that name already exists (409 conflict), the existing secret
    is read back and returned.

    Arguments:
        core_api: The Kubernetes CoreV1Api object.
        registry: The registry to pull from.
        run_id: The run id, used to build the secret name.
        namespace: The namespace to create the secret in.

    Returns:
        The created or existing secret, or None for LocalRegistry and
        AzureContainerRegistry, which are skipped.

    Raises:
        LaunchError: If creating or reading the secret fails.
    """
    # Secret not required
    if isinstance(registry, (LocalRegistry, AzureContainerRegistry)):
        return None

    username, password = await registry.get_username_password()
    docker_config = {
        "auths": {
            registry.uri: {
                "auth": base64.b64encode(f"{username}:{password}".encode()).decode(),
                # need an email but the use is deprecated
                "email": "deprecated@wandblaunch.com",
            }
        }
    }
    encoded_config = base64.b64encode(json.dumps(docker_config).encode()).decode()
    secret_name = f"regcred-{run_id}"
    pull_secret = client.V1Secret(
        data={".dockerconfigjson": encoded_config},
        metadata=client.V1ObjectMeta(name=secret_name, namespace=namespace),
        kind="Secret",
        type="kubernetes.io/dockerconfigjson",
    )
    try:
        try:
            return await core_api.create_namespaced_secret(namespace, pull_secret)
        except ApiException as e:
            # 409 = conflict = secret already exists
            if e.status != 409:
                raise
            return await core_api.read_namespaced_secret(
                name=secret_name, namespace=namespace
            )
    except Exception as e:
        raise LaunchError(f"Exception when creating Kubernetes secret: {str(e)}\n")
|
810
|
+
|
811
|
+
|
812
|
+
def yield_containers(root: Any) -> Iterator[dict]:
    """Yield every container spec found in a manifest.

    Lists are searched element by element and dicts value by value. Whenever
    a dict has a "containers" key whose value is a list, the items of that
    list are yielded. The value of a "containers" key is never searched any
    further, whatever its type. Anything that is neither a dict nor a list
    yields nothing.
    """
    if isinstance(root, list):
        for element in root:
            yield from yield_containers(element)
        return
    if not isinstance(root, dict):
        return
    for key, value in root.items():
        if key != "containers":
            # Keep descending through any nested structure.
            if isinstance(value, (dict, list)):
                yield from yield_containers(value)
            continue
        if isinstance(value, list):
            yield from value
|
828
|
+
|
829
|
+
|
830
|
+
def add_wandb_env(root: Union[dict, list], env_vars: Dict[str, str]) -> None:
    """Injects wandb environment variables into specs.

    Walks the spec with ``yield_containers`` and, for every container found,
    appends one ``{"name": ..., "value": ...}`` entry per variable to the
    container's "env" list, creating the list when it is missing. Entries
    already present in the list are left in place; the new entries are added
    after them.

    WANDB_RUN_ID is treated specially: if it is provided in env_vars, it is
    only set in the first container modified by this function.

    The ``env_vars`` dict passed by the caller is not modified.

    Arguments:
        root: The spec to modify.
        env_vars: The environment variables to inject.

    Returns: None.
    """
    # Work on a private copy so that dropping WANDB_RUN_ID below does not
    # leak back into the caller's dictionary.
    remaining = dict(env_vars)
    for cont in yield_containers(root):
        cont.setdefault("env", []).extend(
            {"name": key, "value": value} for key, value in remaining.items()
        )
        # After we have set WANDB_RUN_ID once, we don't want to set it again
        remaining.pop("WANDB_RUN_ID", None)
|
854
|
+
|
855
|
+
|
856
|
+
def yield_pods(manifest: Any) -> Iterator[dict]:
    """Yield all pod specs in a manifest.

    Recursively traverses the manifest and yields every dict that has a
    "spec" key whose value is a dict containing a "containers" key. A
    matching dict is yielded before its own values are searched, so nested
    matches come after their parent. Values that are neither dicts nor lists
    are not searched.
    """
    if isinstance(manifest, list):
        for item in manifest:
            yield from yield_pods(item)
    elif isinstance(manifest, dict):
        spec = manifest.get("spec")
        # Only a mapping can hold a "containers" key. Checking the type first
        # avoids a TypeError on a null spec and a false substring match when
        # the spec happens to be a string.
        if isinstance(spec, dict) and "containers" in spec:
            yield manifest
        for value in manifest.values():
            if isinstance(value, (dict, list)):
                yield from yield_pods(value)
|
872
|
+
|
873
|
+
|
874
|
+
def add_label_to_pods(
    manifest: Union[dict, list], label_key: str, label_value: str
) -> None:
    """Set one label on every pod spec found in a manifest.

    Pods are located with ``yield_pods``. For each pod the "metadata" dict and
    its "labels" dict are created when missing, then the label is written,
    replacing any value already stored under the same key.

    Arguments:
        manifest: The manifest to modify.
        label_key: The label key to add.
        label_value: The label value to add.

    Returns: None.
    """
    for pod_manifest in yield_pods(manifest):
        pod_labels = pod_manifest.setdefault("metadata", {}).setdefault("labels", {})
        pod_labels[label_key] = label_value
|
894
|
+
|
895
|
+
|
896
|
+
def add_entrypoint_args_overrides(manifest: Union[dict, list], overrides: dict) -> None:
    """Write command and args overrides onto all containers in a manifest.

    Recursively traverses the manifest. Whenever a dict has a "spec" key whose
    value contains a "containers" key, each of those containers receives the
    "command" and/or "args" values present in ``overrides``. Keys absent from
    ``overrides`` are left untouched on the containers.

    Arguments:
        manifest: The manifest to modify.
        overrides: Dictionary that may hold "command" and "args" keys.

    Returns: None.
    """
    if isinstance(manifest, list):
        for entry in manifest:
            add_entrypoint_args_overrides(entry, overrides)
        return
    if not isinstance(manifest, dict):
        return
    is_pod = "spec" in manifest and "containers" in manifest["spec"]
    if is_pod:
        for target in manifest["spec"]["containers"]:
            for field in ("command", "args"):
                if field in overrides:
                    target[field] = overrides[field]
    # Keep searching below this level, pod or not.
    for child in manifest.values():
        add_entrypoint_args_overrides(child, overrides)
|
922
|
+
|
923
|
+
|
924
|
+
def apply_code_mount_configuration(
    manifest: Union[Dict, list], project: LaunchProject
) -> None:
    """Mount the source code volume into every pod found in a manifest.

    Pods are located with ``yield_pods`` and their containers with
    ``yield_containers``. Each container gets a "wandb-source-code-volume"
    volume mount at CODE_MOUNT_DIR, with the project's image source string as
    sub path, and has its working directory set to CODE_MOUNT_DIR. Each pod
    spec gets a matching volume backed by the persistent volume claim named
    by SOURCE_CODE_PVC_NAME.

    Arguments:
        manifest: The manifest to modify.
        project: The launch project.

    Returns: None.
    """
    assert SOURCE_CODE_PVC_NAME is not None
    sub_path = project.get_image_source_string()
    volume_name = "wandb-source-code-volume"
    for pod_manifest in yield_pods(manifest):
        for container_spec in yield_containers(pod_manifest):
            # A fresh dict per container so the mounts are never shared.
            code_mount = {
                "name": volume_name,
                "mountPath": CODE_MOUNT_DIR,
                "subPath": sub_path,
            }
            container_spec.setdefault("volumeMounts", []).append(code_mount)
            container_spec["workingDir"] = CODE_MOUNT_DIR
        code_volume = {
            "name": volume_name,
            "persistentVolumeClaim": {
                "claimName": SOURCE_CODE_PVC_NAME,
            },
        }
        pod_manifest["spec"].setdefault("volumes", []).append(code_volume)
|