wandb 0.18.2__py3-none-musllinux_1_2_x86_64.whl
Sign up to get free protection for your applications and to get access to all the features.
- package_readme.md +89 -0
- wandb/__init__.py +245 -0
- wandb/__init__.pyi +1139 -0
- wandb/__main__.py +3 -0
- wandb/_globals.py +19 -0
- wandb/agents/__init__.py +0 -0
- wandb/agents/pyagent.py +363 -0
- wandb/analytics/__init__.py +3 -0
- wandb/analytics/sentry.py +266 -0
- wandb/apis/__init__.py +48 -0
- wandb/apis/attrs.py +40 -0
- wandb/apis/importers/__init__.py +1 -0
- wandb/apis/importers/internals/internal.py +385 -0
- wandb/apis/importers/internals/protocols.py +99 -0
- wandb/apis/importers/internals/util.py +78 -0
- wandb/apis/importers/mlflow.py +254 -0
- wandb/apis/importers/validation.py +108 -0
- wandb/apis/importers/wandb.py +1603 -0
- wandb/apis/internal.py +232 -0
- wandb/apis/normalize.py +89 -0
- wandb/apis/paginator.py +81 -0
- wandb/apis/public/__init__.py +34 -0
- wandb/apis/public/api.py +1305 -0
- wandb/apis/public/artifacts.py +1090 -0
- wandb/apis/public/const.py +4 -0
- wandb/apis/public/files.py +195 -0
- wandb/apis/public/history.py +149 -0
- wandb/apis/public/jobs.py +659 -0
- wandb/apis/public/projects.py +154 -0
- wandb/apis/public/query_generator.py +166 -0
- wandb/apis/public/reports.py +469 -0
- wandb/apis/public/runs.py +914 -0
- wandb/apis/public/sweeps.py +240 -0
- wandb/apis/public/teams.py +198 -0
- wandb/apis/public/users.py +136 -0
- wandb/apis/reports/__init__.py +1 -0
- wandb/apis/reports/v1/__init__.py +8 -0
- wandb/apis/reports/v2/__init__.py +8 -0
- wandb/apis/workspaces/__init__.py +8 -0
- wandb/beta/workflows.py +288 -0
- wandb/bin/nvidia_gpu_stats +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/__init__.py +0 -0
- wandb/cli/cli.py +3004 -0
- wandb/data_types.py +63 -0
- wandb/docker/__init__.py +342 -0
- wandb/docker/auth.py +436 -0
- wandb/docker/wandb-entrypoint.sh +33 -0
- wandb/docker/www_authenticate.py +94 -0
- wandb/env.py +514 -0
- wandb/errors/__init__.py +17 -0
- wandb/errors/errors.py +37 -0
- wandb/errors/term.py +103 -0
- wandb/errors/util.py +57 -0
- wandb/errors/warnings.py +2 -0
- wandb/filesync/__init__.py +0 -0
- wandb/filesync/dir_watcher.py +403 -0
- wandb/filesync/stats.py +100 -0
- wandb/filesync/step_checksum.py +142 -0
- wandb/filesync/step_prepare.py +179 -0
- wandb/filesync/step_upload.py +290 -0
- wandb/filesync/upload_job.py +142 -0
- wandb/integration/__init__.py +0 -0
- wandb/integration/catboost/__init__.py +5 -0
- wandb/integration/catboost/catboost.py +178 -0
- wandb/integration/cohere/__init__.py +3 -0
- wandb/integration/cohere/cohere.py +21 -0
- wandb/integration/cohere/resolver.py +347 -0
- wandb/integration/diffusers/__init__.py +3 -0
- wandb/integration/diffusers/autologger.py +76 -0
- wandb/integration/diffusers/pipeline_resolver.py +50 -0
- wandb/integration/diffusers/resolvers/__init__.py +9 -0
- wandb/integration/diffusers/resolvers/multimodal.py +882 -0
- wandb/integration/diffusers/resolvers/utils.py +102 -0
- wandb/integration/fastai/__init__.py +249 -0
- wandb/integration/gym/__init__.py +105 -0
- wandb/integration/huggingface/__init__.py +3 -0
- wandb/integration/huggingface/huggingface.py +18 -0
- wandb/integration/huggingface/resolver.py +213 -0
- wandb/integration/keras/__init__.py +11 -0
- wandb/integration/keras/callbacks/__init__.py +5 -0
- wandb/integration/keras/callbacks/metrics_logger.py +136 -0
- wandb/integration/keras/callbacks/model_checkpoint.py +195 -0
- wandb/integration/keras/callbacks/tables_builder.py +226 -0
- wandb/integration/keras/keras.py +1091 -0
- wandb/integration/kfp/__init__.py +6 -0
- wandb/integration/kfp/helpers.py +28 -0
- wandb/integration/kfp/kfp_patch.py +324 -0
- wandb/integration/kfp/wandb_logging.py +182 -0
- wandb/integration/langchain/__init__.py +3 -0
- wandb/integration/langchain/wandb_tracer.py +48 -0
- wandb/integration/lightgbm/__init__.py +239 -0
- wandb/integration/lightning/__init__.py +0 -0
- wandb/integration/lightning/fabric/__init__.py +3 -0
- wandb/integration/lightning/fabric/logger.py +762 -0
- wandb/integration/magic.py +556 -0
- wandb/integration/metaflow/__init__.py +3 -0
- wandb/integration/metaflow/metaflow.py +383 -0
- wandb/integration/openai/__init__.py +3 -0
- wandb/integration/openai/fine_tuning.py +480 -0
- wandb/integration/openai/openai.py +22 -0
- wandb/integration/openai/resolver.py +240 -0
- wandb/integration/prodigy/__init__.py +3 -0
- wandb/integration/prodigy/prodigy.py +299 -0
- wandb/integration/sacred/__init__.py +117 -0
- wandb/integration/sagemaker/__init__.py +12 -0
- wandb/integration/sagemaker/auth.py +28 -0
- wandb/integration/sagemaker/config.py +49 -0
- wandb/integration/sagemaker/files.py +3 -0
- wandb/integration/sagemaker/resources.py +34 -0
- wandb/integration/sb3/__init__.py +3 -0
- wandb/integration/sb3/sb3.py +153 -0
- wandb/integration/sklearn/__init__.py +37 -0
- wandb/integration/sklearn/calculate/__init__.py +32 -0
- wandb/integration/sklearn/calculate/calibration_curves.py +125 -0
- wandb/integration/sklearn/calculate/class_proportions.py +68 -0
- wandb/integration/sklearn/calculate/confusion_matrix.py +93 -0
- wandb/integration/sklearn/calculate/decision_boundaries.py +40 -0
- wandb/integration/sklearn/calculate/elbow_curve.py +55 -0
- wandb/integration/sklearn/calculate/feature_importances.py +67 -0
- wandb/integration/sklearn/calculate/learning_curve.py +64 -0
- wandb/integration/sklearn/calculate/outlier_candidates.py +69 -0
- wandb/integration/sklearn/calculate/residuals.py +86 -0
- wandb/integration/sklearn/calculate/silhouette.py +118 -0
- wandb/integration/sklearn/calculate/summary_metrics.py +62 -0
- wandb/integration/sklearn/plot/__init__.py +35 -0
- wandb/integration/sklearn/plot/classifier.py +329 -0
- wandb/integration/sklearn/plot/clusterer.py +146 -0
- wandb/integration/sklearn/plot/regressor.py +121 -0
- wandb/integration/sklearn/plot/shared.py +91 -0
- wandb/integration/sklearn/utils.py +183 -0
- wandb/integration/tensorboard/__init__.py +10 -0
- wandb/integration/tensorboard/log.py +355 -0
- wandb/integration/tensorboard/monkeypatch.py +185 -0
- wandb/integration/tensorflow/__init__.py +5 -0
- wandb/integration/tensorflow/estimator_hook.py +54 -0
- wandb/integration/torch/__init__.py +0 -0
- wandb/integration/torch/wandb_torch.py +554 -0
- wandb/integration/ultralytics/__init__.py +11 -0
- wandb/integration/ultralytics/bbox_utils.py +208 -0
- wandb/integration/ultralytics/callback.py +524 -0
- wandb/integration/ultralytics/classification_utils.py +83 -0
- wandb/integration/ultralytics/mask_utils.py +202 -0
- wandb/integration/ultralytics/pose_utils.py +103 -0
- wandb/integration/xgboost/__init__.py +11 -0
- wandb/integration/xgboost/xgboost.py +189 -0
- wandb/integration/yolov8/__init__.py +0 -0
- wandb/integration/yolov8/yolov8.py +284 -0
- wandb/jupyter.py +515 -0
- wandb/magic.py +3 -0
- wandb/mpmain/__init__.py +0 -0
- wandb/mpmain/__main__.py +1 -0
- wandb/old/__init__.py +0 -0
- wandb/old/core.py +53 -0
- wandb/old/settings.py +173 -0
- wandb/old/summary.py +440 -0
- wandb/plot/__init__.py +19 -0
- wandb/plot/bar.py +45 -0
- wandb/plot/confusion_matrix.py +100 -0
- wandb/plot/histogram.py +39 -0
- wandb/plot/line.py +43 -0
- wandb/plot/line_series.py +88 -0
- wandb/plot/pr_curve.py +136 -0
- wandb/plot/roc_curve.py +118 -0
- wandb/plot/scatter.py +32 -0
- wandb/plot/utils.py +183 -0
- wandb/plot/viz.py +123 -0
- wandb/proto/__init__.py +0 -0
- wandb/proto/v3/__init__.py +0 -0
- wandb/proto/v3/wandb_base_pb2.py +55 -0
- wandb/proto/v3/wandb_internal_pb2.py +1608 -0
- wandb/proto/v3/wandb_server_pb2.py +208 -0
- wandb/proto/v3/wandb_settings_pb2.py +112 -0
- wandb/proto/v3/wandb_telemetry_pb2.py +106 -0
- wandb/proto/v4/__init__.py +0 -0
- wandb/proto/v4/wandb_base_pb2.py +30 -0
- wandb/proto/v4/wandb_internal_pb2.py +360 -0
- wandb/proto/v4/wandb_server_pb2.py +63 -0
- wandb/proto/v4/wandb_settings_pb2.py +45 -0
- wandb/proto/v4/wandb_telemetry_pb2.py +41 -0
- wandb/proto/v5/wandb_base_pb2.py +31 -0
- wandb/proto/v5/wandb_internal_pb2.py +361 -0
- wandb/proto/v5/wandb_server_pb2.py +64 -0
- wandb/proto/v5/wandb_settings_pb2.py +46 -0
- wandb/proto/v5/wandb_telemetry_pb2.py +42 -0
- wandb/proto/wandb_base_pb2.py +10 -0
- wandb/proto/wandb_deprecated.py +53 -0
- wandb/proto/wandb_generate_deprecated.py +34 -0
- wandb/proto/wandb_generate_proto.py +49 -0
- wandb/proto/wandb_internal_pb2.py +16 -0
- wandb/proto/wandb_server_pb2.py +10 -0
- wandb/proto/wandb_settings_pb2.py +10 -0
- wandb/proto/wandb_telemetry_pb2.py +10 -0
- wandb/py.typed +0 -0
- wandb/sdk/__init__.py +37 -0
- wandb/sdk/artifacts/__init__.py +0 -0
- wandb/sdk/artifacts/_validators.py +90 -0
- wandb/sdk/artifacts/artifact.py +2389 -0
- wandb/sdk/artifacts/artifact_download_logger.py +43 -0
- wandb/sdk/artifacts/artifact_file_cache.py +253 -0
- wandb/sdk/artifacts/artifact_instance_cache.py +17 -0
- wandb/sdk/artifacts/artifact_manifest.py +74 -0
- wandb/sdk/artifacts/artifact_manifest_entry.py +249 -0
- wandb/sdk/artifacts/artifact_manifests/__init__.py +0 -0
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +92 -0
- wandb/sdk/artifacts/artifact_saver.py +269 -0
- wandb/sdk/artifacts/artifact_state.py +11 -0
- wandb/sdk/artifacts/artifact_ttl.py +7 -0
- wandb/sdk/artifacts/exceptions.py +57 -0
- wandb/sdk/artifacts/staging.py +25 -0
- wandb/sdk/artifacts/storage_handler.py +62 -0
- wandb/sdk/artifacts/storage_handlers/__init__.py +0 -0
- wandb/sdk/artifacts/storage_handlers/azure_handler.py +208 -0
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +228 -0
- wandb/sdk/artifacts/storage_handlers/http_handler.py +114 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +141 -0
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +56 -0
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +300 -0
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +72 -0
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +135 -0
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +74 -0
- wandb/sdk/artifacts/storage_layout.py +6 -0
- wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
- wandb/sdk/artifacts/storage_policies/register.py +1 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +378 -0
- wandb/sdk/artifacts/storage_policy.py +72 -0
- wandb/sdk/backend/__init__.py +0 -0
- wandb/sdk/backend/backend.py +222 -0
- wandb/sdk/data_types/__init__.py +0 -0
- wandb/sdk/data_types/_dtypes.py +914 -0
- wandb/sdk/data_types/_private.py +10 -0
- wandb/sdk/data_types/audio.py +165 -0
- wandb/sdk/data_types/base_types/__init__.py +0 -0
- wandb/sdk/data_types/base_types/json_metadata.py +55 -0
- wandb/sdk/data_types/base_types/media.py +315 -0
- wandb/sdk/data_types/base_types/wb_value.py +272 -0
- wandb/sdk/data_types/bokeh.py +70 -0
- wandb/sdk/data_types/graph.py +405 -0
- wandb/sdk/data_types/helper_types/__init__.py +0 -0
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +295 -0
- wandb/sdk/data_types/helper_types/classes.py +159 -0
- wandb/sdk/data_types/helper_types/image_mask.py +235 -0
- wandb/sdk/data_types/histogram.py +96 -0
- wandb/sdk/data_types/html.py +115 -0
- wandb/sdk/data_types/image.py +845 -0
- wandb/sdk/data_types/molecule.py +241 -0
- wandb/sdk/data_types/object_3d.py +474 -0
- wandb/sdk/data_types/plotly.py +82 -0
- wandb/sdk/data_types/saved_model.py +446 -0
- wandb/sdk/data_types/table.py +1204 -0
- wandb/sdk/data_types/trace_tree.py +438 -0
- wandb/sdk/data_types/utils.py +229 -0
- wandb/sdk/data_types/video.py +247 -0
- wandb/sdk/integration_utils/__init__.py +0 -0
- wandb/sdk/integration_utils/auto_logging.py +239 -0
- wandb/sdk/integration_utils/data_logging.py +475 -0
- wandb/sdk/interface/__init__.py +0 -0
- wandb/sdk/interface/constants.py +4 -0
- wandb/sdk/interface/interface.py +972 -0
- wandb/sdk/interface/interface_queue.py +59 -0
- wandb/sdk/interface/interface_relay.py +53 -0
- wandb/sdk/interface/interface_shared.py +537 -0
- wandb/sdk/interface/interface_sock.py +61 -0
- wandb/sdk/interface/message_future.py +27 -0
- wandb/sdk/interface/message_future_poll.py +50 -0
- wandb/sdk/interface/router.py +118 -0
- wandb/sdk/interface/router_queue.py +44 -0
- wandb/sdk/interface/router_relay.py +39 -0
- wandb/sdk/interface/router_sock.py +36 -0
- wandb/sdk/interface/summary_record.py +67 -0
- wandb/sdk/internal/__init__.py +0 -0
- wandb/sdk/internal/context.py +89 -0
- wandb/sdk/internal/datastore.py +297 -0
- wandb/sdk/internal/file_pusher.py +181 -0
- wandb/sdk/internal/file_stream.py +695 -0
- wandb/sdk/internal/flow_control.py +263 -0
- wandb/sdk/internal/handler.py +901 -0
- wandb/sdk/internal/internal.py +417 -0
- wandb/sdk/internal/internal_api.py +4358 -0
- wandb/sdk/internal/internal_util.py +100 -0
- wandb/sdk/internal/job_builder.py +629 -0
- wandb/sdk/internal/profiler.py +78 -0
- wandb/sdk/internal/progress.py +83 -0
- wandb/sdk/internal/run.py +25 -0
- wandb/sdk/internal/sample.py +70 -0
- wandb/sdk/internal/sender.py +1686 -0
- wandb/sdk/internal/sender_config.py +197 -0
- wandb/sdk/internal/settings_static.py +90 -0
- wandb/sdk/internal/system/__init__.py +0 -0
- wandb/sdk/internal/system/assets/__init__.py +27 -0
- wandb/sdk/internal/system/assets/aggregators.py +37 -0
- wandb/sdk/internal/system/assets/asset_registry.py +20 -0
- wandb/sdk/internal/system/assets/cpu.py +163 -0
- wandb/sdk/internal/system/assets/disk.py +210 -0
- wandb/sdk/internal/system/assets/gpu.py +416 -0
- wandb/sdk/internal/system/assets/gpu_amd.py +239 -0
- wandb/sdk/internal/system/assets/gpu_apple.py +177 -0
- wandb/sdk/internal/system/assets/interfaces.py +207 -0
- wandb/sdk/internal/system/assets/ipu.py +177 -0
- wandb/sdk/internal/system/assets/memory.py +166 -0
- wandb/sdk/internal/system/assets/network.py +125 -0
- wandb/sdk/internal/system/assets/open_metrics.py +299 -0
- wandb/sdk/internal/system/assets/tpu.py +154 -0
- wandb/sdk/internal/system/assets/trainium.py +399 -0
- wandb/sdk/internal/system/env_probe_helpers.py +13 -0
- wandb/sdk/internal/system/system_info.py +249 -0
- wandb/sdk/internal/system/system_monitor.py +229 -0
- wandb/sdk/internal/tb_watcher.py +518 -0
- wandb/sdk/internal/thread_local_settings.py +18 -0
- wandb/sdk/internal/writer.py +206 -0
- wandb/sdk/launch/__init__.py +14 -0
- wandb/sdk/launch/_launch.py +330 -0
- wandb/sdk/launch/_launch_add.py +255 -0
- wandb/sdk/launch/_project_spec.py +566 -0
- wandb/sdk/launch/agent/__init__.py +5 -0
- wandb/sdk/launch/agent/agent.py +924 -0
- wandb/sdk/launch/agent/config.py +296 -0
- wandb/sdk/launch/agent/job_status_tracker.py +53 -0
- wandb/sdk/launch/agent/run_queue_item_file_saver.py +45 -0
- wandb/sdk/launch/builder/__init__.py +0 -0
- wandb/sdk/launch/builder/abstract.py +156 -0
- wandb/sdk/launch/builder/build.py +297 -0
- wandb/sdk/launch/builder/context_manager.py +235 -0
- wandb/sdk/launch/builder/docker_builder.py +177 -0
- wandb/sdk/launch/builder/kaniko_builder.py +595 -0
- wandb/sdk/launch/builder/noop.py +58 -0
- wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +188 -0
- wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
- wandb/sdk/launch/create_job.py +528 -0
- wandb/sdk/launch/environment/abstract.py +29 -0
- wandb/sdk/launch/environment/aws_environment.py +322 -0
- wandb/sdk/launch/environment/azure_environment.py +105 -0
- wandb/sdk/launch/environment/gcp_environment.py +335 -0
- wandb/sdk/launch/environment/local_environment.py +66 -0
- wandb/sdk/launch/errors.py +19 -0
- wandb/sdk/launch/git_reference.py +109 -0
- wandb/sdk/launch/inputs/files.py +148 -0
- wandb/sdk/launch/inputs/internal.py +315 -0
- wandb/sdk/launch/inputs/manage.py +113 -0
- wandb/sdk/launch/inputs/schema.py +39 -0
- wandb/sdk/launch/loader.py +249 -0
- wandb/sdk/launch/registry/abstract.py +48 -0
- wandb/sdk/launch/registry/anon.py +29 -0
- wandb/sdk/launch/registry/azure_container_registry.py +124 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +192 -0
- wandb/sdk/launch/registry/google_artifact_registry.py +219 -0
- wandb/sdk/launch/registry/local_registry.py +67 -0
- wandb/sdk/launch/runner/__init__.py +0 -0
- wandb/sdk/launch/runner/abstract.py +195 -0
- wandb/sdk/launch/runner/kubernetes_monitor.py +474 -0
- wandb/sdk/launch/runner/kubernetes_runner.py +963 -0
- wandb/sdk/launch/runner/local_container.py +301 -0
- wandb/sdk/launch/runner/local_process.py +78 -0
- wandb/sdk/launch/runner/sagemaker_runner.py +426 -0
- wandb/sdk/launch/runner/vertex_runner.py +230 -0
- wandb/sdk/launch/sweeps/__init__.py +39 -0
- wandb/sdk/launch/sweeps/scheduler.py +742 -0
- wandb/sdk/launch/sweeps/scheduler_sweep.py +91 -0
- wandb/sdk/launch/sweeps/utils.py +316 -0
- wandb/sdk/launch/utils.py +746 -0
- wandb/sdk/launch/wandb_reference.py +138 -0
- wandb/sdk/lib/__init__.py +5 -0
- wandb/sdk/lib/_settings_toposort_generate.py +159 -0
- wandb/sdk/lib/_settings_toposort_generated.py +250 -0
- wandb/sdk/lib/_wburls_generate.py +25 -0
- wandb/sdk/lib/_wburls_generated.py +22 -0
- wandb/sdk/lib/apikey.py +273 -0
- wandb/sdk/lib/capped_dict.py +26 -0
- wandb/sdk/lib/config_util.py +101 -0
- wandb/sdk/lib/credentials.py +141 -0
- wandb/sdk/lib/deprecate.py +42 -0
- wandb/sdk/lib/disabled.py +29 -0
- wandb/sdk/lib/exit_hooks.py +54 -0
- wandb/sdk/lib/file_stream_utils.py +118 -0
- wandb/sdk/lib/filenames.py +64 -0
- wandb/sdk/lib/filesystem.py +372 -0
- wandb/sdk/lib/fsm.py +174 -0
- wandb/sdk/lib/gitlib.py +239 -0
- wandb/sdk/lib/gql_request.py +65 -0
- wandb/sdk/lib/handler_util.py +21 -0
- wandb/sdk/lib/hashutil.py +84 -0
- wandb/sdk/lib/import_hooks.py +275 -0
- wandb/sdk/lib/ipython.py +146 -0
- wandb/sdk/lib/json_util.py +80 -0
- wandb/sdk/lib/lazyloader.py +63 -0
- wandb/sdk/lib/mailbox.py +460 -0
- wandb/sdk/lib/module.py +69 -0
- wandb/sdk/lib/paths.py +106 -0
- wandb/sdk/lib/preinit.py +42 -0
- wandb/sdk/lib/printer.py +313 -0
- wandb/sdk/lib/proto_util.py +90 -0
- wandb/sdk/lib/redirect.py +845 -0
- wandb/sdk/lib/reporting.py +99 -0
- wandb/sdk/lib/retry.py +289 -0
- wandb/sdk/lib/run_moment.py +78 -0
- wandb/sdk/lib/runid.py +12 -0
- wandb/sdk/lib/server.py +52 -0
- wandb/sdk/lib/service_connection.py +216 -0
- wandb/sdk/lib/service_token.py +94 -0
- wandb/sdk/lib/sock_client.py +295 -0
- wandb/sdk/lib/sparkline.py +45 -0
- wandb/sdk/lib/telemetry.py +100 -0
- wandb/sdk/lib/timed_input.py +133 -0
- wandb/sdk/lib/timer.py +19 -0
- wandb/sdk/lib/tracelog.py +255 -0
- wandb/sdk/lib/wburls.py +46 -0
- wandb/sdk/service/__init__.py +0 -0
- wandb/sdk/service/_startup_debug.py +22 -0
- wandb/sdk/service/port_file.py +53 -0
- wandb/sdk/service/server.py +116 -0
- wandb/sdk/service/server_sock.py +276 -0
- wandb/sdk/service/service.py +242 -0
- wandb/sdk/service/streams.py +417 -0
- wandb/sdk/verify/__init__.py +0 -0
- wandb/sdk/verify/verify.py +501 -0
- wandb/sdk/wandb_alerts.py +12 -0
- wandb/sdk/wandb_config.py +322 -0
- wandb/sdk/wandb_helper.py +54 -0
- wandb/sdk/wandb_init.py +1266 -0
- wandb/sdk/wandb_login.py +349 -0
- wandb/sdk/wandb_metric.py +110 -0
- wandb/sdk/wandb_require.py +97 -0
- wandb/sdk/wandb_require_helpers.py +44 -0
- wandb/sdk/wandb_run.py +4236 -0
- wandb/sdk/wandb_settings.py +2001 -0
- wandb/sdk/wandb_setup.py +409 -0
- wandb/sdk/wandb_summary.py +150 -0
- wandb/sdk/wandb_sweep.py +119 -0
- wandb/sdk/wandb_sync.py +81 -0
- wandb/sdk/wandb_watch.py +144 -0
- wandb/sklearn.py +35 -0
- wandb/sync/__init__.py +3 -0
- wandb/sync/sync.py +443 -0
- wandb/trigger.py +29 -0
- wandb/util.py +1956 -0
- wandb/vendor/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/setup.py +40 -0
- wandb/vendor/gql-0.2.0/tests/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/tests/starwars/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/tests/starwars/fixtures.py +96 -0
- wandb/vendor/gql-0.2.0/tests/starwars/schema.py +146 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_dsl.py +293 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_query.py +355 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_validation.py +171 -0
- wandb/vendor/gql-0.2.0/tests/test_client.py +31 -0
- wandb/vendor/gql-0.2.0/tests/test_transport.py +89 -0
- wandb/vendor/gql-0.2.0/wandb_gql/__init__.py +4 -0
- wandb/vendor/gql-0.2.0/wandb_gql/client.py +75 -0
- wandb/vendor/gql-0.2.0/wandb_gql/dsl.py +152 -0
- wandb/vendor/gql-0.2.0/wandb_gql/gql.py +10 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/http.py +6 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/local_schema.py +15 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/requests.py +46 -0
- wandb/vendor/gql-0.2.0/wandb_gql/utils.py +21 -0
- wandb/vendor/graphql-core-1.1/setup.py +86 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/__init__.py +287 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/__init__.py +6 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/base.py +42 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/format_error.py +11 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/located_error.py +29 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/syntax_error.py +36 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/__init__.py +26 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/base.py +311 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executor.py +398 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/asyncio.py +53 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/gevent.py +22 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/process.py +32 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/sync.py +7 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/thread.py +35 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/utils.py +6 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/executor.py +66 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/fragment.py +252 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/resolver.py +151 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/utils.py +7 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/middleware.py +57 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/values.py +145 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/graphql.py +60 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/ast.py +1349 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/base.py +19 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/lexer.py +435 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/location.py +30 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/parser.py +779 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/printer.py +193 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/source.py +18 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor.py +222 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor_meta.py +82 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/cached_property.py +17 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/contain_subset.py +28 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/default_ordered_dict.py +40 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/ordereddict.py +8 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/pair_set.py +43 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/version.py +78 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/__init__.py +67 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/definition.py +619 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/directives.py +132 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/introspection.py +440 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/scalars.py +131 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/schema.py +100 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/typemap.py +145 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/assert_valid_name.py +9 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_from_value.py +65 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_code.py +49 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_dict.py +24 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/base.py +75 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_ast_schema.py +291 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_client_schema.py +250 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/concat_ast.py +9 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/extend_schema.py +357 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_field_def.py +27 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_operation_ast.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/introspection_query.py +90 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_literal_value.py +67 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_value.py +66 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/quoted_or_list.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/schema_printer.py +168 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/suggestion_list.py +56 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_comparators.py +69 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_from_ast.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_info.py +149 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/value_from_ast.py +69 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/__init__.py +4 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/__init__.py +79 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/arguments_of_correct_type.py +24 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/base.py +8 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/default_values_of_correct_type.py +44 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fields_on_correct_type.py +113 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fragments_on_composite_types.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_argument_names.py +70 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_directives.py +97 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_fragment_names.py +19 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_type_names.py +43 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/lone_anonymous_operation.py +23 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_fragment_cycles.py +59 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_undefined_variables.py +36 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_fragments.py +38 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_variables.py +37 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/overlapping_fields_can_be_merged.py +529 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/possible_fragment_spreads.py +44 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/provided_non_null_arguments.py +46 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/scalar_leafs.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_argument_names.py +32 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_fragment_names.py +28 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_input_field_names.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_operation_names.py +31 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_variable_names.py +27 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_are_input_types.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_in_allowed_position.py +53 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/validation.py +158 -0
- wandb/vendor/promise-2.3.0/conftest.py +30 -0
- wandb/vendor/promise-2.3.0/setup.py +64 -0
- wandb/vendor/promise-2.3.0/tests/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/tests/conftest.py +8 -0
- wandb/vendor/promise-2.3.0/tests/test_awaitable.py +32 -0
- wandb/vendor/promise-2.3.0/tests/test_awaitable_35.py +47 -0
- wandb/vendor/promise-2.3.0/tests/test_benchmark.py +116 -0
- wandb/vendor/promise-2.3.0/tests/test_complex_threads.py +23 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader.py +452 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader_awaitable_35.py +99 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader_extra.py +65 -0
- wandb/vendor/promise-2.3.0/tests/test_extra.py +670 -0
- wandb/vendor/promise-2.3.0/tests/test_issues.py +132 -0
- wandb/vendor/promise-2.3.0/tests/test_promise_list.py +70 -0
- wandb/vendor/promise-2.3.0/tests/test_spec.py +584 -0
- wandb/vendor/promise-2.3.0/tests/test_thread_safety.py +115 -0
- wandb/vendor/promise-2.3.0/tests/utils.py +3 -0
- wandb/vendor/promise-2.3.0/wandb_promise/__init__.py +38 -0
- wandb/vendor/promise-2.3.0/wandb_promise/async_.py +135 -0
- wandb/vendor/promise-2.3.0/wandb_promise/compat.py +32 -0
- wandb/vendor/promise-2.3.0/wandb_promise/dataloader.py +326 -0
- wandb/vendor/promise-2.3.0/wandb_promise/iterate_promise.py +12 -0
- wandb/vendor/promise-2.3.0/wandb_promise/promise.py +848 -0
- wandb/vendor/promise-2.3.0/wandb_promise/promise_list.py +151 -0
- wandb/vendor/promise-2.3.0/wandb_promise/pyutils/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/wandb_promise/pyutils/version.py +83 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/asyncio.py +22 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/gevent.py +21 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/immediate.py +27 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/thread.py +18 -0
- wandb/vendor/promise-2.3.0/wandb_promise/utils.py +56 -0
- wandb/vendor/pygments/__init__.py +90 -0
- wandb/vendor/pygments/cmdline.py +568 -0
- wandb/vendor/pygments/console.py +74 -0
- wandb/vendor/pygments/filter.py +74 -0
- wandb/vendor/pygments/filters/__init__.py +350 -0
- wandb/vendor/pygments/formatter.py +95 -0
- wandb/vendor/pygments/formatters/__init__.py +153 -0
- wandb/vendor/pygments/formatters/_mapping.py +85 -0
- wandb/vendor/pygments/formatters/bbcode.py +109 -0
- wandb/vendor/pygments/formatters/html.py +851 -0
- wandb/vendor/pygments/formatters/img.py +600 -0
- wandb/vendor/pygments/formatters/irc.py +182 -0
- wandb/vendor/pygments/formatters/latex.py +482 -0
- wandb/vendor/pygments/formatters/other.py +160 -0
- wandb/vendor/pygments/formatters/rtf.py +147 -0
- wandb/vendor/pygments/formatters/svg.py +153 -0
- wandb/vendor/pygments/formatters/terminal.py +136 -0
- wandb/vendor/pygments/formatters/terminal256.py +309 -0
- wandb/vendor/pygments/lexer.py +871 -0
- wandb/vendor/pygments/lexers/__init__.py +329 -0
- wandb/vendor/pygments/lexers/_asy_builtins.py +1645 -0
- wandb/vendor/pygments/lexers/_cl_builtins.py +232 -0
- wandb/vendor/pygments/lexers/_cocoa_builtins.py +72 -0
- wandb/vendor/pygments/lexers/_csound_builtins.py +1346 -0
- wandb/vendor/pygments/lexers/_lasso_builtins.py +5327 -0
- wandb/vendor/pygments/lexers/_lua_builtins.py +295 -0
- wandb/vendor/pygments/lexers/_mapping.py +500 -0
- wandb/vendor/pygments/lexers/_mql_builtins.py +1172 -0
- wandb/vendor/pygments/lexers/_openedge_builtins.py +2547 -0
- wandb/vendor/pygments/lexers/_php_builtins.py +4756 -0
- wandb/vendor/pygments/lexers/_postgres_builtins.py +621 -0
- wandb/vendor/pygments/lexers/_scilab_builtins.py +3094 -0
- wandb/vendor/pygments/lexers/_sourcemod_builtins.py +1163 -0
- wandb/vendor/pygments/lexers/_stan_builtins.py +532 -0
- wandb/vendor/pygments/lexers/_stata_builtins.py +419 -0
- wandb/vendor/pygments/lexers/_tsql_builtins.py +1004 -0
- wandb/vendor/pygments/lexers/_vim_builtins.py +1939 -0
- wandb/vendor/pygments/lexers/actionscript.py +240 -0
- wandb/vendor/pygments/lexers/agile.py +24 -0
- wandb/vendor/pygments/lexers/algebra.py +221 -0
- wandb/vendor/pygments/lexers/ambient.py +76 -0
- wandb/vendor/pygments/lexers/ampl.py +87 -0
- wandb/vendor/pygments/lexers/apl.py +101 -0
- wandb/vendor/pygments/lexers/archetype.py +318 -0
- wandb/vendor/pygments/lexers/asm.py +641 -0
- wandb/vendor/pygments/lexers/automation.py +374 -0
- wandb/vendor/pygments/lexers/basic.py +500 -0
- wandb/vendor/pygments/lexers/bibtex.py +160 -0
- wandb/vendor/pygments/lexers/business.py +612 -0
- wandb/vendor/pygments/lexers/c_cpp.py +252 -0
- wandb/vendor/pygments/lexers/c_like.py +541 -0
- wandb/vendor/pygments/lexers/capnproto.py +78 -0
- wandb/vendor/pygments/lexers/chapel.py +102 -0
- wandb/vendor/pygments/lexers/clean.py +288 -0
- wandb/vendor/pygments/lexers/compiled.py +34 -0
- wandb/vendor/pygments/lexers/configs.py +833 -0
- wandb/vendor/pygments/lexers/console.py +114 -0
- wandb/vendor/pygments/lexers/crystal.py +393 -0
- wandb/vendor/pygments/lexers/csound.py +366 -0
- wandb/vendor/pygments/lexers/css.py +689 -0
- wandb/vendor/pygments/lexers/d.py +251 -0
- wandb/vendor/pygments/lexers/dalvik.py +125 -0
- wandb/vendor/pygments/lexers/data.py +555 -0
- wandb/vendor/pygments/lexers/diff.py +165 -0
- wandb/vendor/pygments/lexers/dotnet.py +691 -0
- wandb/vendor/pygments/lexers/dsls.py +878 -0
- wandb/vendor/pygments/lexers/dylan.py +289 -0
- wandb/vendor/pygments/lexers/ecl.py +125 -0
- wandb/vendor/pygments/lexers/eiffel.py +65 -0
- wandb/vendor/pygments/lexers/elm.py +121 -0
- wandb/vendor/pygments/lexers/erlang.py +533 -0
- wandb/vendor/pygments/lexers/esoteric.py +277 -0
- wandb/vendor/pygments/lexers/ezhil.py +69 -0
- wandb/vendor/pygments/lexers/factor.py +344 -0
- wandb/vendor/pygments/lexers/fantom.py +250 -0
- wandb/vendor/pygments/lexers/felix.py +273 -0
- wandb/vendor/pygments/lexers/forth.py +177 -0
- wandb/vendor/pygments/lexers/fortran.py +205 -0
- wandb/vendor/pygments/lexers/foxpro.py +428 -0
- wandb/vendor/pygments/lexers/functional.py +21 -0
- wandb/vendor/pygments/lexers/go.py +101 -0
- wandb/vendor/pygments/lexers/grammar_notation.py +213 -0
- wandb/vendor/pygments/lexers/graph.py +80 -0
- wandb/vendor/pygments/lexers/graphics.py +553 -0
- wandb/vendor/pygments/lexers/haskell.py +843 -0
- wandb/vendor/pygments/lexers/haxe.py +936 -0
- wandb/vendor/pygments/lexers/hdl.py +382 -0
- wandb/vendor/pygments/lexers/hexdump.py +103 -0
- wandb/vendor/pygments/lexers/html.py +602 -0
- wandb/vendor/pygments/lexers/idl.py +270 -0
- wandb/vendor/pygments/lexers/igor.py +288 -0
- wandb/vendor/pygments/lexers/inferno.py +96 -0
- wandb/vendor/pygments/lexers/installers.py +322 -0
- wandb/vendor/pygments/lexers/int_fiction.py +1343 -0
- wandb/vendor/pygments/lexers/iolang.py +63 -0
- wandb/vendor/pygments/lexers/j.py +146 -0
- wandb/vendor/pygments/lexers/javascript.py +1525 -0
- wandb/vendor/pygments/lexers/julia.py +333 -0
- wandb/vendor/pygments/lexers/jvm.py +1573 -0
- wandb/vendor/pygments/lexers/lisp.py +2621 -0
- wandb/vendor/pygments/lexers/make.py +202 -0
- wandb/vendor/pygments/lexers/markup.py +595 -0
- wandb/vendor/pygments/lexers/math.py +21 -0
- wandb/vendor/pygments/lexers/matlab.py +663 -0
- wandb/vendor/pygments/lexers/ml.py +769 -0
- wandb/vendor/pygments/lexers/modeling.py +358 -0
- wandb/vendor/pygments/lexers/modula2.py +1561 -0
- wandb/vendor/pygments/lexers/monte.py +204 -0
- wandb/vendor/pygments/lexers/ncl.py +894 -0
- wandb/vendor/pygments/lexers/nimrod.py +159 -0
- wandb/vendor/pygments/lexers/nit.py +64 -0
- wandb/vendor/pygments/lexers/nix.py +136 -0
- wandb/vendor/pygments/lexers/oberon.py +105 -0
- wandb/vendor/pygments/lexers/objective.py +504 -0
- wandb/vendor/pygments/lexers/ooc.py +85 -0
- wandb/vendor/pygments/lexers/other.py +41 -0
- wandb/vendor/pygments/lexers/parasail.py +79 -0
- wandb/vendor/pygments/lexers/parsers.py +835 -0
- wandb/vendor/pygments/lexers/pascal.py +644 -0
- wandb/vendor/pygments/lexers/pawn.py +199 -0
- wandb/vendor/pygments/lexers/perl.py +620 -0
- wandb/vendor/pygments/lexers/php.py +267 -0
- wandb/vendor/pygments/lexers/praat.py +294 -0
- wandb/vendor/pygments/lexers/prolog.py +306 -0
- wandb/vendor/pygments/lexers/python.py +939 -0
- wandb/vendor/pygments/lexers/qvt.py +152 -0
- wandb/vendor/pygments/lexers/r.py +453 -0
- wandb/vendor/pygments/lexers/rdf.py +270 -0
- wandb/vendor/pygments/lexers/rebol.py +431 -0
- wandb/vendor/pygments/lexers/resource.py +85 -0
- wandb/vendor/pygments/lexers/rnc.py +67 -0
- wandb/vendor/pygments/lexers/roboconf.py +82 -0
- wandb/vendor/pygments/lexers/robotframework.py +560 -0
- wandb/vendor/pygments/lexers/ruby.py +519 -0
- wandb/vendor/pygments/lexers/rust.py +220 -0
- wandb/vendor/pygments/lexers/sas.py +228 -0
- wandb/vendor/pygments/lexers/scripting.py +1222 -0
- wandb/vendor/pygments/lexers/shell.py +794 -0
- wandb/vendor/pygments/lexers/smalltalk.py +195 -0
- wandb/vendor/pygments/lexers/smv.py +79 -0
- wandb/vendor/pygments/lexers/snobol.py +83 -0
- wandb/vendor/pygments/lexers/special.py +103 -0
- wandb/vendor/pygments/lexers/sql.py +681 -0
- wandb/vendor/pygments/lexers/stata.py +108 -0
- wandb/vendor/pygments/lexers/supercollider.py +90 -0
- wandb/vendor/pygments/lexers/tcl.py +145 -0
- wandb/vendor/pygments/lexers/templates.py +2283 -0
- wandb/vendor/pygments/lexers/testing.py +207 -0
- wandb/vendor/pygments/lexers/text.py +25 -0
- wandb/vendor/pygments/lexers/textedit.py +169 -0
- wandb/vendor/pygments/lexers/textfmts.py +297 -0
- wandb/vendor/pygments/lexers/theorem.py +458 -0
- wandb/vendor/pygments/lexers/trafficscript.py +54 -0
- wandb/vendor/pygments/lexers/typoscript.py +226 -0
- wandb/vendor/pygments/lexers/urbi.py +133 -0
- wandb/vendor/pygments/lexers/varnish.py +190 -0
- wandb/vendor/pygments/lexers/verification.py +111 -0
- wandb/vendor/pygments/lexers/web.py +24 -0
- wandb/vendor/pygments/lexers/webmisc.py +988 -0
- wandb/vendor/pygments/lexers/whiley.py +116 -0
- wandb/vendor/pygments/lexers/x10.py +69 -0
- wandb/vendor/pygments/modeline.py +44 -0
- wandb/vendor/pygments/plugin.py +68 -0
- wandb/vendor/pygments/regexopt.py +92 -0
- wandb/vendor/pygments/scanner.py +105 -0
- wandb/vendor/pygments/sphinxext.py +158 -0
- wandb/vendor/pygments/style.py +155 -0
- wandb/vendor/pygments/styles/__init__.py +80 -0
- wandb/vendor/pygments/styles/abap.py +29 -0
- wandb/vendor/pygments/styles/algol.py +63 -0
- wandb/vendor/pygments/styles/algol_nu.py +63 -0
- wandb/vendor/pygments/styles/arduino.py +98 -0
- wandb/vendor/pygments/styles/autumn.py +65 -0
- wandb/vendor/pygments/styles/borland.py +51 -0
- wandb/vendor/pygments/styles/bw.py +49 -0
- wandb/vendor/pygments/styles/colorful.py +81 -0
- wandb/vendor/pygments/styles/default.py +73 -0
- wandb/vendor/pygments/styles/emacs.py +72 -0
- wandb/vendor/pygments/styles/friendly.py +72 -0
- wandb/vendor/pygments/styles/fruity.py +42 -0
- wandb/vendor/pygments/styles/igor.py +29 -0
- wandb/vendor/pygments/styles/lovelace.py +97 -0
- wandb/vendor/pygments/styles/manni.py +75 -0
- wandb/vendor/pygments/styles/monokai.py +106 -0
- wandb/vendor/pygments/styles/murphy.py +80 -0
- wandb/vendor/pygments/styles/native.py +65 -0
- wandb/vendor/pygments/styles/paraiso_dark.py +125 -0
- wandb/vendor/pygments/styles/paraiso_light.py +125 -0
- wandb/vendor/pygments/styles/pastie.py +75 -0
- wandb/vendor/pygments/styles/perldoc.py +69 -0
- wandb/vendor/pygments/styles/rainbow_dash.py +89 -0
- wandb/vendor/pygments/styles/rrt.py +33 -0
- wandb/vendor/pygments/styles/sas.py +44 -0
- wandb/vendor/pygments/styles/stata.py +40 -0
- wandb/vendor/pygments/styles/tango.py +141 -0
- wandb/vendor/pygments/styles/trac.py +63 -0
- wandb/vendor/pygments/styles/vim.py +63 -0
- wandb/vendor/pygments/styles/vs.py +38 -0
- wandb/vendor/pygments/styles/xcode.py +51 -0
- wandb/vendor/pygments/token.py +213 -0
- wandb/vendor/pygments/unistring.py +217 -0
- wandb/vendor/pygments/util.py +388 -0
- wandb/vendor/pynvml/__init__.py +0 -0
- wandb/vendor/pynvml/pynvml.py +4779 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/__init__.py +17 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/events.py +615 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/__init__.py +98 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/api.py +369 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents.py +172 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents2.py +239 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify.py +218 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_buffer.py +81 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_c.py +575 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/kqueue.py +730 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/polling.py +145 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/read_directory_changes.py +133 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/winapi.py +348 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/patterns.py +265 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/tricks/__init__.py +174 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/__init__.py +151 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/bricks.py +249 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/compat.py +29 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/decorators.py +198 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/delayed_queue.py +88 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/dirsnapshot.py +293 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/echo.py +157 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/event_backport.py +41 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/importlib2.py +40 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/platform.py +57 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/unicode_paths.py +64 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/win32stat.py +123 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/version.py +28 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/watchmedo.py +577 -0
- wandb/wandb_agent.py +588 -0
- wandb/wandb_controller.py +721 -0
- wandb/wandb_run.py +9 -0
- wandb-0.18.2.dist-info/METADATA +213 -0
- wandb-0.18.2.dist-info/RECORD +827 -0
- wandb-0.18.2.dist-info/WHEEL +5 -0
- wandb-0.18.2.dist-info/entry_points.txt +3 -0
- wandb-0.18.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,426 @@
|
|
1
|
+
"""Implementation of the SageMakerRunner class."""
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
import logging
|
5
|
+
from typing import Any, Dict, List, Optional, cast
|
6
|
+
|
7
|
+
if False:
|
8
|
+
import boto3 # type: ignore
|
9
|
+
|
10
|
+
import wandb
|
11
|
+
from wandb.apis.internal import Api
|
12
|
+
from wandb.sdk.launch.environment.aws_environment import AwsEnvironment
|
13
|
+
from wandb.sdk.launch.errors import LaunchError
|
14
|
+
|
15
|
+
from .._project_spec import EntryPoint, LaunchProject
|
16
|
+
from ..registry.abstract import AbstractRegistry
|
17
|
+
from ..utils import (
|
18
|
+
LOG_PREFIX,
|
19
|
+
MAX_ENV_LENGTHS,
|
20
|
+
PROJECT_SYNCHRONOUS,
|
21
|
+
event_loop_thread_exec,
|
22
|
+
to_camel_case,
|
23
|
+
)
|
24
|
+
from .abstract import AbstractRun, AbstractRunner, Status
|
25
|
+
|
26
|
+
_logger = logging.getLogger(__name__)
|
27
|
+
|
28
|
+
|
29
|
+
class SagemakerSubmittedRun(AbstractRun):
    """Instance of ``AbstractRun`` corresponding to a training job launched on AWS SageMaker.

    The run is identified by its SageMaker training job name. Status and logs
    are fetched through the boto3 clients handed to the constructor.
    """

    def __init__(
        self,
        training_job_name: str,
        client: "boto3.Client",
        log_client: Optional["boto3.Client"] = None,
    ) -> None:
        """Store the clients and job name; the run starts out as ``running``.

        Arguments:
            training_job_name: Name of the SageMaker training job.
            client: Client used for ``describe_training_job`` and
                ``stop_training_job``.
            log_client: Optional client used to read the job's log streams.
                When ``None``, ``get_logs`` always returns ``None``.
        """
        super().__init__()
        self.client = client
        self.log_client = log_client
        self.training_job_name = training_job_name
        self._status = Status("running")

    @property
    def id(self) -> str:
        """Return the run id, ``sagemaker-<training job name>``."""
        return f"sagemaker-{self.training_job_name}"

    async def get_logs(self) -> Optional[str]:
        """Return the job's log events as ``timestamp:message`` lines.

        Only the first log stream whose name starts with the training job name
        is read, and only the events returned by a single ``get_log_events``
        call. Any failure is reported with ``wandb.termwarn`` and results in
        ``None`` rather than an exception.
        """
        if self.log_client is None:
            return None
        try:
            describe_log_streams = event_loop_thread_exec(
                self.log_client.describe_log_streams
            )
            describe_res = await describe_log_streams(
                logGroupName="/aws/sagemaker/TrainingJobs",
                logStreamNamePrefix=self.training_job_name,
            )
            if len(describe_res["logStreams"]) == 0:
                wandb.termwarn(
                    f"Failed to get logs for training job: {self.training_job_name}"
                )
                return None
            log_name = describe_res["logStreams"][0]["logStreamName"]
            get_log_events = event_loop_thread_exec(self.log_client.get_log_events)
            res = await get_log_events(
                logGroupName="/aws/sagemaker/TrainingJobs",
                logStreamName=log_name,
            )
            # A response without "events" raises KeyError here, which the
            # generic handler below turns into a warning. No ``assert`` is
            # used because asserts are stripped under ``python -O``.
            return "\n".join(
                [f'{event["timestamp"]}:{event["message"]}' for event in res["events"]]
            )
        except self.log_client.exceptions.ResourceNotFoundException:
            wandb.termwarn(
                f"Failed to get logs for training job: {self.training_job_name}"
            )
            return None
        except Exception as e:
            wandb.termwarn(
                f"Failed to handle logs for training job: {self.training_job_name} with error {str(e)}"
            )
            return None

    async def wait(self) -> bool:
        """Poll the job every 5 seconds until it reaches a terminal state.

        Returns:
            ``True`` if the final state is ``finished``, ``False`` otherwise.
        """
        while True:
            status_state = (await self.get_status()).state
            wandb.termlog(
                f"{LOG_PREFIX}Training job {self.training_job_name} status: {status_state}"
            )
            if status_state in ["stopped", "failed", "finished"]:
                break
            await asyncio.sleep(5)
        return status_state == "finished"

    async def cancel(self) -> None:
        """Stop the training job if it is still running, then wait for it to end."""
        status = await self.get_status()
        if status.state == "running":
            # Go through event_loop_thread_exec like every other boto3 call in
            # this class, so the request is not made directly in the coroutine.
            stop_training_job = event_loop_thread_exec(self.client.stop_training_job)
            await stop_training_job(TrainingJobName=self.training_job_name)
            await self.wait()

    async def get_status(self) -> Status:
        """Query SageMaker for the job status and map it to a ``Status``.

        ``Completed`` and ``Stopped`` both map to ``finished``. A status that
        is not recognised leaves the previously stored status unchanged.
        """
        describe_training_job = event_loop_thread_exec(
            self.client.describe_training_job
        )
        job_status = (
            await describe_training_job(TrainingJobName=self.training_job_name)
        )["TrainingJobStatus"]
        if job_status == "Completed" or job_status == "Stopped":
            self._status = Status("finished")
        elif job_status == "Failed":
            self._status = Status("failed")
        elif job_status == "Stopping":
            self._status = Status("stopping")
        elif job_status == "InProgress":
            self._status = Status("running")
        return self._status
|
119
|
+
|
120
|
+
|
121
|
+
class SageMakerRunner(AbstractRunner):
    """Runner class, uses a project to create a SagemakerSubmittedRun."""

    def __init__(
        self,
        api: Api,
        backend_config: Dict[str, Any],
        environment: AwsEnvironment,
        registry: AbstractRegistry,
    ) -> None:
        """Initialize the SagemakerRunner.

        Arguments:
            api (Api): The API instance.
            backend_config (Dict[str, Any]): The backend configuration.
            environment (AwsEnvironment): The AWS environment.
            registry (AbstractRegistry): The registry, stored on the runner.

        Raises:
            LaunchError: If the runner cannot be initialized.
        """
        super().__init__(api, backend_config)
        self.environment = environment
        self.registry = registry

    async def run(
        self,
        launch_project: LaunchProject,
        image_uri: str,
    ) -> Optional[AbstractRun]:
        """Run a project on Amazon Sagemaker.

        Arguments:
            launch_project (LaunchProject): The project to run.
            image_uri (str): Image to train with. Ignored when the sagemaker
                resource args already set
                ``AlgorithmSpecification.TrainingImage``.

        Returns:
            Optional[AbstractRun]: The run instance.

        Raises:
            LaunchError: If the launch is unsuccessful.
        """
        _logger.info("using AWSSagemakerRunner")

        given_sagemaker_args = launch_project.resource_args.get("sagemaker")
        if given_sagemaker_args is None:
            raise LaunchError(
                "No sagemaker args specified. Specify sagemaker args in resource_args"
            )

        default_output_path = self.backend_config.get("runner", {}).get(
            "s3_output_path"
        )
        if default_output_path is not None and not default_output_path.startswith(
            "s3://"
        ):
            default_output_path = f"s3://{default_output_path}"

        session = await self.environment.get_session()
        client = await event_loop_thread_exec(session.client)("sts")
        # The identity lookup is a network request; route it through
        # event_loop_thread_exec the same way the client creation above is.
        caller_id = await event_loop_thread_exec(client.get_caller_identity)()
        account_id = caller_id["Account"]
        _logger.info(f"Using account ID {account_id}")
        partition = await self.environment.get_partition()
        role_arn = get_role_arn(
            given_sagemaker_args, self.backend_config, account_id, partition
        )

        # Create a sagemaker client to launch the job.
        sagemaker_client = session.client("sagemaker")
        log_client = None
        try:
            log_client = session.client("logs")
        except Exception as e:
            # Logs are best-effort: the job is still launched without them.
            wandb.termwarn(
                f"Failed to connect to cloudwatch logs with error {str(e)}, logs will not be available"
            )

        # if the user provided the image they want to use, use that, but warn it won't have swappable artifacts
        user_image = given_sagemaker_args.get("AlgorithmSpecification", {}).get(
            "TrainingImage"
        )
        if user_image is not None:
            sagemaker_args = build_sagemaker_args(
                launch_project,
                self._api,
                role_arn,
                launch_project.override_entrypoint,
                launch_project.override_args,
                MAX_ENV_LENGTHS[self.__class__.__name__],
                user_image,
                default_output_path,
            )
            _logger.info(
                f"Launching sagemaker job on user supplied image with args: {sagemaker_args}"
            )
            run = await launch_sagemaker_job(
                launch_project, sagemaker_args, sagemaker_client, log_client
            )
            if self.backend_config[PROJECT_SYNCHRONOUS]:
                await run.wait()
            return run

        _logger.info("Connecting to sagemaker client")
        entry_point = (
            launch_project.override_entrypoint or launch_project.get_job_entry_point()
        )
        command_args = []
        if entry_point is not None:
            command_args += entry_point.command
        command_args += launch_project.override_args
        if command_args:
            command_str = " ".join(command_args)
            wandb.termlog(
                f"{LOG_PREFIX}Launching run on sagemaker with entrypoint: {command_str}"
            )
        else:
            wandb.termlog(
                f"{LOG_PREFIX}Launching run on sagemaker with user-provided entrypoint in image"
            )
        sagemaker_args = build_sagemaker_args(
            launch_project,
            self._api,
            role_arn,
            entry_point,
            launch_project.override_args,
            MAX_ENV_LENGTHS[self.__class__.__name__],
            image_uri,
            default_output_path,
        )
        _logger.info(f"Launching sagemaker job with args: {sagemaker_args}")
        run = await launch_sagemaker_job(
            launch_project, sagemaker_args, sagemaker_client, log_client
        )
        if self.backend_config[PROJECT_SYNCHRONOUS]:
            await run.wait()
        return run
|
258
|
+
|
259
|
+
|
260
|
+
def merge_image_uri_with_algorithm_specification(
    algorithm_specification: Optional[Dict[str, Any]],
    image_uri: Optional[str],
    entrypoint_command: List[str],
    args: Optional[List[str]],
) -> Dict[str, Any]:
    """Create an AWS AlgorithmSpecification.

    AWS Sagemaker algorithms require a training image and an input mode. If the user
    does not specify the specification themselves, define the spec minimally using these
    two fields. Otherwise, start from the user's specification and override its
    training image when ``image_uri`` is non-empty.

    Arguments:
        algorithm_specification: The user-supplied specification, or ``None``.
            It is copied (shallowly), never modified in place.
        image_uri: The training image to use, if any.
        entrypoint_command: Stored as ``ContainerEntrypoint`` when non-empty.
        args: Stored as ``ContainerArguments`` when non-empty.

    Returns:
        The merged specification as a new dict.

    Raises:
        LaunchError: If no training image could be determined.
    """
    if algorithm_specification is None:
        merged: Dict[str, Any] = {
            "TrainingImage": image_uri,
            "TrainingInputMode": "File",
        }
    else:
        # Work on a copy so the caller's resource args are left untouched.
        merged = dict(algorithm_specification)
        if image_uri:
            merged["TrainingImage"] = image_uri
    if entrypoint_command:
        merged["ContainerEntrypoint"] = entrypoint_command
    if args:
        merged["ContainerArguments"] = args

    # ``.get`` so that a user spec with no TrainingImage key at all produces
    # the LaunchError below instead of a bare KeyError.
    if merged.get("TrainingImage") is None:
        raise LaunchError("Failed to determine tag for training image")
    return merged
|
289
|
+
|
290
|
+
|
291
|
+
def build_sagemaker_args(
    launch_project: LaunchProject,
    api: Api,
    role_arn: str,
    entry_point: Optional[EntryPoint],
    args: Optional[List[str]],
    max_env_length: int,
    image_uri: str,
    default_output_path: Optional[str] = None,
) -> Dict[str, Any]:
    """Assemble the keyword arguments for the SageMaker training job request.

    The user's ``sagemaker`` resource args (after macro filling) are combined
    with values computed here: ``OutputDataConfig``, ``TrainingJobName``,
    ``AlgorithmSpecification``, ``RoleArn``, ``Environment`` and ``Tags``.
    Computed values win over user-supplied ones of the same name.

    Arguments:
        launch_project: Project providing resource args, run id and env vars.
        api: Passed through to ``launch_project.get_env_vars_dict``.
        role_arn: Stored as ``RoleArn``.
        entry_point: Its ``command`` becomes the container entrypoint, if set.
        args: Container arguments, if any.
        max_env_length: Passed through to ``launch_project.get_env_vars_dict``.
        image_uri: Training image; also used to fill resource-arg macros.
        default_output_path: Used as ``S3OutputPath`` when the user gave no
            ``OutputDataConfig``.

    Returns:
        The request arguments with all ``None`` values removed.

    Raises:
        LaunchError: If the sagemaker args, ``OutputDataConfig``,
            ``ResourceConfig`` or ``StoppingCondition`` are missing.
    """
    sagemaker_args: Dict[str, Any] = {}
    resource_args = launch_project.fill_macros(image_uri)
    given_sagemaker_args: Optional[Dict[str, Any]] = resource_args.get("sagemaker")

    if given_sagemaker_args is None:
        raise LaunchError(
            "No sagemaker args specified. Specify sagemaker args in resource_args"
        )
    if (
        given_sagemaker_args.get("OutputDataConfig") is None
        and default_output_path is not None
    ):
        sagemaker_args["OutputDataConfig"] = {"S3OutputPath": default_output_path}
    else:
        sagemaker_args["OutputDataConfig"] = given_sagemaker_args.get(
            "OutputDataConfig"
        )

    if sagemaker_args.get("OutputDataConfig") is None:
        raise LaunchError(
            "Sagemaker launcher requires an OutputDataConfig Sagemaker resource argument"
        )
    training_job_name = cast(
        str, (given_sagemaker_args.get("TrainingJobName") or launch_project.run_id)
    )
    sagemaker_args["TrainingJobName"] = training_job_name
    entry_cmd = entry_point.command if entry_point else []

    sagemaker_args["AlgorithmSpecification"] = (
        merge_image_uri_with_algorithm_specification(
            given_sagemaker_args.get(
                "AlgorithmSpecification",
                given_sagemaker_args.get("algorithm_specification"),
            ),
            image_uri,
            entry_cmd,
            args,
        )
    )

    sagemaker_args["RoleArn"] = role_arn

    camel_case_args = {
        to_camel_case(key): item for key, item in given_sagemaker_args.items()
    }
    # Values computed above take precedence over the user's converted keys.
    sagemaker_args = {
        **camel_case_args,
        **sagemaker_args,
    }

    if sagemaker_args.get("ResourceConfig") is None:
        raise LaunchError(
            "Sagemaker launcher requires a ResourceConfig resource argument"
        )

    if sagemaker_args.get("StoppingCondition") is None:
        raise LaunchError(
            "Sagemaker launcher requires a StoppingCondition resource argument"
        )

    given_env = given_sagemaker_args.get(
        "Environment", sagemaker_args.get("environment", {})
    )
    calced_env = launch_project.get_env_vars_dict(api, max_env_length)
    # User-supplied variables override the computed ones.
    total_env = {**calced_env, **given_env}
    sagemaker_args["Environment"] = total_env

    # Add wandb tag. Build a new list so the user's own Tags list is not
    # mutated, and so an explicit ``Tags: null`` does not crash on append.
    tags = list(sagemaker_args.get("Tags") or [])
    tags.append({"Key": "WandbRunId", "Value": launch_project.run_id})
    sagemaker_args["Tags"] = tags

    # remove args that were passed in for launch but not passed to sagemaker
    sagemaker_args.pop("EcrRepoName", None)
    sagemaker_args.pop("region", None)
    sagemaker_args.pop("profile", None)

    # clear the args that are None so they are not passed
    filtered_args = {k: v for k, v in sagemaker_args.items() if v is not None}

    return filtered_args
|
382
|
+
|
383
|
+
|
384
|
+
async def launch_sagemaker_job(
    launch_project: LaunchProject,
    sagemaker_args: Dict[str, Any],
    sagemaker_client: "boto3.Client",
    log_client: Optional["boto3.Client"] = None,
) -> SagemakerSubmittedRun:
    """Submit a training job to SageMaker and return a handle to it.

    The job name is ``sagemaker_args["TrainingJobName"]`` when set, otherwise
    the project's run id. The job ARN and a console link are printed with
    ``wandb.termlog``.

    Raises:
        LaunchError: If the response carries no ``TrainingJobArn``.
    """
    job_name = sagemaker_args.get("TrainingJobName") or launch_project.run_id
    submit_job = event_loop_thread_exec(sagemaker_client.create_training_job)
    response = await submit_job(**sagemaker_args)

    job_arn = response.get("TrainingJobArn")
    if job_arn is None:
        raise LaunchError("Failed to create training job when submitting to SageMaker")

    submitted_run = SagemakerSubmittedRun(job_name, sagemaker_client, log_client)
    wandb.termlog(f"{LOG_PREFIX}Run job submitted with arn: {job_arn}")

    # Console page for this job in the client's region.
    console_url = "https://{region}.console.aws.amazon.com/sagemaker/home?region={region}#/jobs/{job_name}".format(
        region=sagemaker_client.meta.region_name, job_name=job_name
    )
    wandb.termlog(f"{LOG_PREFIX}See training job status at: {console_url}")
    return submitted_run
|
406
|
+
|
407
|
+
|
408
|
+
def get_role_arn(
    sagemaker_args: Dict[str, Any],
    backend_config: Dict[str, Any],
    account_id: str,
    partition: str,
) -> str:
    """Get the role arn from the sagemaker args or the backend config.

    Lookup order: ``RoleArn`` then ``role_arn`` in ``sagemaker_args``, then
    ``backend_config["runner"]["role_arn"]``. A value that already starts with
    ``arn:<partition>:iam::`` is returned unchanged; anything else is treated
    as a bare role name and expanded using ``account_id``.

    Raises:
        LaunchError: If no string role arn could be found.
    """
    role_arn = sagemaker_args.get("RoleArn") or sagemaker_args.get("role_arn")
    if role_arn is None:
        role_arn = backend_config.get("runner", {}).get("role_arn")
    if role_arn is None or not isinstance(role_arn, str):
        # The trailing space matters: the two literals are concatenated.
        raise LaunchError(
            "AWS sagemaker require a string RoleArn set this by adding a `RoleArn` key to the sagemaker "
            "field of resource_args"
        )
    if role_arn.startswith(f"arn:{partition}:iam::"):
        return role_arn  # type: ignore

    return f"arn:{partition}:iam::{account_id}:role/{role_arn}"
|
@@ -0,0 +1,230 @@
|
|
1
|
+
import asyncio
|
2
|
+
import logging
|
3
|
+
from typing import Any, Dict, Optional
|
4
|
+
|
5
|
+
if False:
|
6
|
+
from google.cloud import aiplatform # type: ignore # noqa: F401
|
7
|
+
|
8
|
+
from wandb.apis.internal import Api
|
9
|
+
from wandb.util import get_module
|
10
|
+
|
11
|
+
from .._project_spec import LaunchProject
|
12
|
+
from ..environment.gcp_environment import GcpEnvironment
|
13
|
+
from ..errors import LaunchError
|
14
|
+
from ..registry.abstract import AbstractRegistry
|
15
|
+
from ..utils import MAX_ENV_LENGTHS, PROJECT_SYNCHRONOUS, event_loop_thread_exec
|
16
|
+
from .abstract import AbstractRun, AbstractRunner, Status
|
17
|
+
|
18
|
+
GCP_CONSOLE_URI = "https://console.cloud.google.com"
|
19
|
+
|
20
|
+
_logger = logging.getLogger(__name__)
|
21
|
+
|
22
|
+
|
23
|
+
WANDB_RUN_ID_KEY = "wandb-run-id"
|
24
|
+
|
25
|
+
|
26
|
+
class VertexSubmittedRun(AbstractRun):
    """``AbstractRun`` wrapper around a submitted Vertex job object."""

    def __init__(self, job: Any) -> None:
        self._job = job

    @property
    def id(self) -> str:
        """Return the wrapped job's ``name``."""
        # numeric ID of the custom training job
        job_id = self._job.name
        return job_id  # type: ignore

    async def get_logs(self) -> Optional[str]:
        """Log retrieval is not implemented; always returns ``None``."""
        # TODO: implement
        return None

    @property
    def name(self) -> str:
        """Return the wrapped job's ``display_name``."""
        display_name = self._job.display_name
        return display_name  # type: ignore

    @property
    def gcp_region(self) -> str:
        """Return the wrapped job's ``location``."""
        location = self._job.location
        return location  # type: ignore

    @property
    def gcp_project(self) -> str:
        """Return the wrapped job's ``project``."""
        project = self._job.project
        return project  # type: ignore

    def get_page_link(self) -> str:
        """Return the console URL for this job."""
        link_template = "{console_uri}/vertex-ai/locations/{region}/training/{job_id}?project={project}"
        return link_template.format(
            console_uri=GCP_CONSOLE_URI,
            region=self.gcp_region,
            job_id=self.id,
            project=self.gcp_project,
        )

    async def wait(self) -> bool:
        """Wait on the job, then report whether it ended as ``finished``."""
        # TODO: run this in a separate thread.
        await self._job.wait()
        final_status = await self.get_status()
        return final_status.state == "finished"

    async def get_status(self) -> Status:
        """Map the job's state string to a ``Status``; unrecognised states are ``unknown``."""
        # extract from type PipelineState
        state_text = str(self._job.state)
        state_by_job_state = {
            "JobState.JOB_STATE_SUCCEEDED": "finished",
            "JobState.JOB_STATE_FAILED": "failed",
            "JobState.JOB_STATE_RUNNING": "running",
            "JobState.JOB_STATE_PENDING": "starting",
        }
        return Status(state_by_job_state.get(state_text, "unknown"))

    async def cancel(self) -> None:
        """Ask the wrapped job to cancel."""
        self._job.cancel()
|
78
|
+
|
79
|
+
|
80
|
+
class VertexRunner(AbstractRunner):
    """Runner class, uses a project to create a VertexSubmittedRun."""

    def __init__(
        self,
        api: Api,
        backend_config: Dict[str, Any],
        environment: GcpEnvironment,
        registry: AbstractRegistry,
    ) -> None:
        """Initialize a VertexRunner instance.

        Args:
            api: Passed through to the base runner.
            backend_config: Passed through to the base runner; ``run`` reads
                the ``PROJECT_SYNCHRONOUS`` entry from it.
            environment: GCP environment handed to ``launch_vertex_job``.
            registry: Registry stored on the runner.
        """
        super().__init__(api, backend_config)
        self.environment = environment
        self.registry = registry

    async def run(
        self, launch_project: LaunchProject, image_uri: str
    ) -> Optional[AbstractRun]:
        """Run a Vertex job.

        Resource args are read from the top-level key ``vertex`` (or the
        legacy ``gcp-vertex``). Every worker pool spec gets the resolved
        entrypoint command and the launch environment variables added to its
        container spec before the job is launched.

        Args:
            launch_project: Project supplying resource args, entrypoint,
                override args and environment variables.
            image_uri: Image URI used to fill macros in the resource args.

        Returns:
            The submitted run produced by ``launch_vertex_job``.

        Raises:
            LaunchError: If no Vertex resource args are given, no worker pool
                spec is given, a worker pool spec lacks a container spec, or
                no staging bucket is given.
        """
        full_resource_args = launch_project.fill_macros(image_uri)
        resource_args = full_resource_args.get("vertex")
        # We support setting under gcp-vertex for historical reasons.
        if not resource_args:
            resource_args = full_resource_args.get("gcp-vertex")
        if not resource_args:
            # Name the key that is actually read above.
            raise LaunchError(
                "No Vertex resource args specified. Specify args via --resource-args "
                "with a JSON file or string under top-level key `vertex`"
            )

        spec_args = resource_args.get("spec", {})
        run_args = resource_args.get("run", {})

        synchronous: bool = self.backend_config[PROJECT_SYNCHRONOUS]

        entry_point = (
            launch_project.override_entrypoint or launch_project.get_job_entry_point()
        )

        # TODO: Set entrypoint in each container
        entry_cmd = []
        if entry_point is not None:
            entry_cmd += entry_point.command
        entry_cmd += launch_project.override_args

        env_vars = launch_project.get_env_vars_dict(
            api=self._api,
            max_env_length=MAX_ENV_LENGTHS[self.__class__.__name__],
        )

        worker_specs = spec_args.get("worker_pool_specs", [])
        if not worker_specs:
            raise LaunchError(
                "Vertex requires at least one worker pool spec. Please specify "
                "a worker pool spec in resource arguments under the key "
                "`vertex.spec.worker_pool_specs`."
            )

        # TODO: Add entrypoint + args to each worker pool spec
        for spec in worker_specs:
            if not spec.get("container_spec"):
                raise LaunchError(
                    "Vertex requires a container spec for each worker pool spec. "
                    "Please specify a container spec in resource arguments under "
                    "the key `vertex.spec.worker_pool_specs[].container_spec`."
                )
            spec["container_spec"]["command"] = entry_cmd

            # Add our env vars to user supplied env vars
            env = spec["container_spec"].get("env", [])
            env.extend(
                [{"name": key, "value": value} for key, value in env_vars.items()]
            )
            spec["container_spec"]["env"] = env

        if not spec_args.get("staging_bucket"):
            raise LaunchError(
                "Vertex requires a staging bucket. Please specify a staging bucket "
                "in resource arguments under the key `vertex.spec.staging_bucket`."
            )

        _logger.info("Launching Vertex job...")
        submitted_run = await launch_vertex_job(
            launch_project,
            spec_args,
            run_args,
            self.environment,
            synchronous,
        )
        return submitted_run
|
169
|
+
|
170
|
+
|
171
|
+
async def launch_vertex_job(
    launch_project: LaunchProject,
    spec_args: Dict[str, Any],
    run_args: Dict[str, Any],
    environment: GcpEnvironment,
    synchronous: bool = False,
) -> VertexSubmittedRun:
    """Create a Vertex ``CustomJob`` from the given args and start it.

    Args:
        launch_project: Supplies the job display name and the run id that is
            stored in the job labels under ``WANDB_RUN_ID_KEY``.
        spec_args: Job spec settings (``staging_bucket``, ``labels``,
            ``worker_pool_specs``, ``base_output_dir``,
            ``encryption_spec_key_name``).
        run_args: Execution settings forwarded to ``job.run`` / ``job.submit``.
        environment: GCP environment providing project, region and credentials.
        synchronous: When True the job is started with ``job.run(sync=True)``;
            otherwise it is started with ``job.submit``.

    Returns:
        A ``VertexSubmittedRun`` wrapping the started job.

    Raises:
        LaunchError: If verifying the environment, initializing aiplatform or
            constructing the job fails; the original exception is attached
            as the cause.
    """
    try:
        await environment.verify()
        aiplatform = get_module(  # noqa: F811
            "google.cloud.aiplatform",
            "VertexRunner requires google.cloud.aiplatform to be installed",
        )
        init = event_loop_thread_exec(aiplatform.init)
        await init(
            project=environment.project,
            location=environment.region,
            staging_bucket=spec_args.get("staging_bucket"),
            credentials=await environment.get_credentials(),
        )
        # Copy so the caller's dict is not mutated; also tolerates an
        # explicit ``labels: null`` in the resource args.
        labels = dict(spec_args.get("labels") or {})
        labels[WANDB_RUN_ID_KEY] = launch_project.run_id
        job = aiplatform.CustomJob(
            display_name=launch_project.name,
            worker_pool_specs=spec_args.get("worker_pool_specs"),
            base_output_dir=spec_args.get("base_output_dir"),
            encryption_spec_key_name=spec_args.get("encryption_spec_key_name"),
            labels=labels,
        )
        execution_kwargs = dict(
            timeout=run_args.get("timeout"),
            service_account=run_args.get("service_account"),
            network=run_args.get("network"),
            enable_web_access=run_args.get("enable_web_access", False),
            experiment=run_args.get("experiment"),
            experiment_run=run_args.get("experiment_run"),
            tensorboard=run_args.get("tensorboard"),
            restart_job_on_worker_restart=run_args.get(
                "restart_job_on_worker_restart", False
            ),
        )
    # Unclear if there are exceptions that can be thrown where we should
    # retry instead of erroring. For now, just catch all exceptions and they
    # go to the UI for the user to interpret.
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise LaunchError(f"Failed to create Vertex job: {e}") from e

    if synchronous:
        run = event_loop_thread_exec(job.run)
        await run(**execution_kwargs, sync=True)
    else:
        submit = event_loop_thread_exec(job.submit)
        await submit(**execution_kwargs)
    submitted_run = VertexSubmittedRun(job)
    interval = 1
    while not getattr(job._gca_resource, "name", None):
        # give time for the gcp job object to be created and named, this should only loop a couple times max
        await asyncio.sleep(interval)
        # Back off exponentially, capped at 30 seconds between checks.
        interval = min(30, interval * 2)
    return submitted_run
|
@@ -0,0 +1,39 @@
|
|
1
|
+
import logging
|
2
|
+
from typing import Any, Callable, Dict
|
3
|
+
|
4
|
+
log = logging.getLogger(__name__)
|
5
|
+
|
6
|
+
|
7
|
+
class SchedulerError(Exception):
    """Signals a known failure in the wandb sweep scheduler."""
|
11
|
+
|
12
|
+
|
13
|
+
def _import_sweep_scheduler() -> Any:
    """Import ``SweepScheduler`` from ``scheduler_sweep`` and return the class."""
    # Imported inside the function so the module is only loaded when called.
    from .scheduler_sweep import SweepScheduler as sweep_scheduler_cls

    return sweep_scheduler_cls
|
17
|
+
|
18
|
+
|
19
|
+
# Maps a scheduler type name to a zero-argument loader returning its class.
_WANDB_SCHEDULERS: Dict[str, Callable] = dict(
    wandb=_import_sweep_scheduler,
)
|
22
|
+
|
23
|
+
|
24
|
+
def load_scheduler(scheduler_type: str) -> Any:
    """Return the scheduler registered for ``scheduler_type``.

    The lookup is case-insensitive: the name is lower-cased before it is
    checked against ``_WANDB_SCHEDULERS``.

    Args:
        scheduler_type: Name of the scheduler to load.

    Returns:
        Whatever the registered loader function returns.

    Raises:
        SchedulerError: If ``scheduler_type`` is not a registered name.
    """
    scheduler_type = scheduler_type.lower()
    if scheduler_type not in _WANDB_SCHEDULERS:
        raise SchedulerError(
            f"The `scheduler_name` argument must be one of "
            f"{list(_WANDB_SCHEDULERS.keys())}, got: {scheduler_type}"
        )

    # ``Logger.warn`` is a deprecated alias; ``warning`` logs at the same level.
    log.warning(f"Loading dependencies for Scheduler of type: {scheduler_type}")
    import_func = _WANDB_SCHEDULERS[scheduler_type]
    return import_func()
|
35
|
+
|
36
|
+
|
37
|
+
# Public API of this package.
__all__ = ["load_scheduler"]
|