wandb 0.18.2__py3-none-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package_readme.md +89 -0
- wandb/__init__.py +245 -0
- wandb/__init__.pyi +1139 -0
- wandb/__main__.py +3 -0
- wandb/_globals.py +19 -0
- wandb/agents/__init__.py +0 -0
- wandb/agents/pyagent.py +363 -0
- wandb/analytics/__init__.py +3 -0
- wandb/analytics/sentry.py +266 -0
- wandb/apis/__init__.py +48 -0
- wandb/apis/attrs.py +40 -0
- wandb/apis/importers/__init__.py +1 -0
- wandb/apis/importers/internals/internal.py +385 -0
- wandb/apis/importers/internals/protocols.py +99 -0
- wandb/apis/importers/internals/util.py +78 -0
- wandb/apis/importers/mlflow.py +254 -0
- wandb/apis/importers/validation.py +108 -0
- wandb/apis/importers/wandb.py +1603 -0
- wandb/apis/internal.py +232 -0
- wandb/apis/normalize.py +89 -0
- wandb/apis/paginator.py +81 -0
- wandb/apis/public/__init__.py +34 -0
- wandb/apis/public/api.py +1305 -0
- wandb/apis/public/artifacts.py +1090 -0
- wandb/apis/public/const.py +4 -0
- wandb/apis/public/files.py +195 -0
- wandb/apis/public/history.py +149 -0
- wandb/apis/public/jobs.py +659 -0
- wandb/apis/public/projects.py +154 -0
- wandb/apis/public/query_generator.py +166 -0
- wandb/apis/public/reports.py +469 -0
- wandb/apis/public/runs.py +914 -0
- wandb/apis/public/sweeps.py +240 -0
- wandb/apis/public/teams.py +198 -0
- wandb/apis/public/users.py +136 -0
- wandb/apis/reports/__init__.py +1 -0
- wandb/apis/reports/v1/__init__.py +8 -0
- wandb/apis/reports/v2/__init__.py +8 -0
- wandb/apis/workspaces/__init__.py +8 -0
- wandb/beta/workflows.py +288 -0
- wandb/bin/nvidia_gpu_stats +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/__init__.py +0 -0
- wandb/cli/cli.py +3004 -0
- wandb/data_types.py +63 -0
- wandb/docker/__init__.py +342 -0
- wandb/docker/auth.py +436 -0
- wandb/docker/wandb-entrypoint.sh +33 -0
- wandb/docker/www_authenticate.py +94 -0
- wandb/env.py +514 -0
- wandb/errors/__init__.py +17 -0
- wandb/errors/errors.py +37 -0
- wandb/errors/term.py +103 -0
- wandb/errors/util.py +57 -0
- wandb/errors/warnings.py +2 -0
- wandb/filesync/__init__.py +0 -0
- wandb/filesync/dir_watcher.py +403 -0
- wandb/filesync/stats.py +100 -0
- wandb/filesync/step_checksum.py +142 -0
- wandb/filesync/step_prepare.py +179 -0
- wandb/filesync/step_upload.py +290 -0
- wandb/filesync/upload_job.py +142 -0
- wandb/integration/__init__.py +0 -0
- wandb/integration/catboost/__init__.py +5 -0
- wandb/integration/catboost/catboost.py +178 -0
- wandb/integration/cohere/__init__.py +3 -0
- wandb/integration/cohere/cohere.py +21 -0
- wandb/integration/cohere/resolver.py +347 -0
- wandb/integration/diffusers/__init__.py +3 -0
- wandb/integration/diffusers/autologger.py +76 -0
- wandb/integration/diffusers/pipeline_resolver.py +50 -0
- wandb/integration/diffusers/resolvers/__init__.py +9 -0
- wandb/integration/diffusers/resolvers/multimodal.py +882 -0
- wandb/integration/diffusers/resolvers/utils.py +102 -0
- wandb/integration/fastai/__init__.py +249 -0
- wandb/integration/gym/__init__.py +105 -0
- wandb/integration/huggingface/__init__.py +3 -0
- wandb/integration/huggingface/huggingface.py +18 -0
- wandb/integration/huggingface/resolver.py +213 -0
- wandb/integration/keras/__init__.py +11 -0
- wandb/integration/keras/callbacks/__init__.py +5 -0
- wandb/integration/keras/callbacks/metrics_logger.py +136 -0
- wandb/integration/keras/callbacks/model_checkpoint.py +195 -0
- wandb/integration/keras/callbacks/tables_builder.py +226 -0
- wandb/integration/keras/keras.py +1091 -0
- wandb/integration/kfp/__init__.py +6 -0
- wandb/integration/kfp/helpers.py +28 -0
- wandb/integration/kfp/kfp_patch.py +324 -0
- wandb/integration/kfp/wandb_logging.py +182 -0
- wandb/integration/langchain/__init__.py +3 -0
- wandb/integration/langchain/wandb_tracer.py +48 -0
- wandb/integration/lightgbm/__init__.py +239 -0
- wandb/integration/lightning/__init__.py +0 -0
- wandb/integration/lightning/fabric/__init__.py +3 -0
- wandb/integration/lightning/fabric/logger.py +762 -0
- wandb/integration/magic.py +556 -0
- wandb/integration/metaflow/__init__.py +3 -0
- wandb/integration/metaflow/metaflow.py +383 -0
- wandb/integration/openai/__init__.py +3 -0
- wandb/integration/openai/fine_tuning.py +480 -0
- wandb/integration/openai/openai.py +22 -0
- wandb/integration/openai/resolver.py +240 -0
- wandb/integration/prodigy/__init__.py +3 -0
- wandb/integration/prodigy/prodigy.py +299 -0
- wandb/integration/sacred/__init__.py +117 -0
- wandb/integration/sagemaker/__init__.py +12 -0
- wandb/integration/sagemaker/auth.py +28 -0
- wandb/integration/sagemaker/config.py +49 -0
- wandb/integration/sagemaker/files.py +3 -0
- wandb/integration/sagemaker/resources.py +34 -0
- wandb/integration/sb3/__init__.py +3 -0
- wandb/integration/sb3/sb3.py +153 -0
- wandb/integration/sklearn/__init__.py +37 -0
- wandb/integration/sklearn/calculate/__init__.py +32 -0
- wandb/integration/sklearn/calculate/calibration_curves.py +125 -0
- wandb/integration/sklearn/calculate/class_proportions.py +68 -0
- wandb/integration/sklearn/calculate/confusion_matrix.py +93 -0
- wandb/integration/sklearn/calculate/decision_boundaries.py +40 -0
- wandb/integration/sklearn/calculate/elbow_curve.py +55 -0
- wandb/integration/sklearn/calculate/feature_importances.py +67 -0
- wandb/integration/sklearn/calculate/learning_curve.py +64 -0
- wandb/integration/sklearn/calculate/outlier_candidates.py +69 -0
- wandb/integration/sklearn/calculate/residuals.py +86 -0
- wandb/integration/sklearn/calculate/silhouette.py +118 -0
- wandb/integration/sklearn/calculate/summary_metrics.py +62 -0
- wandb/integration/sklearn/plot/__init__.py +35 -0
- wandb/integration/sklearn/plot/classifier.py +329 -0
- wandb/integration/sklearn/plot/clusterer.py +146 -0
- wandb/integration/sklearn/plot/regressor.py +121 -0
- wandb/integration/sklearn/plot/shared.py +91 -0
- wandb/integration/sklearn/utils.py +183 -0
- wandb/integration/tensorboard/__init__.py +10 -0
- wandb/integration/tensorboard/log.py +355 -0
- wandb/integration/tensorboard/monkeypatch.py +185 -0
- wandb/integration/tensorflow/__init__.py +5 -0
- wandb/integration/tensorflow/estimator_hook.py +54 -0
- wandb/integration/torch/__init__.py +0 -0
- wandb/integration/torch/wandb_torch.py +554 -0
- wandb/integration/ultralytics/__init__.py +11 -0
- wandb/integration/ultralytics/bbox_utils.py +208 -0
- wandb/integration/ultralytics/callback.py +524 -0
- wandb/integration/ultralytics/classification_utils.py +83 -0
- wandb/integration/ultralytics/mask_utils.py +202 -0
- wandb/integration/ultralytics/pose_utils.py +103 -0
- wandb/integration/xgboost/__init__.py +11 -0
- wandb/integration/xgboost/xgboost.py +189 -0
- wandb/integration/yolov8/__init__.py +0 -0
- wandb/integration/yolov8/yolov8.py +284 -0
- wandb/jupyter.py +515 -0
- wandb/magic.py +3 -0
- wandb/mpmain/__init__.py +0 -0
- wandb/mpmain/__main__.py +1 -0
- wandb/old/__init__.py +0 -0
- wandb/old/core.py +53 -0
- wandb/old/settings.py +173 -0
- wandb/old/summary.py +440 -0
- wandb/plot/__init__.py +19 -0
- wandb/plot/bar.py +45 -0
- wandb/plot/confusion_matrix.py +100 -0
- wandb/plot/histogram.py +39 -0
- wandb/plot/line.py +43 -0
- wandb/plot/line_series.py +88 -0
- wandb/plot/pr_curve.py +136 -0
- wandb/plot/roc_curve.py +118 -0
- wandb/plot/scatter.py +32 -0
- wandb/plot/utils.py +183 -0
- wandb/plot/viz.py +123 -0
- wandb/proto/__init__.py +0 -0
- wandb/proto/v3/__init__.py +0 -0
- wandb/proto/v3/wandb_base_pb2.py +55 -0
- wandb/proto/v3/wandb_internal_pb2.py +1608 -0
- wandb/proto/v3/wandb_server_pb2.py +208 -0
- wandb/proto/v3/wandb_settings_pb2.py +112 -0
- wandb/proto/v3/wandb_telemetry_pb2.py +106 -0
- wandb/proto/v4/__init__.py +0 -0
- wandb/proto/v4/wandb_base_pb2.py +30 -0
- wandb/proto/v4/wandb_internal_pb2.py +360 -0
- wandb/proto/v4/wandb_server_pb2.py +63 -0
- wandb/proto/v4/wandb_settings_pb2.py +45 -0
- wandb/proto/v4/wandb_telemetry_pb2.py +41 -0
- wandb/proto/v5/wandb_base_pb2.py +31 -0
- wandb/proto/v5/wandb_internal_pb2.py +361 -0
- wandb/proto/v5/wandb_server_pb2.py +64 -0
- wandb/proto/v5/wandb_settings_pb2.py +46 -0
- wandb/proto/v5/wandb_telemetry_pb2.py +42 -0
- wandb/proto/wandb_base_pb2.py +10 -0
- wandb/proto/wandb_deprecated.py +53 -0
- wandb/proto/wandb_generate_deprecated.py +34 -0
- wandb/proto/wandb_generate_proto.py +49 -0
- wandb/proto/wandb_internal_pb2.py +16 -0
- wandb/proto/wandb_server_pb2.py +10 -0
- wandb/proto/wandb_settings_pb2.py +10 -0
- wandb/proto/wandb_telemetry_pb2.py +10 -0
- wandb/py.typed +0 -0
- wandb/sdk/__init__.py +37 -0
- wandb/sdk/artifacts/__init__.py +0 -0
- wandb/sdk/artifacts/_validators.py +90 -0
- wandb/sdk/artifacts/artifact.py +2389 -0
- wandb/sdk/artifacts/artifact_download_logger.py +43 -0
- wandb/sdk/artifacts/artifact_file_cache.py +253 -0
- wandb/sdk/artifacts/artifact_instance_cache.py +17 -0
- wandb/sdk/artifacts/artifact_manifest.py +74 -0
- wandb/sdk/artifacts/artifact_manifest_entry.py +249 -0
- wandb/sdk/artifacts/artifact_manifests/__init__.py +0 -0
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +92 -0
- wandb/sdk/artifacts/artifact_saver.py +269 -0
- wandb/sdk/artifacts/artifact_state.py +11 -0
- wandb/sdk/artifacts/artifact_ttl.py +7 -0
- wandb/sdk/artifacts/exceptions.py +57 -0
- wandb/sdk/artifacts/staging.py +25 -0
- wandb/sdk/artifacts/storage_handler.py +62 -0
- wandb/sdk/artifacts/storage_handlers/__init__.py +0 -0
- wandb/sdk/artifacts/storage_handlers/azure_handler.py +208 -0
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +228 -0
- wandb/sdk/artifacts/storage_handlers/http_handler.py +114 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +141 -0
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +56 -0
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +300 -0
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +72 -0
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +135 -0
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +74 -0
- wandb/sdk/artifacts/storage_layout.py +6 -0
- wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
- wandb/sdk/artifacts/storage_policies/register.py +1 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +378 -0
- wandb/sdk/artifacts/storage_policy.py +72 -0
- wandb/sdk/backend/__init__.py +0 -0
- wandb/sdk/backend/backend.py +222 -0
- wandb/sdk/data_types/__init__.py +0 -0
- wandb/sdk/data_types/_dtypes.py +914 -0
- wandb/sdk/data_types/_private.py +10 -0
- wandb/sdk/data_types/audio.py +165 -0
- wandb/sdk/data_types/base_types/__init__.py +0 -0
- wandb/sdk/data_types/base_types/json_metadata.py +55 -0
- wandb/sdk/data_types/base_types/media.py +315 -0
- wandb/sdk/data_types/base_types/wb_value.py +272 -0
- wandb/sdk/data_types/bokeh.py +70 -0
- wandb/sdk/data_types/graph.py +405 -0
- wandb/sdk/data_types/helper_types/__init__.py +0 -0
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +295 -0
- wandb/sdk/data_types/helper_types/classes.py +159 -0
- wandb/sdk/data_types/helper_types/image_mask.py +235 -0
- wandb/sdk/data_types/histogram.py +96 -0
- wandb/sdk/data_types/html.py +115 -0
- wandb/sdk/data_types/image.py +845 -0
- wandb/sdk/data_types/molecule.py +241 -0
- wandb/sdk/data_types/object_3d.py +474 -0
- wandb/sdk/data_types/plotly.py +82 -0
- wandb/sdk/data_types/saved_model.py +446 -0
- wandb/sdk/data_types/table.py +1204 -0
- wandb/sdk/data_types/trace_tree.py +438 -0
- wandb/sdk/data_types/utils.py +229 -0
- wandb/sdk/data_types/video.py +247 -0
- wandb/sdk/integration_utils/__init__.py +0 -0
- wandb/sdk/integration_utils/auto_logging.py +239 -0
- wandb/sdk/integration_utils/data_logging.py +475 -0
- wandb/sdk/interface/__init__.py +0 -0
- wandb/sdk/interface/constants.py +4 -0
- wandb/sdk/interface/interface.py +972 -0
- wandb/sdk/interface/interface_queue.py +59 -0
- wandb/sdk/interface/interface_relay.py +53 -0
- wandb/sdk/interface/interface_shared.py +537 -0
- wandb/sdk/interface/interface_sock.py +61 -0
- wandb/sdk/interface/message_future.py +27 -0
- wandb/sdk/interface/message_future_poll.py +50 -0
- wandb/sdk/interface/router.py +118 -0
- wandb/sdk/interface/router_queue.py +44 -0
- wandb/sdk/interface/router_relay.py +39 -0
- wandb/sdk/interface/router_sock.py +36 -0
- wandb/sdk/interface/summary_record.py +67 -0
- wandb/sdk/internal/__init__.py +0 -0
- wandb/sdk/internal/context.py +89 -0
- wandb/sdk/internal/datastore.py +297 -0
- wandb/sdk/internal/file_pusher.py +181 -0
- wandb/sdk/internal/file_stream.py +695 -0
- wandb/sdk/internal/flow_control.py +263 -0
- wandb/sdk/internal/handler.py +901 -0
- wandb/sdk/internal/internal.py +417 -0
- wandb/sdk/internal/internal_api.py +4358 -0
- wandb/sdk/internal/internal_util.py +100 -0
- wandb/sdk/internal/job_builder.py +629 -0
- wandb/sdk/internal/profiler.py +78 -0
- wandb/sdk/internal/progress.py +83 -0
- wandb/sdk/internal/run.py +25 -0
- wandb/sdk/internal/sample.py +70 -0
- wandb/sdk/internal/sender.py +1686 -0
- wandb/sdk/internal/sender_config.py +197 -0
- wandb/sdk/internal/settings_static.py +90 -0
- wandb/sdk/internal/system/__init__.py +0 -0
- wandb/sdk/internal/system/assets/__init__.py +27 -0
- wandb/sdk/internal/system/assets/aggregators.py +37 -0
- wandb/sdk/internal/system/assets/asset_registry.py +20 -0
- wandb/sdk/internal/system/assets/cpu.py +163 -0
- wandb/sdk/internal/system/assets/disk.py +210 -0
- wandb/sdk/internal/system/assets/gpu.py +416 -0
- wandb/sdk/internal/system/assets/gpu_amd.py +239 -0
- wandb/sdk/internal/system/assets/gpu_apple.py +177 -0
- wandb/sdk/internal/system/assets/interfaces.py +207 -0
- wandb/sdk/internal/system/assets/ipu.py +177 -0
- wandb/sdk/internal/system/assets/memory.py +166 -0
- wandb/sdk/internal/system/assets/network.py +125 -0
- wandb/sdk/internal/system/assets/open_metrics.py +299 -0
- wandb/sdk/internal/system/assets/tpu.py +154 -0
- wandb/sdk/internal/system/assets/trainium.py +399 -0
- wandb/sdk/internal/system/env_probe_helpers.py +13 -0
- wandb/sdk/internal/system/system_info.py +249 -0
- wandb/sdk/internal/system/system_monitor.py +229 -0
- wandb/sdk/internal/tb_watcher.py +518 -0
- wandb/sdk/internal/thread_local_settings.py +18 -0
- wandb/sdk/internal/writer.py +206 -0
- wandb/sdk/launch/__init__.py +14 -0
- wandb/sdk/launch/_launch.py +330 -0
- wandb/sdk/launch/_launch_add.py +255 -0
- wandb/sdk/launch/_project_spec.py +566 -0
- wandb/sdk/launch/agent/__init__.py +5 -0
- wandb/sdk/launch/agent/agent.py +924 -0
- wandb/sdk/launch/agent/config.py +296 -0
- wandb/sdk/launch/agent/job_status_tracker.py +53 -0
- wandb/sdk/launch/agent/run_queue_item_file_saver.py +45 -0
- wandb/sdk/launch/builder/__init__.py +0 -0
- wandb/sdk/launch/builder/abstract.py +156 -0
- wandb/sdk/launch/builder/build.py +297 -0
- wandb/sdk/launch/builder/context_manager.py +235 -0
- wandb/sdk/launch/builder/docker_builder.py +177 -0
- wandb/sdk/launch/builder/kaniko_builder.py +595 -0
- wandb/sdk/launch/builder/noop.py +58 -0
- wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +188 -0
- wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
- wandb/sdk/launch/create_job.py +528 -0
- wandb/sdk/launch/environment/abstract.py +29 -0
- wandb/sdk/launch/environment/aws_environment.py +322 -0
- wandb/sdk/launch/environment/azure_environment.py +105 -0
- wandb/sdk/launch/environment/gcp_environment.py +335 -0
- wandb/sdk/launch/environment/local_environment.py +66 -0
- wandb/sdk/launch/errors.py +19 -0
- wandb/sdk/launch/git_reference.py +109 -0
- wandb/sdk/launch/inputs/files.py +148 -0
- wandb/sdk/launch/inputs/internal.py +315 -0
- wandb/sdk/launch/inputs/manage.py +113 -0
- wandb/sdk/launch/inputs/schema.py +39 -0
- wandb/sdk/launch/loader.py +249 -0
- wandb/sdk/launch/registry/abstract.py +48 -0
- wandb/sdk/launch/registry/anon.py +29 -0
- wandb/sdk/launch/registry/azure_container_registry.py +124 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +192 -0
- wandb/sdk/launch/registry/google_artifact_registry.py +219 -0
- wandb/sdk/launch/registry/local_registry.py +67 -0
- wandb/sdk/launch/runner/__init__.py +0 -0
- wandb/sdk/launch/runner/abstract.py +195 -0
- wandb/sdk/launch/runner/kubernetes_monitor.py +474 -0
- wandb/sdk/launch/runner/kubernetes_runner.py +963 -0
- wandb/sdk/launch/runner/local_container.py +301 -0
- wandb/sdk/launch/runner/local_process.py +78 -0
- wandb/sdk/launch/runner/sagemaker_runner.py +426 -0
- wandb/sdk/launch/runner/vertex_runner.py +230 -0
- wandb/sdk/launch/sweeps/__init__.py +39 -0
- wandb/sdk/launch/sweeps/scheduler.py +742 -0
- wandb/sdk/launch/sweeps/scheduler_sweep.py +91 -0
- wandb/sdk/launch/sweeps/utils.py +316 -0
- wandb/sdk/launch/utils.py +746 -0
- wandb/sdk/launch/wandb_reference.py +138 -0
- wandb/sdk/lib/__init__.py +5 -0
- wandb/sdk/lib/_settings_toposort_generate.py +159 -0
- wandb/sdk/lib/_settings_toposort_generated.py +250 -0
- wandb/sdk/lib/_wburls_generate.py +25 -0
- wandb/sdk/lib/_wburls_generated.py +22 -0
- wandb/sdk/lib/apikey.py +273 -0
- wandb/sdk/lib/capped_dict.py +26 -0
- wandb/sdk/lib/config_util.py +101 -0
- wandb/sdk/lib/credentials.py +141 -0
- wandb/sdk/lib/deprecate.py +42 -0
- wandb/sdk/lib/disabled.py +29 -0
- wandb/sdk/lib/exit_hooks.py +54 -0
- wandb/sdk/lib/file_stream_utils.py +118 -0
- wandb/sdk/lib/filenames.py +64 -0
- wandb/sdk/lib/filesystem.py +372 -0
- wandb/sdk/lib/fsm.py +174 -0
- wandb/sdk/lib/gitlib.py +239 -0
- wandb/sdk/lib/gql_request.py +65 -0
- wandb/sdk/lib/handler_util.py +21 -0
- wandb/sdk/lib/hashutil.py +84 -0
- wandb/sdk/lib/import_hooks.py +275 -0
- wandb/sdk/lib/ipython.py +146 -0
- wandb/sdk/lib/json_util.py +80 -0
- wandb/sdk/lib/lazyloader.py +63 -0
- wandb/sdk/lib/mailbox.py +460 -0
- wandb/sdk/lib/module.py +69 -0
- wandb/sdk/lib/paths.py +106 -0
- wandb/sdk/lib/preinit.py +42 -0
- wandb/sdk/lib/printer.py +313 -0
- wandb/sdk/lib/proto_util.py +90 -0
- wandb/sdk/lib/redirect.py +845 -0
- wandb/sdk/lib/reporting.py +99 -0
- wandb/sdk/lib/retry.py +289 -0
- wandb/sdk/lib/run_moment.py +78 -0
- wandb/sdk/lib/runid.py +12 -0
- wandb/sdk/lib/server.py +52 -0
- wandb/sdk/lib/service_connection.py +216 -0
- wandb/sdk/lib/service_token.py +94 -0
- wandb/sdk/lib/sock_client.py +295 -0
- wandb/sdk/lib/sparkline.py +45 -0
- wandb/sdk/lib/telemetry.py +100 -0
- wandb/sdk/lib/timed_input.py +133 -0
- wandb/sdk/lib/timer.py +19 -0
- wandb/sdk/lib/tracelog.py +255 -0
- wandb/sdk/lib/wburls.py +46 -0
- wandb/sdk/service/__init__.py +0 -0
- wandb/sdk/service/_startup_debug.py +22 -0
- wandb/sdk/service/port_file.py +53 -0
- wandb/sdk/service/server.py +116 -0
- wandb/sdk/service/server_sock.py +276 -0
- wandb/sdk/service/service.py +242 -0
- wandb/sdk/service/streams.py +417 -0
- wandb/sdk/verify/__init__.py +0 -0
- wandb/sdk/verify/verify.py +501 -0
- wandb/sdk/wandb_alerts.py +12 -0
- wandb/sdk/wandb_config.py +322 -0
- wandb/sdk/wandb_helper.py +54 -0
- wandb/sdk/wandb_init.py +1266 -0
- wandb/sdk/wandb_login.py +349 -0
- wandb/sdk/wandb_metric.py +110 -0
- wandb/sdk/wandb_require.py +97 -0
- wandb/sdk/wandb_require_helpers.py +44 -0
- wandb/sdk/wandb_run.py +4236 -0
- wandb/sdk/wandb_settings.py +2001 -0
- wandb/sdk/wandb_setup.py +409 -0
- wandb/sdk/wandb_summary.py +150 -0
- wandb/sdk/wandb_sweep.py +119 -0
- wandb/sdk/wandb_sync.py +81 -0
- wandb/sdk/wandb_watch.py +144 -0
- wandb/sklearn.py +35 -0
- wandb/sync/__init__.py +3 -0
- wandb/sync/sync.py +443 -0
- wandb/trigger.py +29 -0
- wandb/util.py +1956 -0
- wandb/vendor/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/setup.py +40 -0
- wandb/vendor/gql-0.2.0/tests/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/tests/starwars/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/tests/starwars/fixtures.py +96 -0
- wandb/vendor/gql-0.2.0/tests/starwars/schema.py +146 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_dsl.py +293 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_query.py +355 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_validation.py +171 -0
- wandb/vendor/gql-0.2.0/tests/test_client.py +31 -0
- wandb/vendor/gql-0.2.0/tests/test_transport.py +89 -0
- wandb/vendor/gql-0.2.0/wandb_gql/__init__.py +4 -0
- wandb/vendor/gql-0.2.0/wandb_gql/client.py +75 -0
- wandb/vendor/gql-0.2.0/wandb_gql/dsl.py +152 -0
- wandb/vendor/gql-0.2.0/wandb_gql/gql.py +10 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/http.py +6 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/local_schema.py +15 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/requests.py +46 -0
- wandb/vendor/gql-0.2.0/wandb_gql/utils.py +21 -0
- wandb/vendor/graphql-core-1.1/setup.py +86 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/__init__.py +287 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/__init__.py +6 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/base.py +42 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/format_error.py +11 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/located_error.py +29 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/syntax_error.py +36 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/__init__.py +26 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/base.py +311 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executor.py +398 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/asyncio.py +53 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/gevent.py +22 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/process.py +32 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/sync.py +7 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/thread.py +35 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/utils.py +6 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/executor.py +66 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/fragment.py +252 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/resolver.py +151 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/utils.py +7 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/middleware.py +57 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/values.py +145 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/graphql.py +60 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/ast.py +1349 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/base.py +19 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/lexer.py +435 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/location.py +30 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/parser.py +779 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/printer.py +193 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/source.py +18 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor.py +222 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor_meta.py +82 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/cached_property.py +17 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/contain_subset.py +28 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/default_ordered_dict.py +40 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/ordereddict.py +8 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/pair_set.py +43 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/version.py +78 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/__init__.py +67 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/definition.py +619 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/directives.py +132 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/introspection.py +440 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/scalars.py +131 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/schema.py +100 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/typemap.py +145 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/assert_valid_name.py +9 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_from_value.py +65 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_code.py +49 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_dict.py +24 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/base.py +75 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_ast_schema.py +291 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_client_schema.py +250 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/concat_ast.py +9 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/extend_schema.py +357 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_field_def.py +27 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_operation_ast.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/introspection_query.py +90 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_literal_value.py +67 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_value.py +66 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/quoted_or_list.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/schema_printer.py +168 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/suggestion_list.py +56 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_comparators.py +69 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_from_ast.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_info.py +149 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/value_from_ast.py +69 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/__init__.py +4 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/__init__.py +79 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/arguments_of_correct_type.py +24 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/base.py +8 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/default_values_of_correct_type.py +44 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fields_on_correct_type.py +113 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fragments_on_composite_types.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_argument_names.py +70 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_directives.py +97 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_fragment_names.py +19 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_type_names.py +43 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/lone_anonymous_operation.py +23 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_fragment_cycles.py +59 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_undefined_variables.py +36 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_fragments.py +38 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_variables.py +37 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/overlapping_fields_can_be_merged.py +529 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/possible_fragment_spreads.py +44 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/provided_non_null_arguments.py +46 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/scalar_leafs.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_argument_names.py +32 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_fragment_names.py +28 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_input_field_names.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_operation_names.py +31 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_variable_names.py +27 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_are_input_types.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_in_allowed_position.py +53 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/validation.py +158 -0
- wandb/vendor/promise-2.3.0/conftest.py +30 -0
- wandb/vendor/promise-2.3.0/setup.py +64 -0
- wandb/vendor/promise-2.3.0/tests/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/tests/conftest.py +8 -0
- wandb/vendor/promise-2.3.0/tests/test_awaitable.py +32 -0
- wandb/vendor/promise-2.3.0/tests/test_awaitable_35.py +47 -0
- wandb/vendor/promise-2.3.0/tests/test_benchmark.py +116 -0
- wandb/vendor/promise-2.3.0/tests/test_complex_threads.py +23 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader.py +452 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader_awaitable_35.py +99 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader_extra.py +65 -0
- wandb/vendor/promise-2.3.0/tests/test_extra.py +670 -0
- wandb/vendor/promise-2.3.0/tests/test_issues.py +132 -0
- wandb/vendor/promise-2.3.0/tests/test_promise_list.py +70 -0
- wandb/vendor/promise-2.3.0/tests/test_spec.py +584 -0
- wandb/vendor/promise-2.3.0/tests/test_thread_safety.py +115 -0
- wandb/vendor/promise-2.3.0/tests/utils.py +3 -0
- wandb/vendor/promise-2.3.0/wandb_promise/__init__.py +38 -0
- wandb/vendor/promise-2.3.0/wandb_promise/async_.py +135 -0
- wandb/vendor/promise-2.3.0/wandb_promise/compat.py +32 -0
- wandb/vendor/promise-2.3.0/wandb_promise/dataloader.py +326 -0
- wandb/vendor/promise-2.3.0/wandb_promise/iterate_promise.py +12 -0
- wandb/vendor/promise-2.3.0/wandb_promise/promise.py +848 -0
- wandb/vendor/promise-2.3.0/wandb_promise/promise_list.py +151 -0
- wandb/vendor/promise-2.3.0/wandb_promise/pyutils/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/wandb_promise/pyutils/version.py +83 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/asyncio.py +22 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/gevent.py +21 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/immediate.py +27 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/thread.py +18 -0
- wandb/vendor/promise-2.3.0/wandb_promise/utils.py +56 -0
- wandb/vendor/pygments/__init__.py +90 -0
- wandb/vendor/pygments/cmdline.py +568 -0
- wandb/vendor/pygments/console.py +74 -0
- wandb/vendor/pygments/filter.py +74 -0
- wandb/vendor/pygments/filters/__init__.py +350 -0
- wandb/vendor/pygments/formatter.py +95 -0
- wandb/vendor/pygments/formatters/__init__.py +153 -0
- wandb/vendor/pygments/formatters/_mapping.py +85 -0
- wandb/vendor/pygments/formatters/bbcode.py +109 -0
- wandb/vendor/pygments/formatters/html.py +851 -0
- wandb/vendor/pygments/formatters/img.py +600 -0
- wandb/vendor/pygments/formatters/irc.py +182 -0
- wandb/vendor/pygments/formatters/latex.py +482 -0
- wandb/vendor/pygments/formatters/other.py +160 -0
- wandb/vendor/pygments/formatters/rtf.py +147 -0
- wandb/vendor/pygments/formatters/svg.py +153 -0
- wandb/vendor/pygments/formatters/terminal.py +136 -0
- wandb/vendor/pygments/formatters/terminal256.py +309 -0
- wandb/vendor/pygments/lexer.py +871 -0
- wandb/vendor/pygments/lexers/__init__.py +329 -0
- wandb/vendor/pygments/lexers/_asy_builtins.py +1645 -0
- wandb/vendor/pygments/lexers/_cl_builtins.py +232 -0
- wandb/vendor/pygments/lexers/_cocoa_builtins.py +72 -0
- wandb/vendor/pygments/lexers/_csound_builtins.py +1346 -0
- wandb/vendor/pygments/lexers/_lasso_builtins.py +5327 -0
- wandb/vendor/pygments/lexers/_lua_builtins.py +295 -0
- wandb/vendor/pygments/lexers/_mapping.py +500 -0
- wandb/vendor/pygments/lexers/_mql_builtins.py +1172 -0
- wandb/vendor/pygments/lexers/_openedge_builtins.py +2547 -0
- wandb/vendor/pygments/lexers/_php_builtins.py +4756 -0
- wandb/vendor/pygments/lexers/_postgres_builtins.py +621 -0
- wandb/vendor/pygments/lexers/_scilab_builtins.py +3094 -0
- wandb/vendor/pygments/lexers/_sourcemod_builtins.py +1163 -0
- wandb/vendor/pygments/lexers/_stan_builtins.py +532 -0
- wandb/vendor/pygments/lexers/_stata_builtins.py +419 -0
- wandb/vendor/pygments/lexers/_tsql_builtins.py +1004 -0
- wandb/vendor/pygments/lexers/_vim_builtins.py +1939 -0
- wandb/vendor/pygments/lexers/actionscript.py +240 -0
- wandb/vendor/pygments/lexers/agile.py +24 -0
- wandb/vendor/pygments/lexers/algebra.py +221 -0
- wandb/vendor/pygments/lexers/ambient.py +76 -0
- wandb/vendor/pygments/lexers/ampl.py +87 -0
- wandb/vendor/pygments/lexers/apl.py +101 -0
- wandb/vendor/pygments/lexers/archetype.py +318 -0
- wandb/vendor/pygments/lexers/asm.py +641 -0
- wandb/vendor/pygments/lexers/automation.py +374 -0
- wandb/vendor/pygments/lexers/basic.py +500 -0
- wandb/vendor/pygments/lexers/bibtex.py +160 -0
- wandb/vendor/pygments/lexers/business.py +612 -0
- wandb/vendor/pygments/lexers/c_cpp.py +252 -0
- wandb/vendor/pygments/lexers/c_like.py +541 -0
- wandb/vendor/pygments/lexers/capnproto.py +78 -0
- wandb/vendor/pygments/lexers/chapel.py +102 -0
- wandb/vendor/pygments/lexers/clean.py +288 -0
- wandb/vendor/pygments/lexers/compiled.py +34 -0
- wandb/vendor/pygments/lexers/configs.py +833 -0
- wandb/vendor/pygments/lexers/console.py +114 -0
- wandb/vendor/pygments/lexers/crystal.py +393 -0
- wandb/vendor/pygments/lexers/csound.py +366 -0
- wandb/vendor/pygments/lexers/css.py +689 -0
- wandb/vendor/pygments/lexers/d.py +251 -0
- wandb/vendor/pygments/lexers/dalvik.py +125 -0
- wandb/vendor/pygments/lexers/data.py +555 -0
- wandb/vendor/pygments/lexers/diff.py +165 -0
- wandb/vendor/pygments/lexers/dotnet.py +691 -0
- wandb/vendor/pygments/lexers/dsls.py +878 -0
- wandb/vendor/pygments/lexers/dylan.py +289 -0
- wandb/vendor/pygments/lexers/ecl.py +125 -0
- wandb/vendor/pygments/lexers/eiffel.py +65 -0
- wandb/vendor/pygments/lexers/elm.py +121 -0
- wandb/vendor/pygments/lexers/erlang.py +533 -0
- wandb/vendor/pygments/lexers/esoteric.py +277 -0
- wandb/vendor/pygments/lexers/ezhil.py +69 -0
- wandb/vendor/pygments/lexers/factor.py +344 -0
- wandb/vendor/pygments/lexers/fantom.py +250 -0
- wandb/vendor/pygments/lexers/felix.py +273 -0
- wandb/vendor/pygments/lexers/forth.py +177 -0
- wandb/vendor/pygments/lexers/fortran.py +205 -0
- wandb/vendor/pygments/lexers/foxpro.py +428 -0
- wandb/vendor/pygments/lexers/functional.py +21 -0
- wandb/vendor/pygments/lexers/go.py +101 -0
- wandb/vendor/pygments/lexers/grammar_notation.py +213 -0
- wandb/vendor/pygments/lexers/graph.py +80 -0
- wandb/vendor/pygments/lexers/graphics.py +553 -0
- wandb/vendor/pygments/lexers/haskell.py +843 -0
- wandb/vendor/pygments/lexers/haxe.py +936 -0
- wandb/vendor/pygments/lexers/hdl.py +382 -0
- wandb/vendor/pygments/lexers/hexdump.py +103 -0
- wandb/vendor/pygments/lexers/html.py +602 -0
- wandb/vendor/pygments/lexers/idl.py +270 -0
- wandb/vendor/pygments/lexers/igor.py +288 -0
- wandb/vendor/pygments/lexers/inferno.py +96 -0
- wandb/vendor/pygments/lexers/installers.py +322 -0
- wandb/vendor/pygments/lexers/int_fiction.py +1343 -0
- wandb/vendor/pygments/lexers/iolang.py +63 -0
- wandb/vendor/pygments/lexers/j.py +146 -0
- wandb/vendor/pygments/lexers/javascript.py +1525 -0
- wandb/vendor/pygments/lexers/julia.py +333 -0
- wandb/vendor/pygments/lexers/jvm.py +1573 -0
- wandb/vendor/pygments/lexers/lisp.py +2621 -0
- wandb/vendor/pygments/lexers/make.py +202 -0
- wandb/vendor/pygments/lexers/markup.py +595 -0
- wandb/vendor/pygments/lexers/math.py +21 -0
- wandb/vendor/pygments/lexers/matlab.py +663 -0
- wandb/vendor/pygments/lexers/ml.py +769 -0
- wandb/vendor/pygments/lexers/modeling.py +358 -0
- wandb/vendor/pygments/lexers/modula2.py +1561 -0
- wandb/vendor/pygments/lexers/monte.py +204 -0
- wandb/vendor/pygments/lexers/ncl.py +894 -0
- wandb/vendor/pygments/lexers/nimrod.py +159 -0
- wandb/vendor/pygments/lexers/nit.py +64 -0
- wandb/vendor/pygments/lexers/nix.py +136 -0
- wandb/vendor/pygments/lexers/oberon.py +105 -0
- wandb/vendor/pygments/lexers/objective.py +504 -0
- wandb/vendor/pygments/lexers/ooc.py +85 -0
- wandb/vendor/pygments/lexers/other.py +41 -0
- wandb/vendor/pygments/lexers/parasail.py +79 -0
- wandb/vendor/pygments/lexers/parsers.py +835 -0
- wandb/vendor/pygments/lexers/pascal.py +644 -0
- wandb/vendor/pygments/lexers/pawn.py +199 -0
- wandb/vendor/pygments/lexers/perl.py +620 -0
- wandb/vendor/pygments/lexers/php.py +267 -0
- wandb/vendor/pygments/lexers/praat.py +294 -0
- wandb/vendor/pygments/lexers/prolog.py +306 -0
- wandb/vendor/pygments/lexers/python.py +939 -0
- wandb/vendor/pygments/lexers/qvt.py +152 -0
- wandb/vendor/pygments/lexers/r.py +453 -0
- wandb/vendor/pygments/lexers/rdf.py +270 -0
- wandb/vendor/pygments/lexers/rebol.py +431 -0
- wandb/vendor/pygments/lexers/resource.py +85 -0
- wandb/vendor/pygments/lexers/rnc.py +67 -0
- wandb/vendor/pygments/lexers/roboconf.py +82 -0
- wandb/vendor/pygments/lexers/robotframework.py +560 -0
- wandb/vendor/pygments/lexers/ruby.py +519 -0
- wandb/vendor/pygments/lexers/rust.py +220 -0
- wandb/vendor/pygments/lexers/sas.py +228 -0
- wandb/vendor/pygments/lexers/scripting.py +1222 -0
- wandb/vendor/pygments/lexers/shell.py +794 -0
- wandb/vendor/pygments/lexers/smalltalk.py +195 -0
- wandb/vendor/pygments/lexers/smv.py +79 -0
- wandb/vendor/pygments/lexers/snobol.py +83 -0
- wandb/vendor/pygments/lexers/special.py +103 -0
- wandb/vendor/pygments/lexers/sql.py +681 -0
- wandb/vendor/pygments/lexers/stata.py +108 -0
- wandb/vendor/pygments/lexers/supercollider.py +90 -0
- wandb/vendor/pygments/lexers/tcl.py +145 -0
- wandb/vendor/pygments/lexers/templates.py +2283 -0
- wandb/vendor/pygments/lexers/testing.py +207 -0
- wandb/vendor/pygments/lexers/text.py +25 -0
- wandb/vendor/pygments/lexers/textedit.py +169 -0
- wandb/vendor/pygments/lexers/textfmts.py +297 -0
- wandb/vendor/pygments/lexers/theorem.py +458 -0
- wandb/vendor/pygments/lexers/trafficscript.py +54 -0
- wandb/vendor/pygments/lexers/typoscript.py +226 -0
- wandb/vendor/pygments/lexers/urbi.py +133 -0
- wandb/vendor/pygments/lexers/varnish.py +190 -0
- wandb/vendor/pygments/lexers/verification.py +111 -0
- wandb/vendor/pygments/lexers/web.py +24 -0
- wandb/vendor/pygments/lexers/webmisc.py +988 -0
- wandb/vendor/pygments/lexers/whiley.py +116 -0
- wandb/vendor/pygments/lexers/x10.py +69 -0
- wandb/vendor/pygments/modeline.py +44 -0
- wandb/vendor/pygments/plugin.py +68 -0
- wandb/vendor/pygments/regexopt.py +92 -0
- wandb/vendor/pygments/scanner.py +105 -0
- wandb/vendor/pygments/sphinxext.py +158 -0
- wandb/vendor/pygments/style.py +155 -0
- wandb/vendor/pygments/styles/__init__.py +80 -0
- wandb/vendor/pygments/styles/abap.py +29 -0
- wandb/vendor/pygments/styles/algol.py +63 -0
- wandb/vendor/pygments/styles/algol_nu.py +63 -0
- wandb/vendor/pygments/styles/arduino.py +98 -0
- wandb/vendor/pygments/styles/autumn.py +65 -0
- wandb/vendor/pygments/styles/borland.py +51 -0
- wandb/vendor/pygments/styles/bw.py +49 -0
- wandb/vendor/pygments/styles/colorful.py +81 -0
- wandb/vendor/pygments/styles/default.py +73 -0
- wandb/vendor/pygments/styles/emacs.py +72 -0
- wandb/vendor/pygments/styles/friendly.py +72 -0
- wandb/vendor/pygments/styles/fruity.py +42 -0
- wandb/vendor/pygments/styles/igor.py +29 -0
- wandb/vendor/pygments/styles/lovelace.py +97 -0
- wandb/vendor/pygments/styles/manni.py +75 -0
- wandb/vendor/pygments/styles/monokai.py +106 -0
- wandb/vendor/pygments/styles/murphy.py +80 -0
- wandb/vendor/pygments/styles/native.py +65 -0
- wandb/vendor/pygments/styles/paraiso_dark.py +125 -0
- wandb/vendor/pygments/styles/paraiso_light.py +125 -0
- wandb/vendor/pygments/styles/pastie.py +75 -0
- wandb/vendor/pygments/styles/perldoc.py +69 -0
- wandb/vendor/pygments/styles/rainbow_dash.py +89 -0
- wandb/vendor/pygments/styles/rrt.py +33 -0
- wandb/vendor/pygments/styles/sas.py +44 -0
- wandb/vendor/pygments/styles/stata.py +40 -0
- wandb/vendor/pygments/styles/tango.py +141 -0
- wandb/vendor/pygments/styles/trac.py +63 -0
- wandb/vendor/pygments/styles/vim.py +63 -0
- wandb/vendor/pygments/styles/vs.py +38 -0
- wandb/vendor/pygments/styles/xcode.py +51 -0
- wandb/vendor/pygments/token.py +213 -0
- wandb/vendor/pygments/unistring.py +217 -0
- wandb/vendor/pygments/util.py +388 -0
- wandb/vendor/pynvml/__init__.py +0 -0
- wandb/vendor/pynvml/pynvml.py +4779 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/__init__.py +17 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/events.py +615 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/__init__.py +98 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/api.py +369 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents.py +172 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents2.py +239 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify.py +218 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_buffer.py +81 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_c.py +575 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/kqueue.py +730 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/polling.py +145 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/read_directory_changes.py +133 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/winapi.py +348 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/patterns.py +265 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/tricks/__init__.py +174 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/__init__.py +151 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/bricks.py +249 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/compat.py +29 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/decorators.py +198 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/delayed_queue.py +88 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/dirsnapshot.py +293 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/echo.py +157 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/event_backport.py +41 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/importlib2.py +40 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/platform.py +57 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/unicode_paths.py +64 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/win32stat.py +123 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/version.py +28 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/watchmedo.py +577 -0
- wandb/wandb_agent.py +588 -0
- wandb/wandb_controller.py +721 -0
- wandb/wandb_run.py +9 -0
- wandb-0.18.2.dist-info/METADATA +213 -0
- wandb-0.18.2.dist-info/RECORD +827 -0
- wandb-0.18.2.dist-info/WHEEL +5 -0
- wandb-0.18.2.dist-info/entry_points.txt +3 -0
- wandb-0.18.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1686 @@
|
|
1
|
+
"""sender."""
|
2
|
+
|
3
|
+
import contextlib
|
4
|
+
import gzip
|
5
|
+
import json
|
6
|
+
import logging
|
7
|
+
import os
|
8
|
+
import queue
|
9
|
+
import sys
|
10
|
+
import threading
|
11
|
+
import time
|
12
|
+
import traceback
|
13
|
+
from collections import defaultdict
|
14
|
+
from datetime import datetime
|
15
|
+
from queue import Queue
|
16
|
+
from typing import (
|
17
|
+
TYPE_CHECKING,
|
18
|
+
Any,
|
19
|
+
Dict,
|
20
|
+
Generator,
|
21
|
+
List,
|
22
|
+
Optional,
|
23
|
+
Tuple,
|
24
|
+
Type,
|
25
|
+
Union,
|
26
|
+
)
|
27
|
+
|
28
|
+
import requests
|
29
|
+
|
30
|
+
import wandb
|
31
|
+
from wandb import util
|
32
|
+
from wandb.errors import CommError, UsageError
|
33
|
+
from wandb.errors.util import ProtobufErrorHandler
|
34
|
+
from wandb.filesync.dir_watcher import DirWatcher
|
35
|
+
from wandb.proto import wandb_internal_pb2
|
36
|
+
from wandb.sdk.artifacts.artifact_saver import ArtifactSaver
|
37
|
+
from wandb.sdk.interface import interface
|
38
|
+
from wandb.sdk.interface.interface_queue import InterfaceQueue
|
39
|
+
from wandb.sdk.internal import (
|
40
|
+
context,
|
41
|
+
datastore,
|
42
|
+
file_stream,
|
43
|
+
internal_api,
|
44
|
+
sender_config,
|
45
|
+
)
|
46
|
+
from wandb.sdk.internal.file_pusher import FilePusher
|
47
|
+
from wandb.sdk.internal.job_builder import JobBuilder
|
48
|
+
from wandb.sdk.internal.settings_static import SettingsStatic
|
49
|
+
from wandb.sdk.lib import (
|
50
|
+
config_util,
|
51
|
+
filenames,
|
52
|
+
filesystem,
|
53
|
+
proto_util,
|
54
|
+
redirect,
|
55
|
+
telemetry,
|
56
|
+
tracelog,
|
57
|
+
)
|
58
|
+
from wandb.sdk.lib.mailbox import ContextCancelledError
|
59
|
+
from wandb.sdk.lib.proto_util import message_to_dict
|
60
|
+
|
61
|
+
if sys.version_info >= (3, 8):
|
62
|
+
from typing import Literal
|
63
|
+
else:
|
64
|
+
from typing_extensions import Literal
|
65
|
+
|
66
|
+
if TYPE_CHECKING:
|
67
|
+
from wandb.proto.wandb_internal_pb2 import (
|
68
|
+
ArtifactManifest,
|
69
|
+
ArtifactManifestEntry,
|
70
|
+
ArtifactRecord,
|
71
|
+
HttpResponse,
|
72
|
+
LocalInfo,
|
73
|
+
Record,
|
74
|
+
Result,
|
75
|
+
RunExitResult,
|
76
|
+
RunRecord,
|
77
|
+
SummaryRecord,
|
78
|
+
)
|
79
|
+
|
80
|
+
StreamLiterals = Literal["stdout", "stderr"]
|
81
|
+
|
82
|
+
|
83
|
+
logger = logging.getLogger(__name__)
|
84
|
+
|
85
|
+
|
86
|
+
_OUTPUT_MIN_CALLBACK_INTERVAL = 2 # seconds
|
87
|
+
|
88
|
+
|
89
|
+
def _framework_priority() -> Generator[Tuple[str, str], None, None]:
|
90
|
+
yield from [
|
91
|
+
("lightgbm", "lightgbm"),
|
92
|
+
("catboost", "catboost"),
|
93
|
+
("xgboost", "xgboost"),
|
94
|
+
("transformers_huggingface", "huggingface"), # backwards compatibility
|
95
|
+
("transformers", "huggingface"),
|
96
|
+
("pytorch_ignite", "ignite"), # backwards compatibility
|
97
|
+
("ignite", "ignite"),
|
98
|
+
("pytorch_lightning", "lightning"),
|
99
|
+
("fastai", "fastai"),
|
100
|
+
("torch", "torch"),
|
101
|
+
("keras", "keras"),
|
102
|
+
("tensorflow", "tensorflow"),
|
103
|
+
("sklearn", "sklearn"),
|
104
|
+
]
|
105
|
+
|
106
|
+
|
107
|
+
def _manifest_json_from_proto(manifest: "ArtifactManifest") -> Dict:
|
108
|
+
if manifest.version == 1:
|
109
|
+
if manifest.manifest_file_path:
|
110
|
+
contents = {}
|
111
|
+
with gzip.open(manifest.manifest_file_path, "rt") as f:
|
112
|
+
for line in f:
|
113
|
+
entry_json = json.loads(line)
|
114
|
+
path = entry_json.pop("path")
|
115
|
+
contents[path] = entry_json
|
116
|
+
else:
|
117
|
+
contents = {
|
118
|
+
content.path: _manifest_entry_from_proto(content)
|
119
|
+
for content in manifest.contents
|
120
|
+
}
|
121
|
+
else:
|
122
|
+
raise ValueError(f"unknown artifact manifest version: {manifest.version}")
|
123
|
+
|
124
|
+
return {
|
125
|
+
"version": manifest.version,
|
126
|
+
"storagePolicy": manifest.storage_policy,
|
127
|
+
"storagePolicyConfig": {
|
128
|
+
config.key: json.loads(config.value_json)
|
129
|
+
for config in manifest.storage_policy_config
|
130
|
+
},
|
131
|
+
"contents": contents,
|
132
|
+
}
|
133
|
+
|
134
|
+
|
135
|
+
def _manifest_entry_from_proto(entry: "ArtifactManifestEntry") -> Dict:
|
136
|
+
birth_artifact_id = entry.birth_artifact_id if entry.birth_artifact_id else None
|
137
|
+
return {
|
138
|
+
"digest": entry.digest,
|
139
|
+
"birthArtifactID": birth_artifact_id,
|
140
|
+
"ref": entry.ref if entry.ref else None,
|
141
|
+
"size": entry.size if entry.size is not None else None,
|
142
|
+
"local_path": entry.local_path if entry.local_path else None,
|
143
|
+
"skip_cache": entry.skip_cache,
|
144
|
+
"extra": {extra.key: json.loads(extra.value_json) for extra in entry.extra},
|
145
|
+
}
|
146
|
+
|
147
|
+
|
148
|
+
class ResumeState:
    """Mutable record of the state tracked for a resumed run.

    A fresh instance is "not resumed": every counter is zero and every
    optional field is ``None``.
    """

    resumed: bool
    step: int
    history: int
    events: int
    output: int
    runtime: float
    wandb_runtime: Optional[int]
    summary: Optional[Dict[str, Any]]
    config: Optional[Dict[str, Any]]
    tags: Optional[List[str]]

    def __init__(self) -> None:
        """Initialise all fields to their empty defaults."""
        # Assignment order is the order in which __str__ lists the fields.
        self.resumed = False
        self.step = self.history = self.events = self.output = 0
        self.runtime = 0
        # wandb_runtime is the canonical runtime (stored in summary._wandb.runtime)
        self.wandb_runtime = None
        self.summary = self.config = self.tags = None

    def __str__(self) -> str:
        """Return ``ResumeState(name=value,...)`` over the instance attributes."""
        fields = ",".join(f"{key}={value}" for key, value in vars(self).items())
        return f"ResumeState({fields})"
|
176
|
+
|
177
|
+
|
178
|
+
class _OutputRawStream:
    """Per-stream bundle of state for raw console output.

    Holds a stop event, a queue, a terminal emulator, and one writer and one
    reader daemon thread whose targets are methods of the owning
    ``SendManager``.
    """

    _stopped: threading.Event
    _queue: queue.Queue
    _emulator: redirect.TerminalEmulator
    _writer_thr: threading.Thread
    _reader_thr: threading.Thread

    def __init__(self, stream: str, sm: "SendManager"):
        """Create the (not yet started) threads for ``stream``.

        Args:
            stream: stream name, passed to both thread targets as the
                ``stream`` keyword argument and used in the thread names.
            sm: manager providing ``_output_raw_writer_thread`` and
                ``_output_raw_reader_thread``.
        """

        def make_daemon(target: Any, thread_name: str) -> threading.Thread:
            # Each thread receives its own kwargs dictionary.
            return threading.Thread(
                target=target,
                kwargs={"stream": stream},
                daemon=True,
                name=thread_name,
            )

        self._stopped = threading.Event()
        self._queue = queue.Queue()
        self._emulator = redirect.TerminalEmulator()
        self._writer_thr = make_daemon(
            sm._output_raw_writer_thread, f"OutRawWr-{stream}"
        )
        self._reader_thr = make_daemon(
            sm._output_raw_reader_thread, f"OutRawRd-{stream}"
        )

    def start(self) -> None:
        """Start the writer thread, then the reader thread."""
        for worker in (self._writer_thr, self._reader_thr):
            worker.start()
|
205
|
+
|
206
|
+
|
207
|
+
class SendManager:
|
208
|
+
UPDATE_CONFIG_TIME: int = 30
|
209
|
+
UPDATE_STATUS_TIME: int = 5
|
210
|
+
|
211
|
+
_settings: SettingsStatic
|
212
|
+
_record_q: "Queue[Record]"
|
213
|
+
_result_q: "Queue[Result]"
|
214
|
+
_interface: InterfaceQueue
|
215
|
+
_api_settings: Dict[str, str]
|
216
|
+
_partial_output: Dict[str, str]
|
217
|
+
_context_keeper: context.ContextKeeper
|
218
|
+
|
219
|
+
_telemetry_obj: telemetry.TelemetryRecord
|
220
|
+
_fs: Optional["file_stream.FileStreamApi"]
|
221
|
+
_run: Optional["RunRecord"]
|
222
|
+
_entity: Optional[str]
|
223
|
+
_project: Optional[str]
|
224
|
+
_dir_watcher: Optional["DirWatcher"]
|
225
|
+
_pusher: Optional["FilePusher"]
|
226
|
+
_record_exit: Optional["Record"]
|
227
|
+
_exit_result: Optional["RunExitResult"]
|
228
|
+
_resume_state: ResumeState
|
229
|
+
_rewind_response: Optional[Dict[str, Any]]
|
230
|
+
_cached_server_info: Dict[str, Any]
|
231
|
+
_cached_viewer: Dict[str, Any]
|
232
|
+
_server_messages: List[Dict[str, Any]]
|
233
|
+
_ds: Optional[datastore.DataStore]
|
234
|
+
_output_raw_streams: Dict["StreamLiterals", _OutputRawStream]
|
235
|
+
_output_raw_file: Optional[filesystem.CRDedupedFile]
|
236
|
+
_send_record_num: int
|
237
|
+
_send_end_offset: int
|
238
|
+
_debounce_config_time: float
|
239
|
+
_debounce_status_time: float
|
240
|
+
|
241
|
+
def __init__(
    self,
    settings: SettingsStatic,
    record_q: "Queue[Record]",
    result_q: "Queue[Result]",
    interface: InterfaceQueue,
    context_keeper: context.ContextKeeper,
) -> None:
    """Store the collaborators and reset all sender state.

    Args:
        settings: static settings, also handed to the internal API client
            and to the job builder.
        record_q: queue of records; its size is what ``len(self)`` reports.
        result_q: queue on which results are placed.
        interface: interface used to build and publish requests.
        context_keeper: keeper of per-record contexts.
    """
    # Collaborators supplied by the caller.
    self._settings = settings
    self._record_q = record_q
    self._result_q = result_q
    self._interface = interface
    self._context_keeper = context_keeper

    # Transaction-log reading / progress bookkeeping.
    self._ds = None
    self._send_record_num = 0
    self._send_end_offset = 0

    # Created later; nothing is attached at construction time.
    self._fs = None
    self._pusher = None
    self._dir_watcher = None

    # State updated by login
    self._entity = None
    self._flags = None

    # State updated by wandb.init
    self._run = None
    self._project = None

    # keep track of config from key/val updates
    self._consolidated_config = sender_config.ConfigState()

    self._start_time: int = 0
    self._telemetry_obj = telemetry.TelemetryRecord()
    self._config_metric_pbdict_list: List[Dict[int, Any]] = []
    # A defaultdict without a factory: missing keys still raise KeyError.
    self._metadata_summary: Dict[str, Any] = defaultdict()
    self._cached_summary: Dict[str, Any] = {}
    self._config_metric_index_dict: Dict[str, int] = {}
    self._config_metric_dict: Dict[str, wandb_internal_pb2.MetricRecord] = {}
    self._consolidated_summary: Dict[str, Any] = {}

    self._cached_server_info = {}
    self._cached_viewer = {}
    self._server_messages = []

    # State updated by resuming
    self._resume_state = ResumeState()
    self._rewind_response = None

    # State added when run_exit is initiated and complete
    self._record_exit = None
    self._exit_result = None

    self._api = internal_api.Api(
        default_settings=settings, retry_callback=self.retry_callback
    )
    self._api_settings = {}

    # queue filled by retry_callback
    self._retry_q: Queue[HttpResponse] = queue.Queue()

    # do we need to debounce?
    self._config_needs_debounce: bool = False

    # TODO(jhr): do something better, why do we need to send full lines?
    self._partial_output = {}

    self._exit_code = 0

    # internal vars for handling raw console output
    self._output_raw_streams = {}
    self._output_raw_file = None

    # job builder
    self._job_builder = JobBuilder(settings)

    # Both debounce timers start from the same monotonic instant.
    self._debounce_config_time = self._debounce_status_time = time.monotonic()
|
321
|
+
|
322
|
+
@classmethod
def setup(
    cls,
    root_dir: str,
    resume: Union[None, bool, str],
) -> "SendManager":
    """Build a standalone ``SendManager`` rooted at ``root_dir``.

    Currently this is used primarily for `sync.py`.

    Args:
        root_dir: run directory; its ``files`` subdirectory becomes the
            files directory of the settings.
        resume: forwarded unchanged as the ``resume`` setting.

    Returns:
        A manager wired to fresh record/result queues, an interface
        publishing to the record queue, and a new context keeper.
    """
    sync_settings = wandb.Settings(
        files_dir=os.path.join(root_dir, "files"),
        root_dir=root_dir,
        resume=resume,
        _sync=True,
        disable_job_creation=False,
        _file_stream_timeout_seconds=0,
    )
    records: "Queue[Record]" = queue.Queue()
    results: "Queue[Result]" = queue.Queue()
    return SendManager(
        settings=SettingsStatic(sync_settings.to_proto()),
        record_q=records,
        result_q=results,
        interface=InterfaceQueue(record_q=records),
        context_keeper=context.ContextKeeper(),
    )
|
354
|
+
|
355
|
+
def __len__(self) -> int:
|
356
|
+
return self._record_q.qsize()
|
357
|
+
|
358
|
+
def __enter__(self) -> "SendManager":
|
359
|
+
return self
|
360
|
+
|
361
|
+
def __exit__(
|
362
|
+
self,
|
363
|
+
exc_type: Optional[Type[BaseException]],
|
364
|
+
exc_value: Optional[BaseException],
|
365
|
+
exc_traceback: Optional[traceback.TracebackException],
|
366
|
+
) -> Literal[False]:
|
367
|
+
while self:
|
368
|
+
data = next(self)
|
369
|
+
self.send(data)
|
370
|
+
self.finish()
|
371
|
+
return False
|
372
|
+
|
373
|
+
def retry_callback(self, status: int, response_text: str) -> None:
    """Queue an ``HttpResponse`` describing a retried request.

    Args:
        status: stored as ``http_status_code``.
        response_text: stored as ``http_response_text``.
    """
    self._retry_q.put(
        wandb_internal_pb2.HttpResponse(
            http_status_code=status,
            http_response_text=response_text,
        )
    )
|
378
|
+
|
379
|
+
def send(self, record: "Record") -> None:
    """Dispatch ``record`` to the ``send_<record_type>`` method.

    The record number and end offset are noted first. The handler runs with
    the record's API context installed; a ``ContextCancelledError`` is
    logged and its context released instead of propagating. The local API
    context is always cleared afterwards.
    """
    self._update_record_num(record.num)
    self._update_end_offset(record.control.end_offset)

    kind = record.WhichOneof("record_type")
    assert kind
    method_name = "send_" + kind
    handler = getattr(self, method_name, None)
    # Don't log output to reduce log noise
    quiet_kinds = ("output", "request", "output_raw")
    if kind not in quiet_kinds:
        logger.debug(f"send: {kind}")
    assert handler, f"unknown send handler: {method_name}"

    ctx_id = context.context_id_from_record(record)
    local_context = self._context_keeper.get(ctx_id)
    try:
        self._api.set_local_context(local_context)
        handler(record)
    except ContextCancelledError:
        logger.debug(f"Record cancelled: {kind}")
        self._context_keeper.release(ctx_id)
    finally:
        self._api.clear_local_context()
|
402
|
+
|
403
|
+
def send_preempting(self, _: "Record") -> None:
    """Enqueue a preempting notice on the file stream, if one exists."""
    stream = self._fs
    if not stream:
        return
    stream.enqueue_preempting()
|
406
|
+
|
407
|
+
def send_request_sender_mark(self, _: "Record") -> None:
    """Handle a sender-mark request by reporting status unconditionally."""
    force = True
    self._maybe_report_status(always=force)
|
409
|
+
|
410
|
+
def send_request(self, record: "Record") -> None:
    """Dispatch a request record to the ``send_request_<request_type>`` method.

    Every request type except ``network_status`` is logged at debug level.
    An unset request type or a missing handler trips an assertion.
    """
    kind = record.request.WhichOneof("request_type")
    assert kind
    method_name = "send_request_" + kind
    handler = getattr(self, method_name, None)
    if kind != "network_status":
        logger.debug(f"send_request: {kind}")
    assert handler, f"unknown handle: {method_name}"
    handler(record)
|
419
|
+
|
420
|
+
def _respond_result(self, result: "Result") -> None:
    """Trace ``result``, release its context, and put it on the result queue."""
    out_queue = self._result_q
    tracelog.log_message_queue(result, out_queue)
    # Release the context before the result becomes visible on the queue.
    self._context_keeper.release(context.context_id_from_result(result))
    out_queue.put(result)
|
425
|
+
|
426
|
+
def _flatten(self, dictionary: Dict) -> None:
|
427
|
+
if isinstance(dictionary, dict):
|
428
|
+
for k, v in list(dictionary.items()):
|
429
|
+
if isinstance(v, dict):
|
430
|
+
self._flatten(v)
|
431
|
+
dictionary.pop(k)
|
432
|
+
for k2, v2 in v.items():
|
433
|
+
dictionary[k + "." + k2] = v2
|
434
|
+
|
435
|
+
def _update_record_num(self, record_num: int) -> None:
|
436
|
+
if not record_num:
|
437
|
+
return
|
438
|
+
# Currently how we handle offline mode and syncing is not
|
439
|
+
# compatible with this assertion due to how the exit record
|
440
|
+
# is (mis)handled:
|
441
|
+
# - using "always_send" in offline mode to trigger defer
|
442
|
+
# state machine
|
443
|
+
# - skipping the exit record in `wandb sync` mode so that
|
444
|
+
# it is always executed as the last record
|
445
|
+
if not self._settings._offline and not self._settings._sync:
|
446
|
+
assert record_num == self._send_record_num + 1
|
447
|
+
self._send_record_num = record_num
|
448
|
+
|
449
|
+
def _update_end_offset(self, end_offset: int) -> None:
|
450
|
+
if not end_offset:
|
451
|
+
return
|
452
|
+
self._send_end_offset = end_offset
|
453
|
+
|
454
|
+
def send_request_sender_read(self, record: "Record") -> None:
    """Read records from the sync file and send each one.

    The datastore is opened on ``settings.sync_file`` on first use. Reading
    starts at the request's ``start_offset`` and continues until the
    datastore offset reaches ``final_offset``; every record read is parsed
    and passed to ``send``.
    """
    store = self._ds
    if store is None:
        store = self._ds = datastore.DataStore()
        store.open_for_scan(self._settings.sync_file)

    # TODO(cancel_paused): implement cancel_set logic
    # The idea is that there is an active request to cancel a
    # message that is being read from the transaction log below

    read_request = record.request.sender_read
    first_offset = read_request.start_offset
    last_offset = read_request.final_offset
    store.seek(first_offset)

    reached = 0
    while reached < last_offset:
        payload = store.scan_data()
        assert payload
        reached = store.get_offset()

        replayed = wandb_internal_pb2.Record()
        replayed.ParseFromString(payload)
        self._update_end_offset(reached)
        self.send(replayed)

    # NOTE(review): indentation is not visible in this view; the two calls
    # below are assumed to run once after the loop -- confirm.
    # make sure we perform deferred operations
    self.debounce()

    # make sure that we always update the writer for every sender read request
    self._maybe_report_status(always=True)
|
483
|
+
|
484
|
+
def send_request_stop_status(self, record: "Record") -> None:
    """Answer a stop-status request.

    ``run_should_stop`` defaults to ``False``. The API is asked only when
    entity, project and run id are all known; a failing check is logged as a
    warning and leaves the default in place. A result is always sent back.
    """
    result = proto_util._result_from_record(record)
    reply = result.response.stop_status_response
    reply.run_should_stop = False

    run_identified = (
        self._entity and self._project and self._run and self._run.run_id
    )
    if run_identified:
        try:
            reply.run_should_stop = self._api.check_stop_requested(
                self._project, self._entity, self._run.run_id
            )
        except Exception as e:
            logger.warning("Failed to check stop requested status: %s", e)

    self._respond_result(result)
|
496
|
+
|
497
|
+
def _maybe_update_config(self, always: bool = False) -> None:
|
498
|
+
time_now = time.monotonic()
|
499
|
+
if (
|
500
|
+
not always
|
501
|
+
and time_now < self._debounce_config_time + self.UPDATE_CONFIG_TIME
|
502
|
+
):
|
503
|
+
return
|
504
|
+
if self._config_needs_debounce:
|
505
|
+
self._debounce_config()
|
506
|
+
self._debounce_config_time = time_now
|
507
|
+
|
508
|
+
def _maybe_report_status(self, always: bool = False) -> None:
|
509
|
+
time_now = time.monotonic()
|
510
|
+
if (
|
511
|
+
not always
|
512
|
+
and time_now < self._debounce_status_time + self.UPDATE_STATUS_TIME
|
513
|
+
):
|
514
|
+
return
|
515
|
+
self._debounce_status_time = time_now
|
516
|
+
|
517
|
+
status_report = wandb_internal_pb2.StatusReportRequest(
|
518
|
+
record_num=self._send_record_num,
|
519
|
+
sent_offset=self._send_end_offset,
|
520
|
+
)
|
521
|
+
status_time = time.time()
|
522
|
+
status_report.sync_time.FromMicroseconds(int(status_time * 1e6))
|
523
|
+
record = self._interface._make_request(status_report=status_report)
|
524
|
+
self._interface._publish(record)
|
525
|
+
|
526
|
+
def debounce(self, final: bool = False) -> None:
    """Run the throttled status report, then the throttled config update.

    Args:
        final: forwarded as ``always`` to both steps, bypassing throttling.
    """
    for step in (self._maybe_report_status, self._maybe_update_config):
        step(always=final)
|
529
|
+
|
530
|
+
def _debounce_config(self) -> None:
|
531
|
+
config_value_dict = self._config_backend_dict()
|
532
|
+
# TODO(jhr): check result of upsert_run?
|
533
|
+
if self._run:
|
534
|
+
self._api.upsert_run(
|
535
|
+
name=self._run.run_id,
|
536
|
+
config=config_value_dict,
|
537
|
+
**self._api_settings, # type: ignore
|
538
|
+
)
|
539
|
+
self._config_save(config_value_dict)
|
540
|
+
self._config_needs_debounce = False
|
541
|
+
|
542
|
+
def send_request_network_status(self, record: "Record") -> None:
    """Answer a network-status request with all queued retry responses.

    The retry queue is drained without blocking. Any error other than the
    queue being empty is logged as a warning and draining continues.
    """
    result = proto_util._result_from_record(record)
    reply = result.response.network_status_response
    draining = True
    while draining:
        try:
            response = self._retry_q.get_nowait()
            reply.network_responses.append(response)
        except queue.Empty:
            draining = False
        except Exception as e:
            logger.warning(f"Error emptying retry queue: {e}")
    self._respond_result(result)
|
553
|
+
|
554
|
+
def send_request_login(self, record: "Record") -> None:
    """Re-authenticate, record the viewer's entity, and reply if asked to.

    A result is sent only when ``record.control.req_resp`` is set; it carries
    the entity as ``active_entity`` when one is known.
    """
    # TODO: do something with api_key or anonymous?
    # TODO: return an error if we aren't logged in?
    self._api.reauth()
    viewer_info = self.get_viewer_info()
    server_info = self.get_server_info()
    if server_info:
        logger.info(f"Login server info: {server_info}")
    self._entity = viewer_info.get("entity")

    if not record.control.req_resp:
        return
    result = proto_util._result_from_record(record)
    if self._entity:
        result.response.login_response.active_entity = self._entity
    self._respond_result(result)
|
570
|
+
|
571
|
+
def send_exit(self, record: "Record") -> None:
    """Record the exit code and runtime, then kick off the defer sequence.

    The exit record itself is kept on ``self._record_exit`` so the final
    defer step can respond to it.
    """
    # track where the exit came from
    self._record_exit = record

    exit_record = record.exit
    exit_code = exit_record.exit_code
    self._exit_code = exit_code
    logger.info("handling exit code: %s", exit_code)

    runtime = exit_record.runtime
    logger.info("handling runtime: %s", runtime)
    self._metadata_summary["runtime"] = runtime
    self._update_summary()

    # The request queue needs a chance to empty between states, so the
    # defer requests are used as a state machine.
    logger.info("send defer")
    self._interface.publish_defer()
|
587
|
+
|
588
|
+
def send_final(self, record: "Record") -> None:
    """Handle a final record; intentionally a no-op in the sender."""
    return None
|
590
|
+
|
591
|
+
def _flush_run(self) -> None:
    """Hook for the FLUSH_RUN defer state; currently does nothing."""
    return None
|
593
|
+
|
594
|
+
def send_request_status_report(self, record: "Record") -> None:
    """Accept a status-report request without acting on it.

    todo? this is just a noop to please wandb sync
    """
    return None
|
597
|
+
|
598
|
+
def send_request_defer(self, record: "Record") -> None:  # noqa: C901
    """Run one step of the shutdown state machine driven by defer requests.

    Each non-terminal state performs its flush (if the sender owns one) and
    then publishes a defer request for the following state. At ``END`` the
    exit result is stored on ``self._exit_result`` and, when the original
    exit record carried a mailbox slot, delivered as its result.

    Raises:
        AssertionError: if the defer state is not one of the known states.
    """
    defer = record.request.defer
    state = defer.state
    logger.info(f"handle sender defer: {state}")

    def advance() -> None:
        """Publish a defer request for the state after the current one."""
        next_state = defer.state + 1
        logger.info(f"send defer: {next_state}")
        self._interface.publish_defer(next_state)

    if state == defer.BEGIN:
        advance()
    elif state == defer.FLUSH_RUN:
        self._flush_run()
        advance()
    elif state in (
        defer.FLUSH_STATS,
        defer.FLUSH_PARTIAL_HISTORY,
        defer.FLUSH_TB,
        defer.FLUSH_SUM,
    ):
        # NOTE: these are handled in handler.py:handle_request_defer()
        advance()
    elif state == defer.FLUSH_DEBOUNCER:
        self.debounce(final=True)
        advance()
    elif state == defer.FLUSH_OUTPUT:
        self._output_raw_finish()
        advance()
    elif state == defer.FLUSH_JOB:
        self._flush_job()
        advance()
    elif state == defer.FLUSH_DIR:
        if self._dir_watcher:
            self._dir_watcher.finish()
            self._dir_watcher = None
        advance()
    elif state == defer.FLUSH_FP:
        if self._pusher:
            # FilePusher generates some events for FileStreamApi, so we
            # need to wait for pusher to finish before going to the next
            # state to ensure that filestream gets all the events that we
            # want before telling it to finish up
            self._pusher.finish(advance)
        else:
            advance()
    elif state == defer.JOIN_FP:
        if self._pusher:
            self._pusher.join()
        advance()
    elif state == defer.FLUSH_FS:
        if self._fs:
            # TODO(jhr): now is a good time to output pending output lines
            self._fs.finish(self._exit_code)
            self._fs = None
        advance()
    elif state == defer.FLUSH_FINAL:
        self._interface.publish_final()
        self._interface.publish_footer()
        advance()
    elif state != defer.END:
        raise AssertionError("unknown state")

    # Every state except END stops here.
    if state != defer.END:
        return

    exit_result = wandb_internal_pb2.RunExitResult()

    # mark exit done in case we are polling on exit
    self._exit_result = exit_result

    # Report response to mailbox
    exit_record = self._record_exit
    if exit_record and exit_record.control.mailbox_slot:
        result = proto_util._result_from_record(exit_record)
        result.exit_result.CopyFrom(exit_result)
        self._respond_result(result)
|
681
|
+
|
682
|
+
def send_request_poll_exit(self, record: "Record") -> None:
    """Report upload progress and whether the exit sequence has finished.

    Nothing is sent unless the record asks for a response (``req_resp`` or a
    mailbox slot). Pusher statistics are included while a pusher exists;
    ``done`` and the exit result are included once ``self._exit_result`` is set.
    """
    control = record.control
    if not (control.req_resp or control.mailbox_slot):
        return

    result = proto_util._result_from_record(record)

    pusher = self._pusher
    if pusher:
        _alive, status = pusher.get_status()
        counts = pusher.file_counts_by_category()
        poll_exit = result.response.poll_exit_response

        poll_exit.pusher_stats.uploaded_bytes = status.uploaded_bytes
        poll_exit.pusher_stats.total_bytes = status.total_bytes
        poll_exit.pusher_stats.deduped_bytes = status.deduped_bytes

        poll_exit.file_counts.wandb_count = counts.wandb
        poll_exit.file_counts.media_count = counts.media
        poll_exit.file_counts.artifact_count = counts.artifact
        poll_exit.file_counts.other_count = counts.other

    exit_result = self._exit_result
    if exit_result:
        result.response.poll_exit_response.done = True
        result.response.poll_exit_response.exit_result.CopyFrom(exit_result)

    self._respond_result(result)
|
705
|
+
|
706
|
+
def _setup_resume(
    self, run: "RunRecord"
) -> Optional["wandb_internal_pb2.ErrorInfo"]:
    """Queries the backend for a run; fail if the settings are incompatible.

    When the run exists and has been initialised, ``self._resume_state`` is
    populated from the server's history/events tails, config, summary and
    tags. If those tails cannot be parsed and resume is not ``"must"``, the
    resume state is still populated, using defaults for whatever failed.

    Args:
        run: The run record being initialised.

    Returns:
        ``None`` when resuming is not requested, the run does not need
        resuming, or the resume state was configured. A ``USAGE`` error when
        ``resume="must"`` cannot be honoured or ``resume="never"`` hits an
        existing run.
    """
    if not self._settings.resume:
        return None

    # TODO: This causes a race, we need to make the upsert atomically
    # only create or update depending on the resume config
    # we use the runs entity if set, otherwise fallback to users entity
    # todo: ensure entity is not None as self._entity is Optional[str]
    entity = run.entity or self._entity
    logger.info(
        "checking resume status for %s/%s/%s", entity, run.project, run.run_id
    )
    resume_status = self._api.run_resume_status(
        entity=entity,  # type: ignore
        project_name=run.project,
        name=run.run_id,
    )
    # No resume status = run does not exist; No t key in wandbConfig = run exists but hasn't been inited
    if not resume_status or '"t":' not in resume_status.get("wandbConfig", ""):
        if self._settings.resume == "must":
            error = wandb_internal_pb2.ErrorInfo()
            error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
            error.message = (
                "You provided an invalid value for the `resume` argument."
                f" The value 'must' is not a valid option for resuming a run ({run.run_id}) that has not been initialized."
                " Please check your inputs and try again with a valid run ID."
                " If you are trying to start a new run, please omit the `resume` argument or use `resume='allow'`."
            )
            return error
        return None

    #
    # handle cases where we have resume_status
    #
    if self._settings.resume == "never":
        error = wandb_internal_pb2.ErrorInfo()
        error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
        error.message = (
            "You provided an invalid value for the `resume` argument."
            f" The value 'never' is not a valid option for resuming a run ({run.run_id}) that already exists."
            " Please check your inputs and try again with a valid value for the `resume` argument."
        )
        return error

    # Defaults for everything the parsing below may fail to produce. They
    # are all bound before the `try` so the code after it can run even
    # when parsing stops part-way.
    history = {}
    config = {}
    summary = {}
    tags = []
    events_rt = 0
    history_rt = 0
    try:
        # Parse the tails into temporaries so `history` is only ever a
        # dict: either the default or the fully parsed last row.
        history_tail = json.loads(resume_status["historyTail"])
        if history_tail:
            history = json.loads(history_tail[-1])
            history_rt = history.get("_runtime", 0)
        events_tail = json.loads(resume_status["eventsTail"])
        if events_tail:
            last_event = json.loads(events_tail[-1])
            events_rt = last_event.get("_runtime", 0)
        config = json.loads(resume_status["config"] or "{}")
        summary = json.loads(resume_status["summaryMetrics"] or "{}")
        new_runtime = summary.get("_wandb", {}).get("runtime", None)
        if new_runtime is not None:
            self._resume_state.wandb_runtime = new_runtime
        tags = resume_status.get("tags") or []

    except (IndexError, ValueError) as e:
        logger.error("unable to load resume tails", exc_info=e)
        if self._settings.resume == "must":
            error = wandb_internal_pb2.ErrorInfo()
            error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
            error.message = "resume='must' but could not resume ({}) ".format(
                run.run_id
            )
            return error

    # TODO: Do we need to restore config / summary?
    # System metrics runtime is usually greater than history
    self._resume_state.runtime = max(events_rt, history_rt)
    last_step = history.get("_step", 0)
    history_line_count = resume_status["historyLineCount"]
    self._resume_state.step = last_step + 1 if history_line_count > 0 else last_step
    self._resume_state.history = history_line_count
    self._resume_state.events = resume_status["eventsLineCount"]
    self._resume_state.output = resume_status["logLineCount"]
    self._resume_state.config = config
    self._resume_state.summary = summary
    self._resume_state.tags = tags
    self._resume_state.resumed = True
    logger.info("configured resuming with: {}".format(self._resume_state))
    return None
|
800
|
+
|
801
|
+
def _telemetry_get_framework(self) -> str:
    """Return the name of the first prioritised framework that was imported.

    The imports recorded at finish are preferred over those recorded at
    init. An empty string is returned when neither is present or no
    framework in the priority list is flagged.
    """
    telemetry_obj = self._telemetry_obj
    # detect framework by checking what is loaded
    for field in ("imports_finish", "imports_init"):
        if telemetry_obj.HasField(field):
            loaded = getattr(telemetry_obj, field)
            break
    else:
        return ""

    for flag, framework in _framework_priority():
        if getattr(loaded, flag, False):
            return framework
    return ""
|
815
|
+
|
816
|
+
def _config_backend_dict(self) -> sender_config.BackendConfigDict:
    """Render the consolidated config in the backend's dict format.

    Falls back to a fresh ``ConfigState`` when the consolidated config is
    falsy. Telemetry, the detected framework, the run start time and the
    metric definitions are passed to the conversion.
    """
    state = self._consolidated_config
    if not state:
        state = sender_config.ConfigState()

    return state.to_backend_dict(
        telemetry_record=self._telemetry_obj,
        framework=self._telemetry_get_framework(),
        start_time_millis=self._start_time,
        metric_pbdicts=self._config_metric_pbdict_list,
    )
|
825
|
+
|
826
|
+
def _config_save(
    self,
    config_value_dict: sender_config.BackendConfigDict,
) -> None:
    """Write ``config_value_dict`` to ``config.yaml`` in the run's files dir."""
    files_dir = self._settings.files_dir
    config_util.save_config_file_from_dict(
        os.path.join(files_dir, "config.yaml"), config_value_dict
    )
|
832
|
+
|
833
|
+
def _sync_spell(self) -> None:
    """Sync this run with spell.

    Publishes ``SPELL_RUN_URL`` into the run config and PUTs this run's URL
    to the Spell API. This is best effort: a failed request is logged and
    otherwise ignored. Does nothing when no run is configured.
    """
    if not self._run:
        return
    env = os.environ
    try:
        self._interface.publish_config(
            key=("_wandb", "spell_url"), val=env.get("SPELL_RUN_URL")
        )
        url = "{}/{}/{}/runs/{}".format(
            self._api.app_url, self._run.entity, self._run.project, self._run.run_id
        )
        requests.put(
            env.get("SPELL_API_URL", "https://api.spell.run") + "/wandb_url",
            json={"access_token": env.get("WANDB_ACCESS_TOKEN"), "url": url},
            timeout=2,
        )
    except requests.RequestException as e:
        # Keep this non-fatal, but leave a trace instead of failing silently.
        # TODO: do something if sync spell is not successful?
        logger.warning("unable to sync run with spell: %s", e)
|
853
|
+
|
854
|
+
def _setup_fork(self, server_run: dict):
    """Configure resume state and the run record for a forked run.

    The first step of the fork is one past the ``fork_from`` value; the
    history offset comes from the server's ``historyLineCount`` (0 if absent).

    Args:
        server_run: The run dict returned by the server.
    """
    fork_from = self._settings.fork_from
    assert fork_from
    assert fork_from.metric == "_step"
    run = self._run
    assert run

    first_step = int(fork_from.value) + 1
    resume_state = self._resume_state
    resume_state.step = first_step
    resume_state.history = server_run.get("historyLineCount", 0)
    run.forked = True
    run.starting_step = first_step
|
863
|
+
|
864
|
+
def _load_rewind_state(self, run: "RunRecord"):
    """Ask the backend to rewind ``run`` and capture the resulting state.

    The response is kept on ``self._rewind_response``; its history line
    count and JSON-encoded config are copied into ``self._resume_state``.
    """
    settings = self._settings
    assert settings.resume_from

    response = self._api.rewind_run(
        run_name=run.run_id,
        entity=run.entity or None,
        project=run.project or None,
        metric_name=settings.resume_from.metric,
        metric_value=settings.resume_from.value,
        program_path=settings.program or None,
    )
    self._rewind_response = response

    resume_state = self._resume_state
    resume_state.history = response.get("historyLineCount", 0)
    raw_config = response.get("config", "{}")
    resume_state.config = json.loads(raw_config)
|
878
|
+
|
879
|
+
def _install_rewind_state(self):
    """Apply the rewind point to the resume state and the run record.

    Requires ``resume_from`` to target ``_step`` and a rewind response to
    have been loaded already.
    """
    resume_from = self._settings.resume_from
    assert resume_from
    assert resume_from.metric == "_step"
    run = self._run
    assert run
    assert self._rewind_response

    first_step = int(resume_from.value) + 1
    self._resume_state.step = first_step

    # Rewind uses the forking infrastructure under the hood, so the fork
    # flag is set here. That ensures run._step is properly set in the
    # user process.
    run.forked = True
    run.starting_step = first_step
|
893
|
+
|
894
|
+
def _handle_error(
    self,
    record: "Record",
    error: "wandb_internal_pb2.ErrorInfo",
    run: "RunRecord",
) -> None:
    """Deliver a run error to the requester, or log it if nobody is waiting.

    Args:
        record: The record that triggered the error.
        error: The error to report.
        run: The run record copied into the result alongside the error.
    """
    control = record.control
    if not (control.req_resp or control.mailbox_slot):
        logger.error("Got error in async mode: %s", error.message)
        return

    result = proto_util._result_from_record(record)
    run_result = result.run_result
    run_result.run.CopyFrom(run)
    run_result.error.CopyFrom(error)
    self._respond_result(result)
|
907
|
+
|
908
|
+
def send_run(self, record: "Record", file_dir: Optional[str] = None) -> None:
    """Create or update the run on the backend and answer the run request.

    The first run record (while ``self._run`` is still unset) is treated as
    ``wandb.init``: resume/rewind/fork state is resolved and the streaming
    threads are started. Later run records only update the run.

    Any error is reported through ``_handle_error`` and stops processing,
    so at most one result is sent per record.

    Args:
        record: The record carrying the run to send.
        file_dir: Passed through to ``_start_run_threads`` on the first run.
    """
    run = record.run
    error = None
    is_wandb_init = self._run is None

    # save start time of a run
    self._start_time = int(run.start_time.ToMicroseconds() // 1e6)

    # update telemetry
    if run.telemetry:
        self._telemetry_obj.MergeFrom(run.telemetry)
    if self._settings._sync:
        self._telemetry_obj.feature.sync = True

    # build config dict
    config_value_dict: Optional[sender_config.BackendConfigDict] = None
    if run.config:
        self._consolidated_config.update_from_proto(run.config)
        config_value_dict = self._config_backend_dict()
        self._config_save(config_value_dict)

    do_fork = self._settings.fork_from is not None and is_wandb_init
    do_rewind = self._settings.resume_from is not None and is_wandb_init
    do_resume = bool(self._settings.resume)

    num_resume_options_set = sum([do_fork, do_rewind, do_resume])
    if num_resume_options_set > 1:
        error = wandb_internal_pb2.ErrorInfo()
        error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
        error.message = (
            "Multiple resume options specified. "
            "Please specify only one of `fork_from`, `resume`, or `resume_from`."
        )
        self._handle_error(record, error, run)
        # Stop here: continuing would overwrite `error` below, go on to
        # initialise the run and send a second result for this record.
        return

    if is_wandb_init:
        # Ensure we have a project to query for status
        if run.project == "":
            run.project = util.auto_project_name(self._settings.program)

        # Only check resume status on `wandb.init`
        if do_resume:
            error = self._setup_resume(run)
        elif do_rewind:
            error = self._load_rewind_state(run)

    if error is not None:
        self._handle_error(record, error, run)
        return

    # Save the resumed config
    if self._resume_state.config is not None:
        self._consolidated_config.merge_resumed_config(
            config_util.dict_strip_value_dict(self._resume_state.config)
        )

        config_value_dict = self._config_backend_dict()
        self._config_save(config_value_dict)

    # handle empty config
    # TODO(jhr): consolidate the 4 ways config is built:
    #            (passed config, empty config, resume config, send_config)
    if not config_value_dict:
        config_value_dict = self._config_backend_dict()
        self._config_save(config_value_dict)

    try:
        server_run = self._init_run(run, config_value_dict)
    except (CommError, UsageError) as e:
        logger.error(e, exc_info=True)
        error = ProtobufErrorHandler.from_exception(e)
        self._handle_error(record, error, run)
        return

    assert self._run  # self._run is configured in _init_run()

    if do_fork:
        error = self._setup_fork(server_run)

    if error is not None:
        self._handle_error(record, error, run)
        return

    if record.control.req_resp or record.control.mailbox_slot:
        result = proto_util._result_from_record(record)
        # TODO: we could do self._interface.publish_defer(resp) to notify
        # the handler not to actually perform server updates for this uuid
        # because the user process will send a summary update when we resume
        result.run_result.run.CopyFrom(self._run)
        self._respond_result(result)

    # Only spin up our threads on the first run message
    if is_wandb_init:
        self._start_run_threads(file_dir)
    else:
        logger.info("updated run: %s", self._run.run_id)
|
1005
|
+
|
1006
|
+
def _update_resume_state(self, is_rewinding: bool, inserted: bool):
    """Reflect the resume/rewind outcome on the run record and telemetry.

    Args:
        is_rewinding: Whether the run is being rewound.
        inserted: Whether the upsert created a new run.
    """
    assert self._run
    resume_state = self._resume_state

    if resume_state.resumed:
        self._run.resumed = True
        if resume_state.wandb_runtime is not None:
            self._run.runtime = resume_state.wandb_runtime
        return

    if is_rewinding:
        # is_rewinding is mutually exclusive with resume_state.resumed, so
        # this always runs when is_rewinding is set.
        self._install_rewind_state()
        return

    # If the user is not resuming and the upsert did not insert, we are
    # likely overwriting the run, which we might want to prevent in the
    # future. This can be a false signal: an upsert_run message retried in
    # the network can also show up as not inserted.
    if not inserted:
        # no need to flush this, it will get updated eventually
        self._telemetry_obj.feature.maybe_run_overwrite = True
|
1025
|
+
|
1026
|
+
def _init_run(
    self,
    run: "RunRecord",
    config_dict: Optional[sender_config.BackendConfigDict],
) -> dict:
    """Upsert (or rewind) the run on the backend and adopt the server's view.

    ``run`` becomes ``self._run`` and is updated in place with the resume
    state, start time, config, summary and whatever identifiers the server
    returned (storage id, display name, project, entity, sweep).

    Args:
        run: The run record to initialise.
        config_dict: Backend-format config sent with the upsert.

    Returns:
        The server's run dict (the rewind response when rewinding).

    Raises:
        ValueError: if the run is both resumed and being rewound.
    """
    # We subtract the previous runs runtime when resuming
    started_at = run.start_time.ToMicroseconds() / 1e6 - self._resume_state.runtime
    # TODO: we don't check inserted currently, ultimately we should make
    # the upsert know the resume state and fail transactionally

    if self._resume_state and self._resume_state.tags and not run.tags:
        run.tags.extend(self._resume_state.tags)

    is_rewinding = bool(self._settings.resume_from)
    if is_rewinding:
        assert self._rewind_response
        server_run = self._rewind_response
        server_messages = None
        inserted = True
    else:
        server_run, inserted, server_messages = self._api.upsert_run(
            name=run.run_id,
            entity=run.entity or None,
            project=run.project or None,
            group=run.run_group or None,
            job_type=run.job_type or None,
            display_name=run.display_name or None,
            notes=run.notes or None,
            tags=run.tags[:] or None,
            config=config_dict or None,
            sweep_name=run.sweep_id or None,
            host=run.host or None,
            program_path=self._settings.program or None,
            repo=run.git.remote_url or None,
            commit=run.git.commit or None,
        )

    # TODO: we don't want to create jobs in sweeps, since the
    # executable doesn't appear to be consistent
    if run.sweep_id:
        self._job_builder.disable = True

    self._server_messages = server_messages or []
    self._run = run

    if self._resume_state.resumed and is_rewinding:
        # this should not ever be possible to hit, since we check for
        # resumption above and raise an error if resumption is specified
        # twice.
        raise ValueError(
            "Cannot attempt to rewind and resume a run - only one of "
            "`resume` or `resume_from` can be specified."
        )

    self._update_resume_state(is_rewinding, inserted)
    self._run.starting_step = self._resume_state.step
    self._run.start_time.FromMicroseconds(int(started_at * 1e6))
    self._run.config.CopyFrom(self._interface._make_config(config_dict))

    resumed_summary = self._resume_state.summary
    if resumed_summary is not None:
        self._run.summary.CopyFrom(
            self._interface._make_summary_from_dict(resumed_summary)
        )

    # Adopt whatever identifiers the server reported.
    storage_id = server_run.get("id")
    if storage_id:
        self._run.storage_id = storage_id

    server_run_name = server_run.get("name")
    if server_run_name:
        self._api.set_current_run_id(server_run_name)

    display_name = server_run.get("displayName")
    if display_name:
        self._run.display_name = display_name

    # TODO: remove self._api.set_settings, and make self._project a property?
    project = server_run.get("project")
    if project:
        project_name = project.get("name")
        if project_name:
            self._run.project = project_name
            self._project = project_name
            self._api_settings["project"] = project_name
            self._api.set_setting("project", project_name)

        entity = project.get("entity")
        entity_name = entity.get("name") if entity else None
        if entity_name:
            self._run.entity = entity_name
            self._entity = entity_name
            self._api_settings["entity"] = entity_name
            self._api.set_setting("entity", entity_name)

    sweep_id = server_run.get("sweepName")
    if sweep_id:
        self._run.sweep_id = sweep_id

    if os.getenv("SPELL_RUN_URL"):
        self._sync_spell()
    return server_run
|
1122
|
+
|
1123
|
+
def _start_run_threads(self, file_dir: Optional[str] = None) -> None:
    """Start the file stream, file pusher and directory watcher for the run.

    Args:
        file_dir: Passed to the ``DirWatcher`` constructor.
    """
    assert self._run  # self._run is configured by caller
    self._fs = file_stream.FileStreamApi(
        self._api,
        self._run.run_id,
        self._run.start_time.ToMicroseconds() / 1e6,
        timeout=self._settings._file_stream_timeout_seconds,
        settings=self._api_settings,
    )

    # Ensure the streaming polices have the proper offsets
    self._fs.set_file_policy("wandb-summary.json", file_stream.SummaryFilePolicy())
    history_policy = file_stream.JsonlFilePolicy(
        start_chunk_id=self._resume_state.history
    )
    self._fs.set_file_policy("wandb-history.jsonl", history_policy)
    events_policy = file_stream.JsonlFilePolicy(
        start_chunk_id=self._resume_state.events
    )
    self._fs.set_file_policy("wandb-events.jsonl", events_policy)
    output_policy = file_stream.CRDedupeFilePolicy(
        start_chunk_id=self._resume_state.output
    )
    self._fs.set_file_policy("output.log", output_policy)

    # hack to merge run_settings and self._settings object together
    # so that fields like entity or project are available to be attached to Sentry events.
    sentry_tags = dict(self._settings)
    sentry_tags.update(message_to_dict(self._run))
    wandb._sentry.configure_scope(tags=sentry_tags, process_context="internal")

    self._fs.start()
    self._pusher = FilePusher(self._api, self._fs, settings=self._settings)
    self._dir_watcher = DirWatcher(self._settings, self._pusher, file_dir)
    logger.info(
        "run started: %s with start time %s",
        self._run.run_id,
        self._run.start_time.ToMicroseconds() / 1e6,
    )
|
1162
|
+
|
1163
|
+
def _save_history(self, history_dict: Dict[str, Any]) -> None:
    """Push one history row, JSON-encoded, to the file stream if it exists."""
    if not self._fs:
        return
    encoded_row = json.dumps(history_dict)
    self._fs.push(filenames.HISTORY_FNAME, encoded_row)
|
1166
|
+
|
1167
|
+
def send_history(self, record: "Record") -> None:
    """Convert a history record's items to a dict and save it."""
    items = record.history.item
    self._save_history(proto_util.dict_from_proto_list(items))
|
1171
|
+
|
1172
|
+
def _update_summary_record(self, summary: "SummaryRecord") -> None:
    """Replace the cached summary with ``summary``'s updates and republish it."""
    self._cached_summary = proto_util.dict_from_proto_list(summary.update)
    self._update_summary()
|
1176
|
+
|
1177
|
+
def send_summary(self, record: "Record") -> None:
    """Apply the summary carried directly on ``record``."""
    summary = record.summary
    self._update_summary_record(summary)
|
1179
|
+
|
1180
|
+
def send_request_summary_record(self, record: "Record") -> None:
    """Apply the summary wrapped in a ``summary_record`` request."""
    summary = record.request.summary_record.summary
    self._update_summary_record(summary)
|
1182
|
+
|
1183
|
+
def _update_summary(self) -> None:
    """Merge the cached summary into the consolidated one and publish it.

    Any ``_wandb`` key from the cached summary is replaced by the sender's
    own metadata summary (when non-empty). The merged summary is pushed to
    the file stream if one exists, written to the summary file in the
    run's files dir, and that file is queued for saving.
    """
    pending = self._cached_summary.copy()
    pending.pop("_wandb", None)
    metadata = self._metadata_summary
    if metadata:
        pending["_wandb"] = metadata

    # merge with consolidated summary
    self._consolidated_summary.update(pending)
    encoded = json.dumps(self._consolidated_summary)
    if self._fs:
        self._fs.push(filenames.SUMMARY_FNAME, encoded)

    # TODO(jhr): we should only write this at the end of the script
    target = os.path.join(self._settings.files_dir, filenames.SUMMARY_FNAME)
    with open(target, "w") as summary_file:
        summary_file.write(encoded)
    self._save_file(interface.GlobStr(filenames.SUMMARY_FNAME))
|
1198
|
+
|
1199
|
+
def send_stats(self, record: "Record") -> None:
    """Push a system-stats record to the events file stream.

    Only ``SYSTEM`` stats are handled, and only once both the file stream
    and the run exist. The row carries the flattened metrics plus
    ``_timestamp`` and ``_runtime`` (relative to the run start) in seconds.
    """
    stats = record.stats
    if stats.stats_type != wandb_internal_pb2.StatsRecord.StatsType.SYSTEM:
        return
    if not (self._fs and self._run):
        return

    now_us = stats.timestamp.ToMicroseconds()
    start_us = self._run.start_time.ToMicroseconds()
    system_metrics = {item.key: json.loads(item.value_json) for item in stats.item}

    row: Dict[str, Any] = {"system": system_metrics}
    self._flatten(row)
    row["_wandb"] = True
    row["_timestamp"] = now_us / 1e6
    row["_runtime"] = (now_us - start_us) / 1e6
    self._fs.push(filenames.EVENTS_FNAME, json.dumps(row))
    # TODO(jhr): check fs.push results?
|
1219
|
+
|
1220
|
+
def _output_raw_finish(self) -> None:
    """Stop every raw-output stream, flush it, and close the shared output file."""
    for stream, output_raw in self._output_raw_streams.items():
        output_raw._stopped.set()

        # shut down threads: give the writer five seconds, then wait for
        # it without a timeout
        writer = output_raw._writer_thr
        writer.join(timeout=5)
        if writer.is_alive():
            logger.info("processing output...")
            writer.join()
        output_raw._reader_thr.join()

        # flush output buffers and files
        self._output_raw_flush(stream)

    self._output_raw_streams = {}
    raw_file = self._output_raw_file
    if raw_file:
        raw_file.close()
        self._output_raw_file = None
|
1237
|
+
|
1238
|
+
def _output_raw_writer_thread(self, stream: "StreamLiterals") -> None:
    """Feed queued raw output for ``stream`` into its terminal emulator.

    Runs until the stream is stopped and its queue is empty. If the stream
    is already stopped and more than 100000 characters are pending, the
    emulator is bypassed: the pending chunks are flushed directly.
    """
    while True:
        output_raw = self._output_raw_streams[stream]
        pending = output_raw._queue
        if pending.empty():
            if output_raw._stopped.is_set():
                return
            time.sleep(0.5)
            continue

        chunks = []
        while not pending.empty():
            chunks.append(pending.get())

        if output_raw._stopped.is_set() and sum(map(len, chunks)) > 100000:
            logger.warning("Terminal output too large. Logging without processing.")
            # Flush what the emulator holds, then each raw chunk.
            self._output_raw_flush(stream)
            for chunk in chunks:
                self._output_raw_flush(stream, chunk)
            # TODO: lets mark that this happened in telemetry
            return

        try:
            output_raw._emulator.write("".join(chunks))
        except Exception as e:
            logger.warning(f"problem writing to output_raw emulator: {e}")
|
1260
|
+
|
1261
|
+
def _output_raw_reader_thread(self, stream: "StreamLiterals") -> None:
    """Periodically flush the emulator for ``stream`` until it is stopped and drained."""
    output_raw = self._output_raw_streams[stream]
    while True:
        if output_raw._stopped.is_set() and output_raw._queue.empty():
            return
        self._output_raw_flush(stream)
        time.sleep(_OUTPUT_MIN_CALLBACK_INTERVAL)
|
1266
|
+
|
1267
|
+
def _output_raw_flush(
    self, stream: "StreamLiterals", data: Optional[str] = None
) -> None:
    """Send output for ``stream`` and append it to the raw output file.

    Args:
        stream: Which stream the output belongs to.
        data: Text to send. When ``None``, the text is read from the
            stream's emulator; a failed read is logged and nothing is sent.
    """
    if data is None:
        emulator = self._output_raw_streams[stream]._emulator
        try:
            data = emulator.read()
        except Exception as e:
            logger.warning(f"problem reading from output_raw emulator: {e}")

    if not data:
        return
    self._send_output_line(stream, data)
    raw_file = self._output_raw_file
    if raw_file:
        raw_file.write(data.encode("utf-8"))
|
1280
|
+
|
1281
|
+
def send_request_python_packages(self, record: "Record") -> None:
    """Write the reported packages, sorted, as ``name==version`` lines.

    The output goes to the requirements file in the run's files dir.
    """
    import os

    from wandb.sdk.lib.filenames import REQUIREMENTS_FNAME

    packages = record.request.python_packages.package
    pinned = sorted(f"{pkg.name}=={pkg.version}" for pkg in packages)
    requirements_path = os.path.join(self._settings.files_dir, REQUIREMENTS_FNAME)
    with open(requirements_path, "w") as requirements_file:
        requirements_file.write("\n".join(pinned))
|
1291
|
+
|
1292
|
+
def send_output(self, record: "Record") -> None:
    """Forward one line of console output; ignored until the file stream exists."""
    if not self._fs:
        return
    out = record.output
    is_stderr = out.output_type == wandb_internal_pb2.OutputRecord.OutputType.STDERR
    stream: StreamLiterals = "stderr" if is_stderr else "stdout"
    self._send_output_line(stream, out.line)
|
1301
|
+
|
1302
|
+
def send_output_raw(self, record: "Record") -> None:
    """Queue raw console output for processing by its stream.

    The stream object is created and started on first use. Creating it
    also opens the console output file shared between both streams, if
    that file is not open yet. Ignored until the file stream exists.
    """
    if not self._fs:
        return
    out = record.output_raw
    is_stderr = (
        out.output_type == wandb_internal_pb2.OutputRawRecord.OutputType.STDERR
    )
    stream: StreamLiterals = "stderr" if is_stderr else "stdout"
    line = out.line

    output_raw = self._output_raw_streams.get(stream)
    if not output_raw:
        output_raw = _OutputRawStream(stream=stream, sm=self)
        self._output_raw_streams[stream] = output_raw

        # open the console output file shared between both streams
        if not self._output_raw_file:
            log_path = os.path.join(self._settings.files_dir, filenames.OUTPUT_FNAME)
            opened = None
            try:
                opened = filesystem.CRDedupedFile(open(log_path, "wb"))
            except OSError as e:
                logger.warning(f"could not open output_raw_file: {e}")
            if opened:
                self._output_raw_file = opened
        output_raw.start()

    output_raw._queue.put(line)
|
1333
|
+
|
1334
|
+
def _send_output_line(self, stream: "StreamLiterals", line: str) -> None:
    """Combined writer for raw and non raw output lines.

    This is combined because they are both post emulator.

    Text that does not end in a newline is buffered per stream in
    ``self._partial_output``.  A newline-terminated ``line`` is prefixed with
    "ERROR " (stderr only), a UTC ISO timestamp and any buffered text, pushed
    to ``self._fs`` when one is set, and the stream's buffer is cleared.

    Args:
        stream: "stdout" or "stderr".
        line: the text chunk to buffer or emit.
    """
    from datetime import timezone

    if not line.endswith("\n"):
        if line.startswith("\r"):
            # TODO: maybe we shouldnt just drop this, what if there was some \ns in the partial
            # that should probably be the check instead of not line.endswith(\n")
            # logger.info(f"Dropping data {self._partial_output[stream]}")
            buffered = ""
        else:
            buffered = self._partial_output.get(stream, "")
        self._partial_output[stream] = buffered + line
        # TODO(jhr): how do we make sure this gets flushed?
        # we might need this for other stuff like telemetry
        return

    # TODO(jhr): use time from timestamp proto
    # TODO(jhr): do we need to make sure we write full lines?
    # seems to be some issues with line breaks
    prepend = "ERROR " if stream == "stderr" else ""
    cur_time = time.time()
    # datetime.utcfromtimestamp() is deprecated since Python 3.12.  Convert via
    # an aware UTC datetime and drop tzinfo so isoformat() keeps the same
    # offset-free text as before.
    utc_now = datetime.fromtimestamp(cur_time, tz=timezone.utc).replace(tzinfo=None)
    timestamp = utc_now.isoformat() + " "
    prev_str = self._partial_output.get(stream, "")
    line = f"{prepend}{timestamp}{prev_str}{line}"
    if self._fs:
        self._fs.push(filenames.OUTPUT_FNAME, line)
    self._partial_output[stream] = ""
|
1363
|
+
|
1364
|
+
def _update_config(self) -> None:
    """Flag the config as changed by setting ``_config_needs_debounce``."""
    self._config_needs_debounce = True
|
1366
|
+
|
1367
|
+
def send_config(self, record: "Record") -> None:
    """Fold the record's config into the consolidated config and flag the change."""
    config_proto = record.config
    self._consolidated_config.update_from_proto(config_proto)
    self._update_config()
|
1370
|
+
|
1371
|
+
def send_metric(self, record: "Record") -> None:
    """Record a metric definition in the config-side metric tables.

    The incoming metric is merged into (or, when its control flag asks for
    overwrite, copied over) the entry stored under its name.  If it names a
    step metric that already has an index, a copy is encoded with
    ``step_metric_index`` (1-based) instead of the name.  The encoded dict
    replaces the metric's existing slot or is appended as a new one.
    Metrics carrying a glob name are logged and ignored.
    """
    incoming = record.metric
    if incoming.glob_name:
        logger.warning("Seen metric with glob (shouldn't happen)")
        return

    # merge or overwrite
    merged = self._config_metric_dict.get(
        incoming.name, wandb_internal_pb2.MetricRecord()
    )
    if not incoming._control.overwrite:
        merged.MergeFrom(incoming)
    else:
        merged.CopyFrom(incoming)
    self._config_metric_dict[incoming.name] = merged
    to_encode = merged

    # convert step_metric to index
    if to_encode.step_metric:
        step_idx = self._config_metric_index_dict.get(to_encode.step_metric)
        if step_idx is not None:
            # make a copy of this metric as we will be modifying it
            scratch = wandb_internal_pb2.Record()
            scratch.metric.CopyFrom(to_encode)
            to_encode = scratch.metric

            to_encode.ClearField("step_metric")
            to_encode.step_metric_index = step_idx + 1

    encoded: Dict[int, Any] = proto_util.proto_encode_to_dict(to_encode)
    slot = self._config_metric_index_dict.get(to_encode.name)
    if slot is None:
        # first sighting: append and remember the position
        self._config_metric_index_dict[to_encode.name] = len(
            self._config_metric_pbdict_list
        )
        self._config_metric_pbdict_list.append(encoded)
    else:
        self._config_metric_pbdict_list[slot] = encoded
    self._update_config()
|
1409
|
+
|
1410
|
+
def _update_telemetry_record(self, telemetry: telemetry.TelemetryRecord) -> None:
    """Merge ``telemetry`` into the accumulated telemetry object and flag the config."""
    accumulated = self._telemetry_obj
    accumulated.MergeFrom(telemetry)
    self._update_config()
|
1413
|
+
|
1414
|
+
def send_telemetry(self, record: "Record") -> None:
    """Merge the telemetry carried directly on ``record``."""
    payload = record.telemetry
    self._update_telemetry_record(payload)
|
1416
|
+
|
1417
|
+
def send_request_telemetry_record(self, record: "Record") -> None:
    """Merge the telemetry nested in a telemetry-record request."""
    payload = record.request.telemetry_record.telemetry
    self._update_telemetry_record(payload)
|
1419
|
+
|
1420
|
+
def _save_file(
    self, fname: interface.GlobStr, policy: "interface.PolicyName" = "end"
) -> None:
    """Log the request and, if a directory watcher exists, set the file's policy."""
    logger.info("saving file %s with policy %s", fname, policy)
    watcher = self._dir_watcher
    if not watcher:
        return
    watcher.update_policy(fname, policy)
|
1426
|
+
|
1427
|
+
def send_files(self, record: "Record") -> None:
    """Apply the save policy of every file entry listed in the record."""
    for entry in record.files.files:
        # TODO(jhr): fix paths with directories
        glob = interface.GlobStr(entry.path)
        policy = interface.file_enum_to_policy(entry.policy)
        self._save_file(glob, policy)
|
1434
|
+
|
1435
|
+
def send_header(self, record: "Record") -> None:
    """Accept a header record; no action is taken."""
    return None
|
1437
|
+
|
1438
|
+
def send_footer(self, record: "Record") -> None:
    """Accept a footer record; no action is taken."""
    return None
|
1440
|
+
|
1441
|
+
def send_tbrecord(self, record: "Record") -> None:
    """Accept a tbrecord; no action is taken here."""
    # tbrecord watching threads are handled by handler.py
    return None
|
1444
|
+
|
1445
|
+
def send_request_link_artifact(self, record: "Record") -> None:
    """Link an artifact to a portfolio and respond with the result.

    The link call is made only when an artifact id (client or server) and the
    portfolio name, entity and project are all non-empty.  A failure of the
    link call is recorded on the result and logged; the result is sent back
    in every case.

    Raises:
        ValueError: if the record's control has neither ``req_resp`` nor
            ``mailbox_slot`` set.
    """
    control = record.control
    if not control.req_resp and not control.mailbox_slot:
        raise ValueError(
            f"Expected either `req_resp` or `mailbox_slot`, got: {record.control!r}"
        )
    result = proto_util._result_from_record(record)

    link = record.request.link_artifact
    client_id, server_id = link.client_id, link.server_id
    portfolio_name = link.portfolio_name
    entity, project = link.portfolio_entity, link.portfolio_project
    aliases = link.portfolio_aliases
    logger.debug(
        f"link_artifact params - client_id={client_id}, server_id={server_id}, pfolio={portfolio_name}, entity={entity}, project={project}"
    )

    has_artifact_id = client_id or server_id
    if has_artifact_id and portfolio_name and entity and project:
        try:
            self._api.link_artifact(
                client_id, server_id, portfolio_name, entity, project, aliases
            )
        except Exception as e:
            # NOTE(review): the error is stored on log_artifact_response even
            # though this handles a link request -- confirm this is the field
            # the caller reads.
            result.response.log_artifact_response.error_message = f'error linking artifact to "{entity}/{project}/{portfolio_name}"; error: {e}'
            logger.warning("Failed to link artifact to portfolio: %s", e)

    self._respond_result(result)
|
1470
|
+
|
1471
|
+
def send_use_artifact(self, record: "Record") -> None:
    """Pretend to send a used artifact.

    This function doesn't actually send anything, it is just used internally:
    a partial job hands its artifact id to the job builder, while a "job"
    artifact without a partial job name disables the job builder.
    """
    use = record.use_artifact
    partial_job_name = use.partial.job_name

    if partial_job_name:
        # job is partial, let job builder rebuild job, set job source dict
        self._job_builder.set_partial_source_id(use.id)
    elif use.type == "job":
        self._job_builder.disable = True
|
1483
|
+
|
1484
|
+
def send_request_log_artifact(self, record: "Record") -> None:
    """Send the requested artifact and respond with its id or an error message.

    Any exception raised while sending (including an empty result) is turned
    into ``error_message`` on the response instead of propagating.
    """
    assert record.control.req_resp
    result = proto_util._result_from_record(record)
    request = record.request.log_artifact
    artifact = request.artifact
    history_step = request.history_step
    response = result.response.log_artifact_response

    try:
        res = self._send_artifact(artifact, history_step)
        assert res, "Unable to send artifact"
        response.artifact_id = res["id"]
        logger.info(f"logged artifact {artifact.name} - {res}")
    except Exception as e:
        response.error_message = (
            f'error logging artifact "{artifact.type}/{artifact.name}": {e}'
        )

    self._respond_result(result)
|
1501
|
+
|
1502
|
+
def send_artifact(self, record: "Record") -> None:
    """Send the record's artifact, logging (not raising) any failure."""
    artifact = record.artifact
    try:
        res = self._send_artifact(artifact)
        logger.info(f"sent artifact {artifact.name} - {res}")
    except Exception as e:
        message = (
            f'send_artifact: failed for artifact "{artifact.type}/{artifact.name}": {e}'
        )
        logger.error(message)
|
1513
|
+
|
1514
|
+
def _send_artifact(
    self, artifact: "ArtifactRecord", history_step: Optional[int] = None
) -> Optional[Dict]:
    """Save an artifact through ``ArtifactSaver`` and return the saver's result.

    Args:
        artifact: the artifact proto to save.
        history_step: forwarded unchanged to the saver.

    Returns:
        Whatever ``ArtifactSaver.save`` returns, or ``None`` when the artifact
        is distributed and the server's max CLI version is unknown or older
        than 0.10.16.
    """
    from wandb.util import parse_version

    assert self._pusher
    saver = ArtifactSaver(
        api=self._api,
        digest=artifact.digest,
        manifest_json=_manifest_json_from_proto(artifact.manifest),
        file_pusher=self._pusher,
        is_user_created=artifact.user_created,
    )

    if artifact.distributed_id:
        server_max = self._max_cli_version()
        too_old = server_max is None or (
            parse_version(server_max) < parse_version("0.10.16")
        )
        if too_old:
            logger.warning(
                "This W&B Server doesn't support distributed artifacts, "
                "have your administrator install wandb/local >= 0.9.37"
            )
            return None

    if artifact.metadata:
        metadata = json.loads(artifact.metadata)
    else:
        metadata = None

    # empty proto scalars are passed to the saver as None where the original did
    res = saver.save(
        type=artifact.type,
        name=artifact.name,
        client_id=artifact.client_id,
        sequence_client_id=artifact.sequence_client_id,
        metadata=metadata,
        ttl_duration_seconds=artifact.ttl_duration_seconds or None,
        description=artifact.description or None,
        aliases=artifact.aliases,
        tags=artifact.tags,
        use_after_commit=artifact.use_after_commit,
        distributed_id=artifact.distributed_id,
        finalize=artifact.finalize,
        incremental=artifact.incremental_beta1,
        history_step=history_step,
        base_id=artifact.base_id or None,
    )

    self._job_builder._handle_server_artifact(res, artifact)

    # remove the on-disk manifest file if one was used; a missing file is fine
    manifest_path = artifact.manifest.manifest_file_path
    if manifest_path:
        with contextlib.suppress(FileNotFoundError):
            os.remove(manifest_path)
    return res
|
1564
|
+
|
1565
|
+
def send_alert(self, record: "Record") -> None:
    """Forward a run alert to the API when the server supports alerts.

    When the server's max CLI version is unknown or older than 0.10.9 a
    warning is logged and nothing is sent.  A failing API call is logged,
    not raised.
    """
    from wandb.util import parse_version

    alert = record.alert
    server_max = self._max_cli_version()
    unsupported = server_max is None or (
        parse_version(server_max) < parse_version("0.10.9")
    )
    if unsupported:
        logger.warning(
            "This W&B server doesn't support alerts, "
            "have your administrator install wandb/local >= 0.9.31"
        )
        return

    try:
        self._api.notify_scriptable_run_alert(
            title=alert.title,
            text=alert.text,
            level=alert.level,
            wait_duration=alert.wait_duration,
        )
    except Exception as e:
        logger.error(f"send_alert: failed for alert {alert.title!r}: {e}")
|
1587
|
+
|
1588
|
+
def finish(self) -> None:
    """Shut the sender down.

    Order: raw output streams, then the directory watcher, the file pusher
    (finished and joined) and the file stream (finished with the exit code);
    each of the three is cleared afterwards.  The sentry session is ended last.
    """
    logger.info("shutting down sender")
    # if self._tb_watcher:
    #     self._tb_watcher.finish()
    self._output_raw_finish()

    watcher = self._dir_watcher
    if watcher:
        watcher.finish()
        self._dir_watcher = None

    pusher = self._pusher
    if pusher:
        pusher.finish()
        pusher.join()
        self._pusher = None

    file_stream = self._fs
    if file_stream:
        file_stream.finish(self._exit_code)
        self._fs = None

    wandb._sentry.end_session()
|
1604
|
+
|
1605
|
+
def _max_cli_version(self) -> Optional[str]:
    """Return the server-reported ``max_cli_version`` string, if any.

    Returns:
        The version string, or ``None`` when the server info has no
        ``cliVersionInfo`` mapping, the mapping lacks ``max_cli_version``, or
        the value is not a string.
    """
    server_info = self.get_server_info()
    # The key may be present with a null value, in which case dict.get's
    # default is not used; treat that (and any non-dict value) as "unknown"
    # instead of failing on ``None.get``.
    cli_version_info = server_info.get("cliVersionInfo") or {}
    if not isinstance(cli_version_info, dict):
        return None
    max_cli_version = cli_version_info.get("max_cli_version")
    if not isinstance(max_cli_version, str):
        return None
    return max_cli_version
|
1613
|
+
|
1614
|
+
def get_viewer_server_info(self) -> None:
    """Fetch viewer and server info from the API unless both are already cached."""
    already_cached = self._cached_server_info and self._cached_viewer
    if not already_cached:
        self._cached_viewer, self._cached_server_info = self._api.viewer_server_info()
|
1618
|
+
|
1619
|
+
def get_viewer_info(self) -> Dict[str, Any]:
    """Return the cached viewer info, fetching it first when the cache is empty."""
    if self._cached_viewer:
        return self._cached_viewer
    self.get_viewer_server_info()
    return self._cached_viewer
|
1623
|
+
|
1624
|
+
def get_server_info(self) -> Dict[str, Any]:
    """Return the cached server info, fetching it first when the cache is empty."""
    if self._cached_server_info:
        return self._cached_server_info
    self.get_viewer_server_info()
    return self._cached_server_info
|
1628
|
+
|
1629
|
+
def get_local_info(self) -> "LocalInfo":
    """Queries the server to get the local version information.

    First, we perform an introspection, if it returns empty we deduce that the
    docker image is out-of-date. Otherwise, we use the returned values to deduce the
    state of the local server.  Offline runs, and a null version-info entry,
    are reported as not out of date.
    """
    info = wandb_internal_pb2.LocalInfo()
    if self._settings._offline:
        info.out_of_date = False
        return info

    # Assuming the query is successful if the result is empty it indicates that
    # the backend is out of date since it doesn't have the desired field
    version_info = self.get_server_info().get("latestLocalVersionInfo", {})
    if version_info is None:
        info.out_of_date = False
        return info

    info.out_of_date = version_info.get("outOfDate", True)
    # fall back to the literal "latest" when no version string is reported
    info.version = version_info.get("latestVersionString", "latest")
    return info
|
1655
|
+
|
1656
|
+
def _flush_job(self) -> None:
    """Build the job artifact from the current config/summary and publish it.

    Skipped when the job builder is disabled or the run is offline.  Nothing
    is published when the builder returns no artifact or no run is set.
    """
    builder = self._job_builder
    if builder.disable or self._settings._offline:
        return

    builder.set_config(self._consolidated_config.non_internal_config())
    summary = self._cached_summary.copy()
    summary.pop("_wandb", None)
    builder.set_summary(summary)

    built = builder.build(api=self._api)
    if built is None or self._run is None:
        return

    proto_artifact = self._interface._make_artifact(built)
    proto_artifact.run_id = self._run.run_id
    proto_artifact.project = self._run.project
    proto_artifact.entity = self._run.entity
    # TODO: this should be removed when the latest tag is handled
    # by the backend (WB-12116)
    proto_artifact.aliases.append("latest")
    # add docker image tag
    proto_artifact.aliases.extend(builder._aliases)

    proto_artifact.user_created = True
    proto_artifact.use_after_commit = True
    proto_artifact.finalize = True

    self._interface._publish_artifact(proto_artifact)
|
1682
|
+
|
1683
|
+
def __next__(self) -> "Record":
    """Block until a record is available on the record queue and return it."""
    record = self._record_q.get(block=True)
    return record


next = __next__
|