wandb 0.19.1__py3-none-musllinux_1_2_aarch64.whl
Sign up to get free protection for your applications and to get access to all the features.
- package_readme.md +97 -0
- wandb/__init__.py +246 -0
- wandb/__init__.pyi +1197 -0
- wandb/__main__.py +3 -0
- wandb/_globals.py +19 -0
- wandb/agents/__init__.py +0 -0
- wandb/agents/pyagent.py +363 -0
- wandb/analytics/__init__.py +3 -0
- wandb/analytics/sentry.py +263 -0
- wandb/apis/__init__.py +48 -0
- wandb/apis/attrs.py +51 -0
- wandb/apis/importers/__init__.py +1 -0
- wandb/apis/importers/internals/internal.py +385 -0
- wandb/apis/importers/internals/protocols.py +103 -0
- wandb/apis/importers/internals/util.py +78 -0
- wandb/apis/importers/mlflow.py +254 -0
- wandb/apis/importers/validation.py +108 -0
- wandb/apis/importers/wandb.py +1603 -0
- wandb/apis/internal.py +232 -0
- wandb/apis/normalize.py +73 -0
- wandb/apis/paginator.py +81 -0
- wandb/apis/public/__init__.py +34 -0
- wandb/apis/public/api.py +1387 -0
- wandb/apis/public/artifacts.py +1095 -0
- wandb/apis/public/const.py +4 -0
- wandb/apis/public/files.py +263 -0
- wandb/apis/public/history.py +149 -0
- wandb/apis/public/jobs.py +653 -0
- wandb/apis/public/projects.py +154 -0
- wandb/apis/public/query_generator.py +166 -0
- wandb/apis/public/reports.py +458 -0
- wandb/apis/public/runs.py +1012 -0
- wandb/apis/public/sweeps.py +240 -0
- wandb/apis/public/teams.py +198 -0
- wandb/apis/public/users.py +136 -0
- wandb/apis/public/utils.py +68 -0
- wandb/apis/reports/__init__.py +1 -0
- wandb/apis/reports/v1/__init__.py +8 -0
- wandb/apis/reports/v2/__init__.py +8 -0
- wandb/apis/workspaces/__init__.py +8 -0
- wandb/beta/workflows.py +288 -0
- wandb/bin/gpu_stats +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/__init__.py +0 -0
- wandb/cli/beta.py +178 -0
- wandb/cli/cli.py +2812 -0
- wandb/data_types.py +66 -0
- wandb/docker/__init__.py +343 -0
- wandb/docker/auth.py +435 -0
- wandb/docker/wandb-entrypoint.sh +33 -0
- wandb/docker/www_authenticate.py +94 -0
- wandb/env.py +513 -0
- wandb/errors/__init__.py +17 -0
- wandb/errors/errors.py +37 -0
- wandb/errors/links.py +73 -0
- wandb/errors/term.py +415 -0
- wandb/errors/util.py +57 -0
- wandb/errors/warnings.py +2 -0
- wandb/filesync/__init__.py +0 -0
- wandb/filesync/dir_watcher.py +403 -0
- wandb/filesync/stats.py +100 -0
- wandb/filesync/step_checksum.py +142 -0
- wandb/filesync/step_prepare.py +179 -0
- wandb/filesync/step_upload.py +287 -0
- wandb/filesync/upload_job.py +142 -0
- wandb/integration/__init__.py +0 -0
- wandb/integration/catboost/__init__.py +5 -0
- wandb/integration/catboost/catboost.py +178 -0
- wandb/integration/cohere/__init__.py +3 -0
- wandb/integration/cohere/cohere.py +21 -0
- wandb/integration/cohere/resolver.py +347 -0
- wandb/integration/diffusers/__init__.py +3 -0
- wandb/integration/diffusers/autologger.py +76 -0
- wandb/integration/diffusers/pipeline_resolver.py +50 -0
- wandb/integration/diffusers/resolvers/__init__.py +9 -0
- wandb/integration/diffusers/resolvers/multimodal.py +882 -0
- wandb/integration/diffusers/resolvers/utils.py +102 -0
- wandb/integration/fastai/__init__.py +245 -0
- wandb/integration/gym/__init__.py +99 -0
- wandb/integration/huggingface/__init__.py +3 -0
- wandb/integration/huggingface/huggingface.py +18 -0
- wandb/integration/huggingface/resolver.py +213 -0
- wandb/integration/keras/__init__.py +11 -0
- wandb/integration/keras/callbacks/__init__.py +5 -0
- wandb/integration/keras/callbacks/metrics_logger.py +129 -0
- wandb/integration/keras/callbacks/model_checkpoint.py +188 -0
- wandb/integration/keras/callbacks/tables_builder.py +228 -0
- wandb/integration/keras/keras.py +1089 -0
- wandb/integration/kfp/__init__.py +6 -0
- wandb/integration/kfp/helpers.py +28 -0
- wandb/integration/kfp/kfp_patch.py +334 -0
- wandb/integration/kfp/wandb_logging.py +182 -0
- wandb/integration/langchain/__init__.py +3 -0
- wandb/integration/langchain/wandb_tracer.py +48 -0
- wandb/integration/lightgbm/__init__.py +239 -0
- wandb/integration/lightning/__init__.py +0 -0
- wandb/integration/lightning/fabric/__init__.py +3 -0
- wandb/integration/lightning/fabric/logger.py +764 -0
- wandb/integration/metaflow/__init__.py +3 -0
- wandb/integration/metaflow/metaflow.py +383 -0
- wandb/integration/openai/__init__.py +3 -0
- wandb/integration/openai/fine_tuning.py +480 -0
- wandb/integration/openai/openai.py +22 -0
- wandb/integration/openai/resolver.py +240 -0
- wandb/integration/prodigy/__init__.py +3 -0
- wandb/integration/prodigy/prodigy.py +299 -0
- wandb/integration/sacred/__init__.py +117 -0
- wandb/integration/sagemaker/__init__.py +12 -0
- wandb/integration/sagemaker/auth.py +28 -0
- wandb/integration/sagemaker/config.py +49 -0
- wandb/integration/sagemaker/files.py +3 -0
- wandb/integration/sagemaker/resources.py +34 -0
- wandb/integration/sb3/__init__.py +3 -0
- wandb/integration/sb3/sb3.py +147 -0
- wandb/integration/sklearn/__init__.py +37 -0
- wandb/integration/sklearn/calculate/__init__.py +32 -0
- wandb/integration/sklearn/calculate/calibration_curves.py +125 -0
- wandb/integration/sklearn/calculate/class_proportions.py +68 -0
- wandb/integration/sklearn/calculate/confusion_matrix.py +93 -0
- wandb/integration/sklearn/calculate/decision_boundaries.py +40 -0
- wandb/integration/sklearn/calculate/elbow_curve.py +55 -0
- wandb/integration/sklearn/calculate/feature_importances.py +67 -0
- wandb/integration/sklearn/calculate/learning_curve.py +64 -0
- wandb/integration/sklearn/calculate/outlier_candidates.py +69 -0
- wandb/integration/sklearn/calculate/residuals.py +86 -0
- wandb/integration/sklearn/calculate/silhouette.py +118 -0
- wandb/integration/sklearn/calculate/summary_metrics.py +62 -0
- wandb/integration/sklearn/plot/__init__.py +35 -0
- wandb/integration/sklearn/plot/classifier.py +329 -0
- wandb/integration/sklearn/plot/clusterer.py +146 -0
- wandb/integration/sklearn/plot/regressor.py +121 -0
- wandb/integration/sklearn/plot/shared.py +91 -0
- wandb/integration/sklearn/utils.py +183 -0
- wandb/integration/tensorboard/__init__.py +10 -0
- wandb/integration/tensorboard/log.py +354 -0
- wandb/integration/tensorboard/monkeypatch.py +186 -0
- wandb/integration/tensorflow/__init__.py +5 -0
- wandb/integration/tensorflow/estimator_hook.py +54 -0
- wandb/integration/torch/__init__.py +0 -0
- wandb/integration/torch/wandb_torch.py +554 -0
- wandb/integration/ultralytics/__init__.py +11 -0
- wandb/integration/ultralytics/bbox_utils.py +215 -0
- wandb/integration/ultralytics/callback.py +524 -0
- wandb/integration/ultralytics/classification_utils.py +83 -0
- wandb/integration/ultralytics/mask_utils.py +202 -0
- wandb/integration/ultralytics/pose_utils.py +103 -0
- wandb/integration/xgboost/__init__.py +11 -0
- wandb/integration/xgboost/xgboost.py +189 -0
- wandb/integration/yolov8/__init__.py +0 -0
- wandb/integration/yolov8/yolov8.py +284 -0
- wandb/jupyter.py +513 -0
- wandb/mpmain/__init__.py +0 -0
- wandb/mpmain/__main__.py +1 -0
- wandb/old/__init__.py +0 -0
- wandb/old/core.py +53 -0
- wandb/old/settings.py +173 -0
- wandb/old/summary.py +440 -0
- wandb/plot/__init__.py +28 -0
- wandb/plot/bar.py +70 -0
- wandb/plot/confusion_matrix.py +181 -0
- wandb/plot/custom_chart.py +124 -0
- wandb/plot/histogram.py +65 -0
- wandb/plot/line.py +74 -0
- wandb/plot/line_series.py +176 -0
- wandb/plot/pr_curve.py +185 -0
- wandb/plot/roc_curve.py +163 -0
- wandb/plot/scatter.py +66 -0
- wandb/plot/utils.py +183 -0
- wandb/plot/viz.py +41 -0
- wandb/proto/__init__.py +0 -0
- wandb/proto/v3/__init__.py +0 -0
- wandb/proto/v3/wandb_base_pb2.py +55 -0
- wandb/proto/v3/wandb_internal_pb2.py +1658 -0
- wandb/proto/v3/wandb_server_pb2.py +228 -0
- wandb/proto/v3/wandb_settings_pb2.py +122 -0
- wandb/proto/v3/wandb_telemetry_pb2.py +106 -0
- wandb/proto/v4/__init__.py +0 -0
- wandb/proto/v4/wandb_base_pb2.py +30 -0
- wandb/proto/v4/wandb_internal_pb2.py +370 -0
- wandb/proto/v4/wandb_server_pb2.py +67 -0
- wandb/proto/v4/wandb_settings_pb2.py +47 -0
- wandb/proto/v4/wandb_telemetry_pb2.py +41 -0
- wandb/proto/v5/wandb_base_pb2.py +31 -0
- wandb/proto/v5/wandb_internal_pb2.py +371 -0
- wandb/proto/v5/wandb_server_pb2.py +68 -0
- wandb/proto/v5/wandb_settings_pb2.py +48 -0
- wandb/proto/v5/wandb_telemetry_pb2.py +42 -0
- wandb/proto/wandb_base_pb2.py +10 -0
- wandb/proto/wandb_deprecated.py +45 -0
- wandb/proto/wandb_generate_deprecated.py +30 -0
- wandb/proto/wandb_generate_proto.py +49 -0
- wandb/proto/wandb_internal_pb2.py +16 -0
- wandb/proto/wandb_server_pb2.py +10 -0
- wandb/proto/wandb_settings_pb2.py +10 -0
- wandb/proto/wandb_telemetry_pb2.py +10 -0
- wandb/py.typed +0 -0
- wandb/sdk/__init__.py +37 -0
- wandb/sdk/artifacts/__init__.py +0 -0
- wandb/sdk/artifacts/_validators.py +121 -0
- wandb/sdk/artifacts/artifact.py +2364 -0
- wandb/sdk/artifacts/artifact_download_logger.py +43 -0
- wandb/sdk/artifacts/artifact_file_cache.py +249 -0
- wandb/sdk/artifacts/artifact_instance_cache.py +17 -0
- wandb/sdk/artifacts/artifact_manifest.py +75 -0
- wandb/sdk/artifacts/artifact_manifest_entry.py +249 -0
- wandb/sdk/artifacts/artifact_manifests/__init__.py +0 -0
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +92 -0
- wandb/sdk/artifacts/artifact_saver.py +265 -0
- wandb/sdk/artifacts/artifact_state.py +11 -0
- wandb/sdk/artifacts/artifact_ttl.py +7 -0
- wandb/sdk/artifacts/exceptions.py +57 -0
- wandb/sdk/artifacts/staging.py +25 -0
- wandb/sdk/artifacts/storage_handler.py +62 -0
- wandb/sdk/artifacts/storage_handlers/__init__.py +0 -0
- wandb/sdk/artifacts/storage_handlers/azure_handler.py +213 -0
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +224 -0
- wandb/sdk/artifacts/storage_handlers/http_handler.py +114 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +139 -0
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +56 -0
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +298 -0
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +72 -0
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +135 -0
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +74 -0
- wandb/sdk/artifacts/storage_layout.py +6 -0
- wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
- wandb/sdk/artifacts/storage_policies/register.py +1 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +378 -0
- wandb/sdk/artifacts/storage_policy.py +72 -0
- wandb/sdk/backend/__init__.py +0 -0
- wandb/sdk/backend/backend.py +221 -0
- wandb/sdk/data_types/__init__.py +0 -0
- wandb/sdk/data_types/_dtypes.py +918 -0
- wandb/sdk/data_types/_private.py +10 -0
- wandb/sdk/data_types/audio.py +165 -0
- wandb/sdk/data_types/base_types/__init__.py +0 -0
- wandb/sdk/data_types/base_types/json_metadata.py +55 -0
- wandb/sdk/data_types/base_types/media.py +376 -0
- wandb/sdk/data_types/base_types/wb_value.py +282 -0
- wandb/sdk/data_types/bokeh.py +70 -0
- wandb/sdk/data_types/graph.py +405 -0
- wandb/sdk/data_types/helper_types/__init__.py +0 -0
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +305 -0
- wandb/sdk/data_types/helper_types/classes.py +159 -0
- wandb/sdk/data_types/helper_types/image_mask.py +241 -0
- wandb/sdk/data_types/histogram.py +94 -0
- wandb/sdk/data_types/html.py +115 -0
- wandb/sdk/data_types/image.py +847 -0
- wandb/sdk/data_types/molecule.py +241 -0
- wandb/sdk/data_types/object_3d.py +470 -0
- wandb/sdk/data_types/plotly.py +82 -0
- wandb/sdk/data_types/saved_model.py +445 -0
- wandb/sdk/data_types/table.py +1204 -0
- wandb/sdk/data_types/trace_tree.py +438 -0
- wandb/sdk/data_types/utils.py +228 -0
- wandb/sdk/data_types/video.py +268 -0
- wandb/sdk/integration_utils/__init__.py +0 -0
- wandb/sdk/integration_utils/auto_logging.py +232 -0
- wandb/sdk/integration_utils/data_logging.py +475 -0
- wandb/sdk/interface/__init__.py +0 -0
- wandb/sdk/interface/constants.py +4 -0
- wandb/sdk/interface/interface.py +1010 -0
- wandb/sdk/interface/interface_queue.py +53 -0
- wandb/sdk/interface/interface_relay.py +53 -0
- wandb/sdk/interface/interface_shared.py +546 -0
- wandb/sdk/interface/interface_sock.py +61 -0
- wandb/sdk/interface/message_future.py +27 -0
- wandb/sdk/interface/message_future_poll.py +50 -0
- wandb/sdk/interface/router.py +115 -0
- wandb/sdk/interface/router_queue.py +41 -0
- wandb/sdk/interface/router_relay.py +37 -0
- wandb/sdk/interface/router_sock.py +36 -0
- wandb/sdk/interface/summary_record.py +67 -0
- wandb/sdk/internal/__init__.py +0 -0
- wandb/sdk/internal/context.py +89 -0
- wandb/sdk/internal/datastore.py +297 -0
- wandb/sdk/internal/file_pusher.py +181 -0
- wandb/sdk/internal/file_stream.py +695 -0
- wandb/sdk/internal/flow_control.py +263 -0
- wandb/sdk/internal/handler.py +905 -0
- wandb/sdk/internal/internal.py +403 -0
- wandb/sdk/internal/internal_api.py +4587 -0
- wandb/sdk/internal/internal_util.py +97 -0
- wandb/sdk/internal/job_builder.py +638 -0
- wandb/sdk/internal/profiler.py +78 -0
- wandb/sdk/internal/progress.py +79 -0
- wandb/sdk/internal/run.py +25 -0
- wandb/sdk/internal/sample.py +70 -0
- wandb/sdk/internal/sender.py +1696 -0
- wandb/sdk/internal/sender_config.py +197 -0
- wandb/sdk/internal/settings_static.py +97 -0
- wandb/sdk/internal/system/__init__.py +0 -0
- wandb/sdk/internal/system/assets/__init__.py +25 -0
- wandb/sdk/internal/system/assets/aggregators.py +37 -0
- wandb/sdk/internal/system/assets/asset_registry.py +20 -0
- wandb/sdk/internal/system/assets/cpu.py +163 -0
- wandb/sdk/internal/system/assets/disk.py +210 -0
- wandb/sdk/internal/system/assets/gpu.py +416 -0
- wandb/sdk/internal/system/assets/gpu_amd.py +233 -0
- wandb/sdk/internal/system/assets/interfaces.py +205 -0
- wandb/sdk/internal/system/assets/ipu.py +177 -0
- wandb/sdk/internal/system/assets/memory.py +166 -0
- wandb/sdk/internal/system/assets/network.py +125 -0
- wandb/sdk/internal/system/assets/open_metrics.py +293 -0
- wandb/sdk/internal/system/assets/tpu.py +154 -0
- wandb/sdk/internal/system/assets/trainium.py +393 -0
- wandb/sdk/internal/system/env_probe_helpers.py +13 -0
- wandb/sdk/internal/system/system_info.py +250 -0
- wandb/sdk/internal/system/system_monitor.py +222 -0
- wandb/sdk/internal/tb_watcher.py +519 -0
- wandb/sdk/internal/thread_local_settings.py +18 -0
- wandb/sdk/internal/writer.py +204 -0
- wandb/sdk/launch/__init__.py +15 -0
- wandb/sdk/launch/_launch.py +331 -0
- wandb/sdk/launch/_launch_add.py +255 -0
- wandb/sdk/launch/_project_spec.py +566 -0
- wandb/sdk/launch/agent/__init__.py +5 -0
- wandb/sdk/launch/agent/agent.py +924 -0
- wandb/sdk/launch/agent/config.py +296 -0
- wandb/sdk/launch/agent/job_status_tracker.py +53 -0
- wandb/sdk/launch/agent/run_queue_item_file_saver.py +39 -0
- wandb/sdk/launch/builder/__init__.py +0 -0
- wandb/sdk/launch/builder/abstract.py +156 -0
- wandb/sdk/launch/builder/build.py +297 -0
- wandb/sdk/launch/builder/context_manager.py +235 -0
- wandb/sdk/launch/builder/docker_builder.py +177 -0
- wandb/sdk/launch/builder/kaniko_builder.py +594 -0
- wandb/sdk/launch/builder/noop.py +58 -0
- wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +188 -0
- wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
- wandb/sdk/launch/create_job.py +528 -0
- wandb/sdk/launch/environment/abstract.py +29 -0
- wandb/sdk/launch/environment/aws_environment.py +322 -0
- wandb/sdk/launch/environment/azure_environment.py +105 -0
- wandb/sdk/launch/environment/gcp_environment.py +335 -0
- wandb/sdk/launch/environment/local_environment.py +65 -0
- wandb/sdk/launch/errors.py +13 -0
- wandb/sdk/launch/git_reference.py +109 -0
- wandb/sdk/launch/inputs/files.py +148 -0
- wandb/sdk/launch/inputs/internal.py +315 -0
- wandb/sdk/launch/inputs/manage.py +113 -0
- wandb/sdk/launch/inputs/schema.py +39 -0
- wandb/sdk/launch/loader.py +249 -0
- wandb/sdk/launch/registry/abstract.py +48 -0
- wandb/sdk/launch/registry/anon.py +29 -0
- wandb/sdk/launch/registry/azure_container_registry.py +124 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +192 -0
- wandb/sdk/launch/registry/google_artifact_registry.py +219 -0
- wandb/sdk/launch/registry/local_registry.py +65 -0
- wandb/sdk/launch/runner/__init__.py +0 -0
- wandb/sdk/launch/runner/abstract.py +185 -0
- wandb/sdk/launch/runner/kubernetes_monitor.py +472 -0
- wandb/sdk/launch/runner/kubernetes_runner.py +963 -0
- wandb/sdk/launch/runner/local_container.py +301 -0
- wandb/sdk/launch/runner/local_process.py +78 -0
- wandb/sdk/launch/runner/sagemaker_runner.py +426 -0
- wandb/sdk/launch/runner/vertex_runner.py +230 -0
- wandb/sdk/launch/sweeps/__init__.py +37 -0
- wandb/sdk/launch/sweeps/scheduler.py +740 -0
- wandb/sdk/launch/sweeps/scheduler_sweep.py +90 -0
- wandb/sdk/launch/sweeps/utils.py +316 -0
- wandb/sdk/launch/utils.py +747 -0
- wandb/sdk/launch/wandb_reference.py +138 -0
- wandb/sdk/lib/__init__.py +5 -0
- wandb/sdk/lib/apikey.py +269 -0
- wandb/sdk/lib/capped_dict.py +26 -0
- wandb/sdk/lib/config_util.py +101 -0
- wandb/sdk/lib/credentials.py +141 -0
- wandb/sdk/lib/deprecate.py +42 -0
- wandb/sdk/lib/disabled.py +29 -0
- wandb/sdk/lib/exit_hooks.py +54 -0
- wandb/sdk/lib/file_stream_utils.py +118 -0
- wandb/sdk/lib/filenames.py +64 -0
- wandb/sdk/lib/filesystem.py +372 -0
- wandb/sdk/lib/fsm.py +180 -0
- wandb/sdk/lib/gitlib.py +239 -0
- wandb/sdk/lib/gql_request.py +65 -0
- wandb/sdk/lib/handler_util.py +21 -0
- wandb/sdk/lib/hashutil.py +84 -0
- wandb/sdk/lib/import_hooks.py +275 -0
- wandb/sdk/lib/ipython.py +126 -0
- wandb/sdk/lib/json_util.py +80 -0
- wandb/sdk/lib/lazyloader.py +63 -0
- wandb/sdk/lib/mailbox.py +456 -0
- wandb/sdk/lib/module.py +78 -0
- wandb/sdk/lib/paths.py +106 -0
- wandb/sdk/lib/preinit.py +42 -0
- wandb/sdk/lib/printer.py +548 -0
- wandb/sdk/lib/progress.py +279 -0
- wandb/sdk/lib/proto_util.py +90 -0
- wandb/sdk/lib/redirect.py +845 -0
- wandb/sdk/lib/retry.py +289 -0
- wandb/sdk/lib/run_moment.py +72 -0
- wandb/sdk/lib/runid.py +12 -0
- wandb/sdk/lib/server.py +38 -0
- wandb/sdk/lib/service_connection.py +216 -0
- wandb/sdk/lib/service_token.py +94 -0
- wandb/sdk/lib/sock_client.py +290 -0
- wandb/sdk/lib/sparkline.py +44 -0
- wandb/sdk/lib/telemetry.py +100 -0
- wandb/sdk/lib/timed_input.py +133 -0
- wandb/sdk/lib/timer.py +19 -0
- wandb/sdk/service/__init__.py +0 -0
- wandb/sdk/service/_startup_debug.py +22 -0
- wandb/sdk/service/port_file.py +53 -0
- wandb/sdk/service/server.py +107 -0
- wandb/sdk/service/server_sock.py +274 -0
- wandb/sdk/service/service.py +242 -0
- wandb/sdk/service/streams.py +425 -0
- wandb/sdk/verify/__init__.py +0 -0
- wandb/sdk/verify/verify.py +501 -0
- wandb/sdk/wandb_alerts.py +12 -0
- wandb/sdk/wandb_config.py +322 -0
- wandb/sdk/wandb_helper.py +54 -0
- wandb/sdk/wandb_init.py +1313 -0
- wandb/sdk/wandb_login.py +339 -0
- wandb/sdk/wandb_metric.py +110 -0
- wandb/sdk/wandb_require.py +94 -0
- wandb/sdk/wandb_require_helpers.py +44 -0
- wandb/sdk/wandb_run.py +4066 -0
- wandb/sdk/wandb_settings.py +1309 -0
- wandb/sdk/wandb_setup.py +402 -0
- wandb/sdk/wandb_summary.py +150 -0
- wandb/sdk/wandb_sweep.py +119 -0
- wandb/sdk/wandb_sync.py +82 -0
- wandb/sdk/wandb_watch.py +150 -0
- wandb/sklearn.py +35 -0
- wandb/sync/__init__.py +3 -0
- wandb/sync/sync.py +442 -0
- wandb/trigger.py +29 -0
- wandb/util.py +1955 -0
- wandb/vendor/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/setup.py +40 -0
- wandb/vendor/gql-0.2.0/tests/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/tests/starwars/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/tests/starwars/fixtures.py +96 -0
- wandb/vendor/gql-0.2.0/tests/starwars/schema.py +146 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_dsl.py +293 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_query.py +355 -0
- wandb/vendor/gql-0.2.0/tests/starwars/test_validation.py +171 -0
- wandb/vendor/gql-0.2.0/tests/test_client.py +31 -0
- wandb/vendor/gql-0.2.0/tests/test_transport.py +89 -0
- wandb/vendor/gql-0.2.0/wandb_gql/__init__.py +4 -0
- wandb/vendor/gql-0.2.0/wandb_gql/client.py +75 -0
- wandb/vendor/gql-0.2.0/wandb_gql/dsl.py +152 -0
- wandb/vendor/gql-0.2.0/wandb_gql/gql.py +10 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/__init__.py +0 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/http.py +6 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/local_schema.py +15 -0
- wandb/vendor/gql-0.2.0/wandb_gql/transport/requests.py +46 -0
- wandb/vendor/gql-0.2.0/wandb_gql/utils.py +21 -0
- wandb/vendor/graphql-core-1.1/setup.py +86 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/__init__.py +287 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/__init__.py +6 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/base.py +42 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/format_error.py +11 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/located_error.py +29 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/error/syntax_error.py +36 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/__init__.py +26 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/base.py +311 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executor.py +398 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/asyncio.py +53 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/gevent.py +22 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/process.py +32 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/sync.py +7 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/thread.py +35 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/utils.py +6 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/executor.py +66 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/fragment.py +252 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/resolver.py +151 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/utils.py +7 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/middleware.py +57 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/execution/values.py +145 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/graphql.py +60 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/ast.py +1349 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/base.py +19 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/lexer.py +435 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/location.py +30 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/parser.py +779 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/printer.py +193 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/source.py +18 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor.py +222 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor_meta.py +82 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/cached_property.py +17 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/contain_subset.py +28 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/default_ordered_dict.py +40 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/ordereddict.py +8 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/pair_set.py +43 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/version.py +78 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/__init__.py +67 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/definition.py +619 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/directives.py +132 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/introspection.py +440 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/scalars.py +131 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/schema.py +100 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/type/typemap.py +145 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/__init__.py +0 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/assert_valid_name.py +9 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_from_value.py +65 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_code.py +49 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_dict.py +24 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/base.py +75 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_ast_schema.py +291 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_client_schema.py +250 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/concat_ast.py +9 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/extend_schema.py +357 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_field_def.py +27 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_operation_ast.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/introspection_query.py +90 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_literal_value.py +67 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_value.py +66 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/quoted_or_list.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/schema_printer.py +168 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/suggestion_list.py +56 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_comparators.py +69 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_from_ast.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_info.py +149 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/utils/value_from_ast.py +69 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/__init__.py +4 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/__init__.py +79 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/arguments_of_correct_type.py +24 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/base.py +8 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/default_values_of_correct_type.py +44 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fields_on_correct_type.py +113 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fragments_on_composite_types.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_argument_names.py +70 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_directives.py +97 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_fragment_names.py +19 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_type_names.py +43 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/lone_anonymous_operation.py +23 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_fragment_cycles.py +59 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_undefined_variables.py +36 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_fragments.py +38 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_variables.py +37 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/overlapping_fields_can_be_merged.py +529 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/possible_fragment_spreads.py +44 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/provided_non_null_arguments.py +46 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/scalar_leafs.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_argument_names.py +32 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_fragment_names.py +28 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_input_field_names.py +33 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_operation_names.py +31 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_variable_names.py +27 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_are_input_types.py +21 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_in_allowed_position.py +53 -0
- wandb/vendor/graphql-core-1.1/wandb_graphql/validation/validation.py +158 -0
- wandb/vendor/promise-2.3.0/conftest.py +30 -0
- wandb/vendor/promise-2.3.0/setup.py +64 -0
- wandb/vendor/promise-2.3.0/tests/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/tests/conftest.py +8 -0
- wandb/vendor/promise-2.3.0/tests/test_awaitable.py +32 -0
- wandb/vendor/promise-2.3.0/tests/test_awaitable_35.py +47 -0
- wandb/vendor/promise-2.3.0/tests/test_benchmark.py +116 -0
- wandb/vendor/promise-2.3.0/tests/test_complex_threads.py +23 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader.py +452 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader_awaitable_35.py +99 -0
- wandb/vendor/promise-2.3.0/tests/test_dataloader_extra.py +65 -0
- wandb/vendor/promise-2.3.0/tests/test_extra.py +670 -0
- wandb/vendor/promise-2.3.0/tests/test_issues.py +132 -0
- wandb/vendor/promise-2.3.0/tests/test_promise_list.py +70 -0
- wandb/vendor/promise-2.3.0/tests/test_spec.py +584 -0
- wandb/vendor/promise-2.3.0/tests/test_thread_safety.py +115 -0
- wandb/vendor/promise-2.3.0/tests/utils.py +3 -0
- wandb/vendor/promise-2.3.0/wandb_promise/__init__.py +38 -0
- wandb/vendor/promise-2.3.0/wandb_promise/async_.py +135 -0
- wandb/vendor/promise-2.3.0/wandb_promise/compat.py +32 -0
- wandb/vendor/promise-2.3.0/wandb_promise/dataloader.py +326 -0
- wandb/vendor/promise-2.3.0/wandb_promise/iterate_promise.py +12 -0
- wandb/vendor/promise-2.3.0/wandb_promise/promise.py +848 -0
- wandb/vendor/promise-2.3.0/wandb_promise/promise_list.py +151 -0
- wandb/vendor/promise-2.3.0/wandb_promise/pyutils/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/wandb_promise/pyutils/version.py +83 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/__init__.py +0 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/asyncio.py +22 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/gevent.py +21 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/immediate.py +27 -0
- wandb/vendor/promise-2.3.0/wandb_promise/schedulers/thread.py +18 -0
- wandb/vendor/promise-2.3.0/wandb_promise/utils.py +56 -0
- wandb/vendor/pygments/__init__.py +90 -0
- wandb/vendor/pygments/cmdline.py +568 -0
- wandb/vendor/pygments/console.py +74 -0
- wandb/vendor/pygments/filter.py +74 -0
- wandb/vendor/pygments/filters/__init__.py +350 -0
- wandb/vendor/pygments/formatter.py +95 -0
- wandb/vendor/pygments/formatters/__init__.py +153 -0
- wandb/vendor/pygments/formatters/_mapping.py +85 -0
- wandb/vendor/pygments/formatters/bbcode.py +109 -0
- wandb/vendor/pygments/formatters/html.py +851 -0
- wandb/vendor/pygments/formatters/img.py +600 -0
- wandb/vendor/pygments/formatters/irc.py +182 -0
- wandb/vendor/pygments/formatters/latex.py +482 -0
- wandb/vendor/pygments/formatters/other.py +160 -0
- wandb/vendor/pygments/formatters/rtf.py +147 -0
- wandb/vendor/pygments/formatters/svg.py +153 -0
- wandb/vendor/pygments/formatters/terminal.py +136 -0
- wandb/vendor/pygments/formatters/terminal256.py +309 -0
- wandb/vendor/pygments/lexer.py +871 -0
- wandb/vendor/pygments/lexers/__init__.py +329 -0
- wandb/vendor/pygments/lexers/_asy_builtins.py +1645 -0
- wandb/vendor/pygments/lexers/_cl_builtins.py +232 -0
- wandb/vendor/pygments/lexers/_cocoa_builtins.py +72 -0
- wandb/vendor/pygments/lexers/_csound_builtins.py +1346 -0
- wandb/vendor/pygments/lexers/_lasso_builtins.py +5327 -0
- wandb/vendor/pygments/lexers/_lua_builtins.py +295 -0
- wandb/vendor/pygments/lexers/_mapping.py +500 -0
- wandb/vendor/pygments/lexers/_mql_builtins.py +1172 -0
- wandb/vendor/pygments/lexers/_openedge_builtins.py +2547 -0
- wandb/vendor/pygments/lexers/_php_builtins.py +4756 -0
- wandb/vendor/pygments/lexers/_postgres_builtins.py +621 -0
- wandb/vendor/pygments/lexers/_scilab_builtins.py +3094 -0
- wandb/vendor/pygments/lexers/_sourcemod_builtins.py +1163 -0
- wandb/vendor/pygments/lexers/_stan_builtins.py +532 -0
- wandb/vendor/pygments/lexers/_stata_builtins.py +419 -0
- wandb/vendor/pygments/lexers/_tsql_builtins.py +1004 -0
- wandb/vendor/pygments/lexers/_vim_builtins.py +1939 -0
- wandb/vendor/pygments/lexers/actionscript.py +240 -0
- wandb/vendor/pygments/lexers/agile.py +24 -0
- wandb/vendor/pygments/lexers/algebra.py +221 -0
- wandb/vendor/pygments/lexers/ambient.py +76 -0
- wandb/vendor/pygments/lexers/ampl.py +87 -0
- wandb/vendor/pygments/lexers/apl.py +101 -0
- wandb/vendor/pygments/lexers/archetype.py +318 -0
- wandb/vendor/pygments/lexers/asm.py +641 -0
- wandb/vendor/pygments/lexers/automation.py +374 -0
- wandb/vendor/pygments/lexers/basic.py +500 -0
- wandb/vendor/pygments/lexers/bibtex.py +160 -0
- wandb/vendor/pygments/lexers/business.py +612 -0
- wandb/vendor/pygments/lexers/c_cpp.py +252 -0
- wandb/vendor/pygments/lexers/c_like.py +541 -0
- wandb/vendor/pygments/lexers/capnproto.py +78 -0
- wandb/vendor/pygments/lexers/chapel.py +102 -0
- wandb/vendor/pygments/lexers/clean.py +288 -0
- wandb/vendor/pygments/lexers/compiled.py +34 -0
- wandb/vendor/pygments/lexers/configs.py +833 -0
- wandb/vendor/pygments/lexers/console.py +114 -0
- wandb/vendor/pygments/lexers/crystal.py +393 -0
- wandb/vendor/pygments/lexers/csound.py +366 -0
- wandb/vendor/pygments/lexers/css.py +689 -0
- wandb/vendor/pygments/lexers/d.py +251 -0
- wandb/vendor/pygments/lexers/dalvik.py +125 -0
- wandb/vendor/pygments/lexers/data.py +555 -0
- wandb/vendor/pygments/lexers/diff.py +165 -0
- wandb/vendor/pygments/lexers/dotnet.py +691 -0
- wandb/vendor/pygments/lexers/dsls.py +878 -0
- wandb/vendor/pygments/lexers/dylan.py +289 -0
- wandb/vendor/pygments/lexers/ecl.py +125 -0
- wandb/vendor/pygments/lexers/eiffel.py +65 -0
- wandb/vendor/pygments/lexers/elm.py +121 -0
- wandb/vendor/pygments/lexers/erlang.py +533 -0
- wandb/vendor/pygments/lexers/esoteric.py +277 -0
- wandb/vendor/pygments/lexers/ezhil.py +69 -0
- wandb/vendor/pygments/lexers/factor.py +344 -0
- wandb/vendor/pygments/lexers/fantom.py +250 -0
- wandb/vendor/pygments/lexers/felix.py +273 -0
- wandb/vendor/pygments/lexers/forth.py +177 -0
- wandb/vendor/pygments/lexers/fortran.py +205 -0
- wandb/vendor/pygments/lexers/foxpro.py +428 -0
- wandb/vendor/pygments/lexers/functional.py +21 -0
- wandb/vendor/pygments/lexers/go.py +101 -0
- wandb/vendor/pygments/lexers/grammar_notation.py +213 -0
- wandb/vendor/pygments/lexers/graph.py +80 -0
- wandb/vendor/pygments/lexers/graphics.py +553 -0
- wandb/vendor/pygments/lexers/haskell.py +843 -0
- wandb/vendor/pygments/lexers/haxe.py +936 -0
- wandb/vendor/pygments/lexers/hdl.py +382 -0
- wandb/vendor/pygments/lexers/hexdump.py +103 -0
- wandb/vendor/pygments/lexers/html.py +602 -0
- wandb/vendor/pygments/lexers/idl.py +270 -0
- wandb/vendor/pygments/lexers/igor.py +288 -0
- wandb/vendor/pygments/lexers/inferno.py +96 -0
- wandb/vendor/pygments/lexers/installers.py +322 -0
- wandb/vendor/pygments/lexers/int_fiction.py +1343 -0
- wandb/vendor/pygments/lexers/iolang.py +63 -0
- wandb/vendor/pygments/lexers/j.py +146 -0
- wandb/vendor/pygments/lexers/javascript.py +1525 -0
- wandb/vendor/pygments/lexers/julia.py +333 -0
- wandb/vendor/pygments/lexers/jvm.py +1573 -0
- wandb/vendor/pygments/lexers/lisp.py +2621 -0
- wandb/vendor/pygments/lexers/make.py +202 -0
- wandb/vendor/pygments/lexers/markup.py +595 -0
- wandb/vendor/pygments/lexers/math.py +21 -0
- wandb/vendor/pygments/lexers/matlab.py +663 -0
- wandb/vendor/pygments/lexers/ml.py +769 -0
- wandb/vendor/pygments/lexers/modeling.py +358 -0
- wandb/vendor/pygments/lexers/modula2.py +1561 -0
- wandb/vendor/pygments/lexers/monte.py +204 -0
- wandb/vendor/pygments/lexers/ncl.py +894 -0
- wandb/vendor/pygments/lexers/nimrod.py +159 -0
- wandb/vendor/pygments/lexers/nit.py +64 -0
- wandb/vendor/pygments/lexers/nix.py +136 -0
- wandb/vendor/pygments/lexers/oberon.py +105 -0
- wandb/vendor/pygments/lexers/objective.py +504 -0
- wandb/vendor/pygments/lexers/ooc.py +85 -0
- wandb/vendor/pygments/lexers/other.py +41 -0
- wandb/vendor/pygments/lexers/parasail.py +79 -0
- wandb/vendor/pygments/lexers/parsers.py +835 -0
- wandb/vendor/pygments/lexers/pascal.py +644 -0
- wandb/vendor/pygments/lexers/pawn.py +199 -0
- wandb/vendor/pygments/lexers/perl.py +620 -0
- wandb/vendor/pygments/lexers/php.py +267 -0
- wandb/vendor/pygments/lexers/praat.py +294 -0
- wandb/vendor/pygments/lexers/prolog.py +306 -0
- wandb/vendor/pygments/lexers/python.py +939 -0
- wandb/vendor/pygments/lexers/qvt.py +152 -0
- wandb/vendor/pygments/lexers/r.py +453 -0
- wandb/vendor/pygments/lexers/rdf.py +270 -0
- wandb/vendor/pygments/lexers/rebol.py +431 -0
- wandb/vendor/pygments/lexers/resource.py +85 -0
- wandb/vendor/pygments/lexers/rnc.py +67 -0
- wandb/vendor/pygments/lexers/roboconf.py +82 -0
- wandb/vendor/pygments/lexers/robotframework.py +560 -0
- wandb/vendor/pygments/lexers/ruby.py +519 -0
- wandb/vendor/pygments/lexers/rust.py +220 -0
- wandb/vendor/pygments/lexers/sas.py +228 -0
- wandb/vendor/pygments/lexers/scripting.py +1222 -0
- wandb/vendor/pygments/lexers/shell.py +794 -0
- wandb/vendor/pygments/lexers/smalltalk.py +195 -0
- wandb/vendor/pygments/lexers/smv.py +79 -0
- wandb/vendor/pygments/lexers/snobol.py +83 -0
- wandb/vendor/pygments/lexers/special.py +103 -0
- wandb/vendor/pygments/lexers/sql.py +681 -0
- wandb/vendor/pygments/lexers/stata.py +108 -0
- wandb/vendor/pygments/lexers/supercollider.py +90 -0
- wandb/vendor/pygments/lexers/tcl.py +145 -0
- wandb/vendor/pygments/lexers/templates.py +2283 -0
- wandb/vendor/pygments/lexers/testing.py +207 -0
- wandb/vendor/pygments/lexers/text.py +25 -0
- wandb/vendor/pygments/lexers/textedit.py +169 -0
- wandb/vendor/pygments/lexers/textfmts.py +297 -0
- wandb/vendor/pygments/lexers/theorem.py +458 -0
- wandb/vendor/pygments/lexers/trafficscript.py +54 -0
- wandb/vendor/pygments/lexers/typoscript.py +226 -0
- wandb/vendor/pygments/lexers/urbi.py +133 -0
- wandb/vendor/pygments/lexers/varnish.py +190 -0
- wandb/vendor/pygments/lexers/verification.py +111 -0
- wandb/vendor/pygments/lexers/web.py +24 -0
- wandb/vendor/pygments/lexers/webmisc.py +988 -0
- wandb/vendor/pygments/lexers/whiley.py +116 -0
- wandb/vendor/pygments/lexers/x10.py +69 -0
- wandb/vendor/pygments/modeline.py +44 -0
- wandb/vendor/pygments/plugin.py +68 -0
- wandb/vendor/pygments/regexopt.py +92 -0
- wandb/vendor/pygments/scanner.py +105 -0
- wandb/vendor/pygments/sphinxext.py +158 -0
- wandb/vendor/pygments/style.py +155 -0
- wandb/vendor/pygments/styles/__init__.py +80 -0
- wandb/vendor/pygments/styles/abap.py +29 -0
- wandb/vendor/pygments/styles/algol.py +63 -0
- wandb/vendor/pygments/styles/algol_nu.py +63 -0
- wandb/vendor/pygments/styles/arduino.py +98 -0
- wandb/vendor/pygments/styles/autumn.py +65 -0
- wandb/vendor/pygments/styles/borland.py +51 -0
- wandb/vendor/pygments/styles/bw.py +49 -0
- wandb/vendor/pygments/styles/colorful.py +81 -0
- wandb/vendor/pygments/styles/default.py +73 -0
- wandb/vendor/pygments/styles/emacs.py +72 -0
- wandb/vendor/pygments/styles/friendly.py +72 -0
- wandb/vendor/pygments/styles/fruity.py +42 -0
- wandb/vendor/pygments/styles/igor.py +29 -0
- wandb/vendor/pygments/styles/lovelace.py +97 -0
- wandb/vendor/pygments/styles/manni.py +75 -0
- wandb/vendor/pygments/styles/monokai.py +106 -0
- wandb/vendor/pygments/styles/murphy.py +80 -0
- wandb/vendor/pygments/styles/native.py +65 -0
- wandb/vendor/pygments/styles/paraiso_dark.py +125 -0
- wandb/vendor/pygments/styles/paraiso_light.py +125 -0
- wandb/vendor/pygments/styles/pastie.py +75 -0
- wandb/vendor/pygments/styles/perldoc.py +69 -0
- wandb/vendor/pygments/styles/rainbow_dash.py +89 -0
- wandb/vendor/pygments/styles/rrt.py +33 -0
- wandb/vendor/pygments/styles/sas.py +44 -0
- wandb/vendor/pygments/styles/stata.py +40 -0
- wandb/vendor/pygments/styles/tango.py +141 -0
- wandb/vendor/pygments/styles/trac.py +63 -0
- wandb/vendor/pygments/styles/vim.py +63 -0
- wandb/vendor/pygments/styles/vs.py +38 -0
- wandb/vendor/pygments/styles/xcode.py +51 -0
- wandb/vendor/pygments/token.py +213 -0
- wandb/vendor/pygments/unistring.py +217 -0
- wandb/vendor/pygments/util.py +388 -0
- wandb/vendor/pynvml/__init__.py +0 -0
- wandb/vendor/pynvml/pynvml.py +4779 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/__init__.py +17 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/events.py +615 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/__init__.py +98 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/api.py +369 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents.py +172 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents2.py +239 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify.py +218 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_buffer.py +81 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_c.py +575 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/kqueue.py +730 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/polling.py +145 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/read_directory_changes.py +133 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/winapi.py +348 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/patterns.py +265 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/tricks/__init__.py +174 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/__init__.py +151 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/bricks.py +249 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/compat.py +29 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/decorators.py +198 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/delayed_queue.py +88 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/dirsnapshot.py +293 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/echo.py +157 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/event_backport.py +41 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/importlib2.py +40 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/platform.py +57 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/unicode_paths.py +64 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/win32stat.py +123 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/version.py +28 -0
- wandb/vendor/watchdog_0_9_0/wandb_watchdog/watchmedo.py +577 -0
- wandb/wandb_agent.py +588 -0
- wandb/wandb_controller.py +719 -0
- wandb/wandb_run.py +9 -0
- wandb-0.19.1.dist-info/METADATA +223 -0
- wandb-0.19.1.dist-info/RECORD +822 -0
- wandb-0.19.1.dist-info/WHEEL +5 -0
- wandb-0.19.1.dist-info/entry_points.txt +3 -0
- wandb-0.19.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1696 @@
|
|
1
|
+
"""sender."""
|
2
|
+
|
3
|
+
import contextlib
|
4
|
+
import gzip
|
5
|
+
import json
|
6
|
+
import logging
|
7
|
+
import os
|
8
|
+
import queue
|
9
|
+
import threading
|
10
|
+
import time
|
11
|
+
import traceback
|
12
|
+
from collections import defaultdict
|
13
|
+
from datetime import datetime
|
14
|
+
from queue import Queue
|
15
|
+
from typing import (
|
16
|
+
TYPE_CHECKING,
|
17
|
+
Any,
|
18
|
+
Dict,
|
19
|
+
Generator,
|
20
|
+
List,
|
21
|
+
Literal,
|
22
|
+
Optional,
|
23
|
+
Tuple,
|
24
|
+
Type,
|
25
|
+
Union,
|
26
|
+
)
|
27
|
+
|
28
|
+
import requests
|
29
|
+
|
30
|
+
import wandb
|
31
|
+
from wandb import util
|
32
|
+
from wandb.errors import CommError, UsageError
|
33
|
+
from wandb.errors.util import ProtobufErrorHandler
|
34
|
+
from wandb.filesync.dir_watcher import DirWatcher
|
35
|
+
from wandb.proto import wandb_internal_pb2
|
36
|
+
from wandb.sdk.artifacts.artifact_saver import ArtifactSaver
|
37
|
+
from wandb.sdk.interface import interface
|
38
|
+
from wandb.sdk.interface.interface_queue import InterfaceQueue
|
39
|
+
from wandb.sdk.internal import (
|
40
|
+
context,
|
41
|
+
datastore,
|
42
|
+
file_stream,
|
43
|
+
internal_api,
|
44
|
+
sender_config,
|
45
|
+
)
|
46
|
+
from wandb.sdk.internal.file_pusher import FilePusher
|
47
|
+
from wandb.sdk.internal.job_builder import JobBuilder
|
48
|
+
from wandb.sdk.internal.settings_static import SettingsStatic
|
49
|
+
from wandb.sdk.lib import (
|
50
|
+
config_util,
|
51
|
+
filenames,
|
52
|
+
filesystem,
|
53
|
+
proto_util,
|
54
|
+
redirect,
|
55
|
+
telemetry,
|
56
|
+
)
|
57
|
+
from wandb.sdk.lib.mailbox import ContextCancelledError
|
58
|
+
from wandb.sdk.lib.proto_util import message_to_dict
|
59
|
+
|
60
|
+
if TYPE_CHECKING:
|
61
|
+
from wandb.proto.wandb_internal_pb2 import (
|
62
|
+
ArtifactManifest,
|
63
|
+
ArtifactManifestEntry,
|
64
|
+
ArtifactRecord,
|
65
|
+
HttpResponse,
|
66
|
+
LocalInfo,
|
67
|
+
Record,
|
68
|
+
Result,
|
69
|
+
RunExitResult,
|
70
|
+
RunRecord,
|
71
|
+
SummaryRecord,
|
72
|
+
)
|
73
|
+
|
74
|
+
StreamLiterals = Literal["stdout", "stderr"]
|
75
|
+
|
76
|
+
|
77
|
+
logger = logging.getLogger(__name__)
|
78
|
+
|
79
|
+
|
80
|
+
_OUTPUT_MIN_CALLBACK_INTERVAL = 2 # seconds
|
81
|
+
|
82
|
+
|
83
|
+
def _framework_priority() -> Generator[Tuple[str, str], None, None]:
|
84
|
+
yield from [
|
85
|
+
("lightgbm", "lightgbm"),
|
86
|
+
("catboost", "catboost"),
|
87
|
+
("xgboost", "xgboost"),
|
88
|
+
("transformers_huggingface", "huggingface"), # backwards compatibility
|
89
|
+
("transformers", "huggingface"),
|
90
|
+
("pytorch_ignite", "ignite"), # backwards compatibility
|
91
|
+
("ignite", "ignite"),
|
92
|
+
("pytorch_lightning", "lightning"),
|
93
|
+
("fastai", "fastai"),
|
94
|
+
("torch", "torch"),
|
95
|
+
("keras", "keras"),
|
96
|
+
("tensorflow", "tensorflow"),
|
97
|
+
("sklearn", "sklearn"),
|
98
|
+
]
|
99
|
+
|
100
|
+
|
101
|
+
def _manifest_json_from_proto(manifest: "ArtifactManifest") -> Dict:
|
102
|
+
if manifest.version == 1:
|
103
|
+
if manifest.manifest_file_path:
|
104
|
+
contents = {}
|
105
|
+
with gzip.open(manifest.manifest_file_path, "rt") as f:
|
106
|
+
for line in f:
|
107
|
+
entry_json = json.loads(line)
|
108
|
+
path = entry_json.pop("path")
|
109
|
+
contents[path] = entry_json
|
110
|
+
else:
|
111
|
+
contents = {
|
112
|
+
content.path: _manifest_entry_from_proto(content)
|
113
|
+
for content in manifest.contents
|
114
|
+
}
|
115
|
+
else:
|
116
|
+
raise ValueError(f"unknown artifact manifest version: {manifest.version}")
|
117
|
+
|
118
|
+
return {
|
119
|
+
"version": manifest.version,
|
120
|
+
"storagePolicy": manifest.storage_policy,
|
121
|
+
"storagePolicyConfig": {
|
122
|
+
config.key: json.loads(config.value_json)
|
123
|
+
for config in manifest.storage_policy_config
|
124
|
+
},
|
125
|
+
"contents": contents,
|
126
|
+
}
|
127
|
+
|
128
|
+
|
129
|
+
def _manifest_entry_from_proto(entry: "ArtifactManifestEntry") -> Dict:
|
130
|
+
birth_artifact_id = entry.birth_artifact_id if entry.birth_artifact_id else None
|
131
|
+
return {
|
132
|
+
"digest": entry.digest,
|
133
|
+
"birthArtifactID": birth_artifact_id,
|
134
|
+
"ref": entry.ref if entry.ref else None,
|
135
|
+
"size": entry.size if entry.size is not None else None,
|
136
|
+
"local_path": entry.local_path if entry.local_path else None,
|
137
|
+
"skip_cache": entry.skip_cache,
|
138
|
+
"extra": {extra.key: json.loads(extra.value_json) for extra in entry.extra},
|
139
|
+
}
|
140
|
+
|
141
|
+
|
142
|
+
class ResumeState:
    """Mutable record of the values tracked for a resumed run."""

    resumed: bool
    step: int
    history: int
    events: int
    output: int
    runtime: float
    wandb_runtime: Optional[int]
    summary: Optional[Dict[str, Any]]
    config: Optional[Dict[str, Any]]
    tags: Optional[List[str]]

    def __init__(self) -> None:
        """Start in the "not resumed" state with zeroed counters."""
        self.resumed = False
        self.step = 0
        self.history = 0
        self.events = 0
        self.output = 0
        self.runtime = 0
        # wandb_runtime is the canonical runtime (stored in summary._wandb.runtime)
        self.wandb_runtime = None
        self.summary = None
        self.config = None
        self.tags = None

    def __str__(self) -> str:
        """Render every instance attribute as ``name=value``, comma separated."""
        fields = ",".join(f"{name}={value}" for name, value in vars(self).items())
        return f"ResumeState({fields})"
|
170
|
+
|
171
|
+
|
172
|
+
class _OutputRawStream:
    """Per-stream state for raw console output: a queue, an emulator, two threads."""

    _stopped: threading.Event
    _queue: queue.Queue
    _emulator: redirect.TerminalEmulator
    _writer_thr: threading.Thread
    _reader_thr: threading.Thread

    def __init__(self, stream: str, sm: "SendManager"):
        """Create (but do not start) the writer and reader threads.

        Args:
            stream: Stream name; passed to both thread targets as the
                ``stream`` keyword and embedded in the thread names.
            sm: Object providing ``_output_raw_writer_thread`` and
                ``_output_raw_reader_thread`` as thread targets.
        """
        self._stopped = threading.Event()
        self._queue = queue.Queue()
        self._emulator = redirect.TerminalEmulator()

        self._writer_thr = threading.Thread(
            name=f"OutRawWr-{stream}",
            target=sm._output_raw_writer_thread,
            kwargs={"stream": stream},
            daemon=True,
        )
        self._reader_thr = threading.Thread(
            name=f"OutRawRd-{stream}",
            target=sm._output_raw_reader_thread,
            kwargs={"stream": stream},
            daemon=True,
        )

    def start(self) -> None:
        """Start the writer thread, then the reader thread."""
        for worker in (self._writer_thr, self._reader_thr):
            worker.start()
|
199
|
+
|
200
|
+
|
201
|
+
class SendManager:
|
202
|
+
UPDATE_CONFIG_TIME: int = 30
|
203
|
+
UPDATE_STATUS_TIME: int = 5
|
204
|
+
|
205
|
+
_settings: SettingsStatic
|
206
|
+
_record_q: "Queue[Record]"
|
207
|
+
_result_q: "Queue[Result]"
|
208
|
+
_interface: InterfaceQueue
|
209
|
+
_api_settings: Dict[str, str]
|
210
|
+
_partial_output: Dict[str, str]
|
211
|
+
_context_keeper: context.ContextKeeper
|
212
|
+
|
213
|
+
_telemetry_obj: telemetry.TelemetryRecord
|
214
|
+
_fs: Optional["file_stream.FileStreamApi"]
|
215
|
+
_run: Optional["RunRecord"]
|
216
|
+
_entity: Optional[str]
|
217
|
+
_project: Optional[str]
|
218
|
+
_dir_watcher: Optional["DirWatcher"]
|
219
|
+
_pusher: Optional["FilePusher"]
|
220
|
+
_record_exit: Optional["Record"]
|
221
|
+
_exit_result: Optional["RunExitResult"]
|
222
|
+
_resume_state: ResumeState
|
223
|
+
_rewind_response: Optional[Dict[str, Any]]
|
224
|
+
_cached_server_info: Dict[str, Any]
|
225
|
+
_cached_viewer: Dict[str, Any]
|
226
|
+
_server_messages: List[Dict[str, Any]]
|
227
|
+
_ds: Optional[datastore.DataStore]
|
228
|
+
_output_raw_streams: Dict["StreamLiterals", _OutputRawStream]
|
229
|
+
_output_raw_file: Optional[filesystem.CRDedupedFile]
|
230
|
+
_send_record_num: int
|
231
|
+
_send_end_offset: int
|
232
|
+
_debounce_config_time: float
|
233
|
+
_debounce_status_time: float
|
234
|
+
|
235
|
+
def __init__(
    self,
    settings: SettingsStatic,
    record_q: "Queue[Record]",
    result_q: "Queue[Result]",
    interface: InterfaceQueue,
    context_keeper: context.ContextKeeper,
) -> None:
    """Store the collaborators and reset all per-run sender state.

    Args:
        settings: Static settings; also handed to the API client and the
            job builder.
        record_q: Queue of incoming records (its size is this object's length).
        result_q: Queue that results are put on.
        interface: Interface used to publish requests back.
        context_keeper: Registry of per-record contexts.
    """
    # Collaborators supplied by the caller.
    self._settings = settings
    self._record_q = record_q
    self._result_q = result_q
    self._interface = interface
    self._context_keeper = context_keeper

    # Transaction-log reader and progress counters.
    self._ds = None
    self._send_record_num = 0
    self._send_end_offset = 0

    # Lazily created helpers.
    self._fs = self._pusher = self._dir_watcher = None

    # State updated by login.
    self._entity = None
    self._flags = None

    # State updated by wandb.init.
    self._run = None
    self._project = None

    # Keep track of config from key/val updates.
    self._consolidated_config = sender_config.ConfigState()

    self._start_time: int = 0
    self._telemetry_obj = telemetry.TelemetryRecord()
    self._config_metric_pbdict_list: List[Dict[int, Any]] = []
    self._metadata_summary: Dict[str, Any] = defaultdict()
    self._cached_summary: Dict[str, Any] = {}
    self._config_metric_index_dict: Dict[str, int] = {}
    self._config_metric_dict: Dict[str, wandb_internal_pb2.MetricRecord] = {}
    self._consolidated_summary: Dict[str, Any] = {}

    self._cached_server_info = {}
    self._cached_viewer = {}
    self._server_messages = []

    # State updated by resuming.
    self._resume_state = ResumeState()
    self._rewind_response = None

    # State added when run_exit is initiated and complete.
    self._record_exit = None
    self._exit_result = None

    self._api = internal_api.Api(
        default_settings=settings,
        retry_callback=self.retry_callback,
    )
    self._api_settings = {}

    # Queue filled by retry_callback.
    self._retry_q: Queue[HttpResponse] = queue.Queue()

    # Whether a config update is pending and must be debounced.
    self._config_needs_debounce: bool = False

    # TODO(jhr): do something better, why do we need to send full lines?
    self._partial_output = {}

    self._exit_code = 0

    # Internal vars for handling raw console output.
    self._output_raw_streams = {}
    self._output_raw_file = None

    # Job builder.
    self._job_builder = JobBuilder(settings)

    # Both debounce timers start from the same monotonic instant.
    self._debounce_config_time = self._debounce_status_time = time.monotonic()
|
315
|
+
|
316
|
+
@classmethod
def setup(
    cls,
    root_dir: str,
    resume: Union[None, bool, str],
) -> "SendManager":
    """Set up a standalone SendManager.

    Exclusively used in `sync.py`.

    Args:
        root_dir: Run directory; its ``files`` subdirectory becomes the
            files dir in the generated settings.
        resume: Resume setting forwarded unchanged to ``wandb.Settings``.

    Returns:
        A new instance of ``cls`` wired to fresh record/result queues, an
        ``InterfaceQueue`` on the record queue, and a new ``ContextKeeper``.
    """
    print(root_dir)
    settings = wandb.Settings(
        x_files_dir=os.path.join(root_dir, "files"),
        root_dir=root_dir,
        resume=resume,
        x_sync=True,
        disable_job_creation=False,
        x_file_stream_timeout_seconds=0,
    )
    record_q: Queue[Record] = queue.Queue()
    result_q: Queue[Result] = queue.Queue()
    # Instantiate via ``cls`` so that calling ``setup`` on a subclass
    # returns that subclass rather than the base class.
    return cls(
        settings=SettingsStatic(settings.to_proto()),
        record_q=record_q,
        result_q=result_q,
        interface=InterfaceQueue(record_q=record_q),
        context_keeper=context.ContextKeeper(),
    )
|
349
|
+
|
350
|
+
def __len__(self) -> int:
|
351
|
+
return self._record_q.qsize()
|
352
|
+
|
353
|
+
def __enter__(self) -> "SendManager":
|
354
|
+
return self
|
355
|
+
|
356
|
+
def __exit__(
|
357
|
+
self,
|
358
|
+
exc_type: Optional[Type[BaseException]],
|
359
|
+
exc_value: Optional[BaseException],
|
360
|
+
exc_traceback: Optional[traceback.TracebackException],
|
361
|
+
) -> Literal[False]:
|
362
|
+
while self:
|
363
|
+
data = next(self)
|
364
|
+
self.send(data)
|
365
|
+
self.finish()
|
366
|
+
return False
|
367
|
+
|
368
|
+
def retry_callback(self, status: int, response_text: str) -> None:
    """Wrap a status code and response text in an ``HttpResponse`` and queue it.

    This method is passed to the API client as its ``retry_callback``; the
    message is put on ``self._retry_q``.
    """
    self._retry_q.put(
        wandb_internal_pb2.HttpResponse(
            http_status_code=status,
            http_response_text=response_text,
        )
    )
|
373
|
+
|
374
|
+
def send(self, record: "Record") -> None:
    """Dispatch ``record`` to the ``send_<record_type>`` method.

    The record number and end offset are noted first. The handler runs with
    the API's local context set to the context registered for this record;
    that local context is always cleared afterwards. If the handler raises
    ``ContextCancelledError`` the error is logged at debug level and the
    context is released.
    """
    self._update_record_num(record.num)
    self._update_end_offset(record.control.end_offset)

    kind = record.WhichOneof("record_type")
    assert kind
    handler_name = f"send_{kind}"
    handler = getattr(self, handler_name, None)

    # Don't log output to reduce log noise
    quiet_kinds = ("output", "request", "output_raw")
    if kind not in quiet_kinds:
        logger.debug(f"send: {kind}")
    assert handler, f"unknown send handler: {handler_name}"

    ctx_id = context.context_id_from_record(record)
    local_ctx = self._context_keeper.get(ctx_id)
    try:
        self._api.set_local_context(local_ctx)
        handler(record)
    except ContextCancelledError:
        logger.debug(f"Record cancelled: {kind}")
        self._context_keeper.release(ctx_id)
    finally:
        self._api.clear_local_context()
|
397
|
+
|
398
|
+
def send_preempting(self, _: "Record") -> None:
    """Call ``enqueue_preempting()`` on the file stream, if one is set.

    The record argument is not used.
    """
    file_stream_api = self._fs
    if not file_stream_api:
        return
    file_stream_api.enqueue_preempting()
|
401
|
+
|
402
|
+
def send_request_sender_mark(self, _: "Record") -> None:
    """Publish a status report unconditionally; the record is not used."""
    force = True
    self._maybe_report_status(always=force)
|
404
|
+
|
405
|
+
def send_request(self, record: "Record") -> None:
    """Dispatch a request record to the ``send_request_<request_type>`` method.

    Every request type except ``network_status`` is logged at debug level.
    """
    kind = record.request.WhichOneof("request_type")
    assert kind
    handler_name = f"send_request_{kind}"
    handler = getattr(self, handler_name, None)
    if kind != "network_status":
        logger.debug(f"send_request: {kind}")
    assert handler, f"unknown handle: {handler_name}"
    handler(record)
|
414
|
+
|
415
|
+
def _respond_result(self, result: "Result") -> None:
    """Release the context tied to ``result``, then put it on the result queue."""
    self._context_keeper.release(context.context_id_from_result(result))
    self._result_q.put(result)
|
419
|
+
|
420
|
+
def _flatten(self, dictionary: Dict) -> None:
|
421
|
+
if isinstance(dictionary, dict):
|
422
|
+
for k, v in list(dictionary.items()):
|
423
|
+
if isinstance(v, dict):
|
424
|
+
self._flatten(v)
|
425
|
+
dictionary.pop(k)
|
426
|
+
for k2, v2 in v.items():
|
427
|
+
dictionary[k + "." + k2] = v2
|
428
|
+
|
429
|
+
def _update_record_num(self, record_num: int) -> None:
|
430
|
+
if not record_num:
|
431
|
+
return
|
432
|
+
# Currently how we handle offline mode and syncing is not
|
433
|
+
# compatible with this assertion due to how the exit record
|
434
|
+
# is (mis)handled:
|
435
|
+
# - using "always_send" in offline mode to trigger defer
|
436
|
+
# state machine
|
437
|
+
# - skipping the exit record in `wandb sync` mode so that
|
438
|
+
# it is always executed as the last record
|
439
|
+
if not self._settings._offline and not self._settings.x_sync:
|
440
|
+
assert record_num == self._send_record_num + 1
|
441
|
+
self._send_record_num = record_num
|
442
|
+
|
443
|
+
def _update_end_offset(self, end_offset: int) -> None:
|
444
|
+
if not end_offset:
|
445
|
+
return
|
446
|
+
self._send_end_offset = end_offset
|
447
|
+
|
448
|
+
def send_request_sender_read(self, record: "Record") -> None:
    """Replay records from the sync file between two offsets.

    The datastore is opened on ``settings.sync_file`` the first time this is
    called. Starting at the request's ``start_offset``, records are scanned,
    parsed and passed to ``send`` until the datastore offset reaches
    ``final_offset``. Afterwards ``debounce()`` runs and a status report is
    forced.
    """
    if self._ds is None:
        self._ds = datastore.DataStore()
        self._ds.open_for_scan(self._settings.sync_file)

    # TODO(cancel_paused): implement cancel_set logic
    # The idea is that there is an active request to cancel a
    # message that is being read from the transaction log below

    read_request = record.request.sender_read
    start_offset = read_request.start_offset
    final_offset = read_request.final_offset
    self._ds.seek(start_offset)

    reached_offset = 0
    while reached_offset < final_offset:
        raw = self._ds.scan_data()
        assert raw
        reached_offset = self._ds.get_offset()

        replayed = wandb_internal_pb2.Record()
        replayed.ParseFromString(raw)
        self._update_end_offset(reached_offset)
        self.send(replayed)

    # make sure we perform deferred operations
    self.debounce()

    # make sure that we always update the writer for every sender read request
    self._maybe_report_status(always=True)
|
477
|
+
|
478
|
+
def send_request_stop_status(self, record: "Record") -> None:
    """Answer a stop-status request with whether the run should stop.

    The API is only queried when entity, project and run id are all known.
    A failed query is logged as a warning and reported as "do not stop".
    """
    result = proto_util._result_from_record(record)
    stop_status = result.response.stop_status_response
    stop_status.run_should_stop = False

    run_is_known = (
        self._entity and self._project and self._run and self._run.run_id
    )
    if run_is_known:
        try:
            stop_status.run_should_stop = self._api.check_stop_requested(
                self._project,
                self._entity,
                self._run.run_id,
            )
        except Exception as e:
            logger.warning("Failed to check stop requested status: %s", e)

    self._respond_result(result)
|
490
|
+
|
491
|
+
def _maybe_update_config(self, always: bool = False) -> None:
|
492
|
+
time_now = time.monotonic()
|
493
|
+
if (
|
494
|
+
not always
|
495
|
+
and time_now < self._debounce_config_time + self.UPDATE_CONFIG_TIME
|
496
|
+
):
|
497
|
+
return
|
498
|
+
if self._config_needs_debounce:
|
499
|
+
self._debounce_config()
|
500
|
+
self._debounce_config_time = time_now
|
501
|
+
|
502
|
+
def _maybe_report_status(self, always: bool = False) -> None:
|
503
|
+
time_now = time.monotonic()
|
504
|
+
if (
|
505
|
+
not always
|
506
|
+
and time_now < self._debounce_status_time + self.UPDATE_STATUS_TIME
|
507
|
+
):
|
508
|
+
return
|
509
|
+
self._debounce_status_time = time_now
|
510
|
+
|
511
|
+
status_report = wandb_internal_pb2.StatusReportRequest(
|
512
|
+
record_num=self._send_record_num,
|
513
|
+
sent_offset=self._send_end_offset,
|
514
|
+
)
|
515
|
+
status_time = time.time()
|
516
|
+
status_report.sync_time.FromMicroseconds(int(status_time * 1e6))
|
517
|
+
record = self._interface._make_request(status_report=status_report)
|
518
|
+
self._interface._publish(record)
|
519
|
+
|
520
|
+
def debounce(self, final: bool = False) -> None:
    """Run the debounced actions: status report first, then config update.

    Args:
        final: Forwarded as ``always`` to both steps, bypassing their timers.
    """
    for step in (self._maybe_report_status, self._maybe_update_config):
        step(always=final)
|
523
|
+
|
524
|
+
def _debounce_config(self) -> None:
|
525
|
+
config_value_dict = self._config_backend_dict()
|
526
|
+
# TODO(jhr): check result of upsert_run?
|
527
|
+
if self._run:
|
528
|
+
self._api.upsert_run(
|
529
|
+
name=self._run.run_id,
|
530
|
+
config=config_value_dict,
|
531
|
+
**self._api_settings, # type: ignore
|
532
|
+
)
|
533
|
+
self._config_save(config_value_dict)
|
534
|
+
self._config_needs_debounce = False
|
535
|
+
|
536
|
+
def send_request_network_status(self, record: "Record") -> None:
    """Reply with every HTTP response currently waiting in the retry queue.

    The queue is drained without blocking. Any error other than the queue
    being empty is logged as a warning and draining continues.
    """
    result = proto_util._result_from_record(record)
    network_status = result.response.network_status_response

    drained = False
    while not drained:
        try:
            network_status.network_responses.append(self._retry_q.get_nowait())
        except queue.Empty:
            drained = True
        except Exception as e:
            logger.warning(f"Error emptying retry queue: {e}")

    self._respond_result(result)
|
547
|
+
|
548
|
+
def send_request_login(self, record: "Record") -> None:
    """Re-authenticate, cache the viewer's entity, and reply if asked to.

    A result is sent only when ``record.control.req_resp`` is set; the
    active entity is included in it when one is known.
    """
    # TODO: do something with api_key or anonymous?
    # TODO: return an error if we aren't logged in?
    self._api.reauth()
    viewer = self.get_viewer_info()
    server_info = self.get_server_info()
    if server_info:
        logger.info(f"Login server info: {server_info}")
    self._entity = viewer.get("entity")

    if not record.control.req_resp:
        return

    result = proto_util._result_from_record(record)
    if self._entity:
        result.response.login_response.active_entity = self._entity
    self._respond_result(result)
|
564
|
+
|
565
|
+
def send_exit(self, record: "Record") -> None:
    """Handle the exit record and kick off the defer sequence.

    The record is remembered, the exit code is stored, the runtime goes into
    the metadata summary (followed by a summary update), and a defer request
    is published.
    """
    # track where the exit came from
    self._record_exit = record

    exit_info = record.exit
    self._exit_code = exit_info.exit_code
    logger.info("handling exit code: %s", exit_info.exit_code)

    run_seconds = exit_info.runtime
    logger.info("handling runtime: %s", exit_info.runtime)
    self._metadata_summary["runtime"] = run_seconds
    self._update_summary()

    # We need to give the request queue a chance to empty between states
    # so use handle_request_defer as a state machine.
    logger.info("send defer")
    self._interface.publish_defer()
|
581
|
+
|
582
|
+
def send_final(self, record: "Record") -> None:
    """Accept a final record; the sender has nothing to do for it."""
    return None
|
584
|
+
|
585
|
+
def _flush_run(self) -> None:
|
586
|
+
pass
|
587
|
+
|
588
|
+
def send_request_status_report(self, record: "Record") -> None:
    """Accept a status-report request without acting on it."""
    # todo? this is just a noop to please wandb sync
    return None
|
591
|
+
|
592
|
+
def send_request_defer(self, record: "Record") -> None:  # noqa: C901
    """Run the sender-side work for one defer state and move to the next.

    Each state performs its own flush/cleanup step (if any) and then
    publishes the following state. On ``END`` the exit result is stored on
    the sender and, when the exit record carried a mailbox slot, sent back.

    Raises:
        AssertionError: if the state is not one of the known defer states.
    """
    defer = record.request.defer
    state = defer.state
    logger.info(f"handle sender defer: {state}")

    def transition_state() -> None:
        # Publish the state that follows the one carried by this record.
        next_state = defer.state + 1
        logger.info(f"send defer: {next_state}")
        self._interface.publish_defer(next_state)

    # BEGIN needs no work here; the FLUSH_* states listed with it are
    # handled in handler.py:handle_request_defer(), so the sender simply
    # moves on.
    no_sender_work = (
        defer.BEGIN,
        defer.FLUSH_STATS,
        defer.FLUSH_PARTIAL_HISTORY,
        defer.FLUSH_TB,
        defer.FLUSH_SUM,
    )

    if state in no_sender_work:
        transition_state()
    elif state == defer.FLUSH_RUN:
        self._flush_run()
        transition_state()
    elif state == defer.FLUSH_DEBOUNCER:
        self.debounce(final=True)
        transition_state()
    elif state == defer.FLUSH_OUTPUT:
        self._output_raw_finish()
        transition_state()
    elif state == defer.FLUSH_JOB:
        self._flush_job()
        transition_state()
    elif state == defer.FLUSH_DIR:
        if self._dir_watcher:
            self._dir_watcher.finish()
            self._dir_watcher = None
        transition_state()
    elif state == defer.FLUSH_FP:
        if self._pusher:
            # FilePusher generates some events for FileStreamApi, so we
            # need to wait for pusher to finish before going to the next
            # state to ensure that filestream gets all the events that we
            # want before telling it to finish up
            self._pusher.finish(transition_state)
        else:
            transition_state()
    elif state == defer.JOIN_FP:
        if self._pusher:
            self._pusher.join()
        transition_state()
    elif state == defer.FLUSH_FS:
        if self._fs:
            # TODO(jhr): now is a good time to output pending output lines
            self._fs.finish(self._exit_code)
            self._fs = None
        transition_state()
    elif state == defer.FLUSH_FINAL:
        self._interface.publish_final()
        self._interface.publish_footer()
        transition_state()
    elif state == defer.END:
        exit_result = wandb_internal_pb2.RunExitResult()

        # mark exit done in case we are polling on exit
        self._exit_result = exit_result

        # Report response to mailbox
        if self._record_exit and self._record_exit.control.mailbox_slot:
            result = proto_util._result_from_record(self._record_exit)
            result.exit_result.CopyFrom(exit_result)
            self._respond_result(result)
    else:
        raise AssertionError("unknown state")
|
675
|
+
|
676
|
+
def send_request_poll_exit(self, record: "Record") -> None:
    """Report upload progress and whether the exit sequence has finished.

    Nothing is sent unless the record asks for a response or carries a
    mailbox slot.
    """
    control = record.control
    if not (control.req_resp or control.mailbox_slot):
        return

    result = proto_util._result_from_record(record)

    if self._pusher:
        _alive, status = self._pusher.get_status()
        counts = self._pusher.file_counts_by_category()
        poll_resp = result.response.poll_exit_response

        stats = poll_resp.pusher_stats
        stats.uploaded_bytes = status.uploaded_bytes
        stats.total_bytes = status.total_bytes
        stats.deduped_bytes = status.deduped_bytes

        by_category = poll_resp.file_counts
        by_category.wandb_count = counts.wandb
        by_category.media_count = counts.media
        by_category.artifact_count = counts.artifact
        by_category.other_count = counts.other

    if self._exit_result:
        # The defer state machine reached END; hand back the stored result.
        result.response.poll_exit_response.done = True
        result.response.poll_exit_response.exit_result.CopyFrom(self._exit_result)

    self._respond_result(result)
|
699
|
+
|
700
|
+
def _setup_resume(
|
701
|
+
self, run: "RunRecord"
|
702
|
+
) -> Optional["wandb_internal_pb2.ErrorInfo"]:
|
703
|
+
"""Queries the backend for a run; fail if the settings are incompatible."""
|
704
|
+
if not self._settings.resume:
|
705
|
+
return None
|
706
|
+
|
707
|
+
# TODO: This causes a race, we need to make the upsert atomically
|
708
|
+
# only create or update depending on the resume config
|
709
|
+
# we use the runs entity if set, otherwise fallback to users entity
|
710
|
+
# todo: ensure entity is not None as self._entity is Optional[str]
|
711
|
+
entity = run.entity or self._entity
|
712
|
+
logger.info(
|
713
|
+
"checking resume status for %s/%s/%s", entity, run.project, run.run_id
|
714
|
+
)
|
715
|
+
resume_status = self._api.run_resume_status(
|
716
|
+
entity=entity, # type: ignore
|
717
|
+
project_name=run.project,
|
718
|
+
name=run.run_id,
|
719
|
+
)
|
720
|
+
# No resume status = run does not exist; No t key in wandbConfig = run exists but hasn't been inited
|
721
|
+
if not resume_status or '"t":' not in resume_status.get("wandbConfig", ""):
|
722
|
+
if self._settings.resume == "must":
|
723
|
+
error = wandb_internal_pb2.ErrorInfo()
|
724
|
+
error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
|
725
|
+
error.message = (
|
726
|
+
"You provided an invalid value for the `resume` argument."
|
727
|
+
f" The value 'must' is not a valid option for resuming a run ({run.run_id}) that has not been initialized."
|
728
|
+
" Please check your inputs and try again with a valid run ID."
|
729
|
+
" If you are trying to start a new run, please omit the `resume` argument or use `resume='allow'`."
|
730
|
+
)
|
731
|
+
return error
|
732
|
+
return None
|
733
|
+
|
734
|
+
#
|
735
|
+
# handle cases where we have resume_status
|
736
|
+
#
|
737
|
+
if self._settings.resume == "never":
|
738
|
+
error = wandb_internal_pb2.ErrorInfo()
|
739
|
+
error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
|
740
|
+
error.message = (
|
741
|
+
"You provided an invalid value for the `resume` argument."
|
742
|
+
f" The value 'never' is not a valid option for resuming a run ({run.run_id}) that already exists."
|
743
|
+
" Please check your inputs and try again with a valid value for the `resume` argument."
|
744
|
+
)
|
745
|
+
return error
|
746
|
+
|
747
|
+
history = {}
|
748
|
+
events = {}
|
749
|
+
config = {}
|
750
|
+
summary = {}
|
751
|
+
try:
|
752
|
+
events_rt = 0
|
753
|
+
history_rt = 0
|
754
|
+
history = json.loads(resume_status["historyTail"])
|
755
|
+
if history:
|
756
|
+
history = json.loads(history[-1])
|
757
|
+
history_rt = history.get("_runtime", 0)
|
758
|
+
events = json.loads(resume_status["eventsTail"])
|
759
|
+
if events:
|
760
|
+
events = json.loads(events[-1])
|
761
|
+
events_rt = events.get("_runtime", 0)
|
762
|
+
config = json.loads(resume_status["config"] or "{}")
|
763
|
+
summary = json.loads(resume_status["summaryMetrics"] or "{}")
|
764
|
+
new_runtime = summary.get("_wandb", {}).get("runtime", None)
|
765
|
+
if new_runtime is not None:
|
766
|
+
self._resume_state.wandb_runtime = new_runtime
|
767
|
+
tags = resume_status.get("tags") or []
|
768
|
+
|
769
|
+
except (IndexError, ValueError) as e:
|
770
|
+
logger.error("unable to load resume tails", exc_info=e)
|
771
|
+
if self._settings.resume == "must":
|
772
|
+
error = wandb_internal_pb2.ErrorInfo()
|
773
|
+
error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
|
774
|
+
error.message = "resume='must' but could not resume ({}) ".format(
|
775
|
+
run.run_id
|
776
|
+
)
|
777
|
+
return error
|
778
|
+
|
779
|
+
# TODO: Do we need to restore config / summary?
|
780
|
+
# System metrics runtime is usually greater than history
|
781
|
+
self._resume_state.runtime = max(events_rt, history_rt)
|
782
|
+
last_step = history.get("_step", 0)
|
783
|
+
history_line_count = resume_status["historyLineCount"]
|
784
|
+
self._resume_state.step = last_step + 1 if history_line_count > 0 else last_step
|
785
|
+
self._resume_state.history = history_line_count
|
786
|
+
self._resume_state.events = resume_status["eventsLineCount"]
|
787
|
+
self._resume_state.output = resume_status["logLineCount"]
|
788
|
+
self._resume_state.config = config
|
789
|
+
self._resume_state.summary = summary
|
790
|
+
self._resume_state.tags = tags
|
791
|
+
self._resume_state.resumed = True
|
792
|
+
logger.info("configured resuming with: {}".format(self._resume_state))
|
793
|
+
return None
|
794
|
+
|
795
|
+
def _telemetry_get_framework(self) -> str:
|
796
|
+
"""Get telemetry data for internal config structure."""
|
797
|
+
# detect framework by checking what is loaded
|
798
|
+
imports: telemetry.TelemetryImports
|
799
|
+
if self._telemetry_obj.HasField("imports_finish"):
|
800
|
+
imports = self._telemetry_obj.imports_finish
|
801
|
+
elif self._telemetry_obj.HasField("imports_init"):
|
802
|
+
imports = self._telemetry_obj.imports_init
|
803
|
+
else:
|
804
|
+
return ""
|
805
|
+
framework = next(
|
806
|
+
(n for f, n in _framework_priority() if getattr(imports, f, False)), ""
|
807
|
+
)
|
808
|
+
return framework
|
809
|
+
|
810
|
+
def _config_backend_dict(self) -> sender_config.BackendConfigDict:
    """Build the backend config dict from the consolidated config.

    Falls back to a fresh ``ConfigState`` when the consolidated config is
    falsy.
    """
    state = self._consolidated_config
    if not state:
        state = sender_config.ConfigState()

    return state.to_backend_dict(
        telemetry_record=self._telemetry_obj,
        framework=self._telemetry_get_framework(),
        start_time_millis=self._start_time,
        metric_pbdicts=self._config_metric_pbdict_list,
    )
|
819
|
+
|
820
|
+
def _config_save(
    self,
    config_value_dict: sender_config.BackendConfigDict,
) -> None:
    """Write the given config dict to ``config.yaml`` in the run's files dir."""
    target = os.path.join(self._settings.files_dir, "config.yaml")
    config_util.save_config_file_from_dict(target, config_value_dict)
|
826
|
+
|
827
|
+
def _sync_spell(self) -> None:
|
828
|
+
"""Sync this run with spell."""
|
829
|
+
if not self._run:
|
830
|
+
return
|
831
|
+
try:
|
832
|
+
env = os.environ
|
833
|
+
self._interface.publish_config(
|
834
|
+
key=("_wandb", "spell_url"), val=env.get("SPELL_RUN_URL")
|
835
|
+
)
|
836
|
+
url = "{}/{}/{}/runs/{}".format(
|
837
|
+
self._api.app_url, self._run.entity, self._run.project, self._run.run_id
|
838
|
+
)
|
839
|
+
requests.put(
|
840
|
+
env.get("SPELL_API_URL", "https://api.spell.run") + "/wandb_url",
|
841
|
+
json={"access_token": env.get("WANDB_ACCESS_TOKEN"), "url": url},
|
842
|
+
timeout=2,
|
843
|
+
)
|
844
|
+
except requests.RequestException:
|
845
|
+
pass
|
846
|
+
# TODO: do something if sync spell is not successful?
|
847
|
+
|
848
|
+
def _setup_fork(self, server_run: dict):
|
849
|
+
assert self._settings.fork_from
|
850
|
+
assert self._settings.fork_from.metric == "_step"
|
851
|
+
assert self._run
|
852
|
+
first_step = int(self._settings.fork_from.value) + 1
|
853
|
+
self._resume_state.step = first_step
|
854
|
+
self._resume_state.history = server_run.get("historyLineCount", 0)
|
855
|
+
self._run.forked = True
|
856
|
+
self._run.starting_step = first_step
|
857
|
+
|
858
|
+
def _load_rewind_state(self, run: "RunRecord"):
|
859
|
+
assert self._settings.resume_from
|
860
|
+
self._rewind_response = self._api.rewind_run(
|
861
|
+
run_name=run.run_id,
|
862
|
+
entity=run.entity or None,
|
863
|
+
project=run.project or None,
|
864
|
+
metric_name=self._settings.resume_from.metric,
|
865
|
+
metric_value=self._settings.resume_from.value,
|
866
|
+
program_path=self._settings.program or None,
|
867
|
+
)
|
868
|
+
self._resume_state.history = self._rewind_response.get("historyLineCount", 0)
|
869
|
+
self._resume_state.config = json.loads(
|
870
|
+
self._rewind_response.get("config", "{}")
|
871
|
+
)
|
872
|
+
|
873
|
+
def _install_rewind_state(self):
|
874
|
+
assert self._settings.resume_from
|
875
|
+
assert self._settings.resume_from.metric == "_step"
|
876
|
+
assert self._run
|
877
|
+
assert self._rewind_response
|
878
|
+
|
879
|
+
first_step = int(self._settings.resume_from.value) + 1
|
880
|
+
self._resume_state.step = first_step
|
881
|
+
|
882
|
+
# We set the fork flag here because rewind uses the forking
|
883
|
+
# infrastructure under the hood. Setting `forked` here
|
884
|
+
# ensures that run._step is properly set in the user process.
|
885
|
+
self._run.forked = True
|
886
|
+
self._run.starting_step = first_step
|
887
|
+
|
888
|
+
def _handle_error(
    self,
    record: "Record",
    error: "wandb_internal_pb2.ErrorInfo",
    run: "RunRecord",
) -> None:
    """Report a run error to the requester, or log it if nobody is waiting."""
    control = record.control
    if not (control.req_resp or control.mailbox_slot):
        logger.error("Got error in async mode: %s", error.message)
        return

    result = proto_util._result_from_record(record)
    run_result = result.run_result
    run_result.run.CopyFrom(run)
    run_result.error.CopyFrom(error)
    self._respond_result(result)
|
901
|
+
|
902
|
+
def send_run(self, record: "Record", file_dir: Optional[str] = None) -> None:
    """Create or update the run on the backend and answer the requester.

    The first run record (``self._run`` still unset) is treated as
    ``wandb.init``: resume / rewind / fork state is resolved and the
    background threads are started. Later run records only update the run.

    Args:
        record: Record carrying the run to upsert.
        file_dir: Passed through to ``_start_run_threads`` on first init.
    """
    run = record.run
    error = None
    is_wandb_init = self._run is None

    # save start time of a run
    self._start_time = int(run.start_time.ToMicroseconds() // 1e6)

    # update telemetry
    if run.telemetry:
        self._telemetry_obj.MergeFrom(run.telemetry)
    if self._settings.x_sync:
        self._telemetry_obj.feature.sync = True

    # build config dict
    config_value_dict: Optional[sender_config.BackendConfigDict] = None
    if run.config:
        self._consolidated_config.update_from_proto(run.config)
        config_value_dict = self._config_backend_dict()
        self._config_save(config_value_dict)

    do_fork = self._settings.fork_from is not None and is_wandb_init
    do_rewind = self._settings.resume_from is not None and is_wandb_init
    do_resume = bool(self._settings.resume)

    num_resume_options_set = sum([do_fork, do_rewind, do_resume])
    if num_resume_options_set > 1:
        error = wandb_internal_pb2.ErrorInfo()
        error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
        error.message = (
            "Multiple resume options specified. "
            "Please specify only one of `fork_from`, `resume`, or `resume_from`."
        )
        self._handle_error(record, error, run)
        # Stop here: carrying on would initialize the run anyway and could
        # send a second, contradictory result for the same record.
        return

    if is_wandb_init:
        # Ensure we have a project to query for status
        if run.project == "":
            run.project = util.auto_project_name(self._settings.program)

        # Only check resume status on `wandb.init`
        if do_resume:
            error = self._setup_resume(run)
        elif do_rewind:
            error = self._load_rewind_state(run)

        if error is not None:
            self._handle_error(record, error, run)
            return

        # Save the resumed config
        if self._resume_state.config is not None:
            self._consolidated_config.merge_resumed_config(
                config_util.dict_strip_value_dict(self._resume_state.config)
            )

            config_value_dict = self._config_backend_dict()
            self._config_save(config_value_dict)

    # handle empty config
    # TODO(jhr): consolidate the 4 ways config is built:
    #            (passed config, empty config, resume config, send_config)
    if not config_value_dict:
        config_value_dict = self._config_backend_dict()
        self._config_save(config_value_dict)

    try:
        server_run = self._init_run(run, config_value_dict)
    except (CommError, UsageError) as e:
        logger.error(e, exc_info=True)
        error = ProtobufErrorHandler.from_exception(e)
        self._handle_error(record, error, run)
        return

    assert self._run  # self._run is configured in _init_run()

    if do_fork:
        error = self._setup_fork(server_run)

    if error is not None:
        self._handle_error(record, error, run)
        return

    if record.control.req_resp or record.control.mailbox_slot:
        result = proto_util._result_from_record(record)
        # TODO: we could do self._interface.publish_defer(resp) to notify
        # the handler not to actually perform server updates for this uuid
        # because the user process will send a summary update when we resume
        result.run_result.run.CopyFrom(self._run)
        self._respond_result(result)

    # Only spin up our threads on the first run message
    if is_wandb_init:
        self._start_run_threads(file_dir)
    else:
        logger.info("updated run: %s", self._run.run_id)
|
999
|
+
|
1000
|
+
def _update_resume_state(self, is_rewinding: bool, inserted: bool):
|
1001
|
+
assert self._run
|
1002
|
+
if self._resume_state.resumed:
|
1003
|
+
self._run.resumed = True
|
1004
|
+
if self._resume_state.wandb_runtime is not None:
|
1005
|
+
self._run.runtime = self._resume_state.wandb_runtime
|
1006
|
+
elif is_rewinding:
|
1007
|
+
# because is_rewinding is mutually exclusive with self._resume_state.resumed,
|
1008
|
+
# this block will always execute if is_rewinding is set
|
1009
|
+
self._install_rewind_state()
|
1010
|
+
else:
|
1011
|
+
# If the user is not resuming, and we didn't insert on upsert_run then
|
1012
|
+
# it is likely that we are overwriting the run which we might want to
|
1013
|
+
# prevent in the future. This could be a false signal since an upsert_run
|
1014
|
+
# message which gets retried in the network could also show up as not
|
1015
|
+
# inserted.
|
1016
|
+
if not inserted:
|
1017
|
+
# no need to flush this, it will get updated eventually
|
1018
|
+
self._telemetry_obj.feature.maybe_run_overwrite = True
|
1019
|
+
|
1020
|
+
def _init_run(
    self,
    run: "RunRecord",
    config_dict: Optional[sender_config.BackendConfigDict],
) -> dict:
    """Upsert (or rewind) the run on the backend and adopt the server's view.

    Stores ``run`` as ``self._run``, applies resume state to it, and copies
    back the identifiers the server returned (storage id, display name,
    project, entity, sweep).

    Returns:
        The server's run dict (the rewind response when rewinding).

    Raises:
        ValueError: if the run is flagged as both resumed and rewinding.
    """
    # We subtract the previous runs runtime when resuming
    adjusted_start = (
        run.start_time.ToMicroseconds() / 1e6
    ) - self._resume_state.runtime
    # TODO: we don't check inserted currently, ultimately we should make
    # the upsert know the resume state and fail transactionally

    # Carry over the resumed run's tags only when the caller supplied none.
    if self._resume_state and self._resume_state.tags and not run.tags:
        run.tags.extend(self._resume_state.tags)

    is_rewinding = bool(self._settings.resume_from)
    if is_rewinding:
        assert self._rewind_response
        server_run = self._rewind_response
        server_messages = None
        inserted = True
    else:
        # Empty proto fields are sent as None rather than empty values.
        upsert_kwargs = dict(
            name=run.run_id,
            entity=run.entity or None,
            project=run.project or None,
            group=run.run_group or None,
            job_type=run.job_type or None,
            display_name=run.display_name or None,
            notes=run.notes or None,
            tags=run.tags[:] or None,
            config=config_dict or None,
            sweep_name=run.sweep_id or None,
            host=run.host or None,
            program_path=self._settings.program or None,
            repo=run.git.remote_url or None,
            commit=run.git.commit or None,
        )
        server_run, inserted, server_messages = self._api.upsert_run(**upsert_kwargs)

    # TODO: we don't want to create jobs in sweeps, since the
    #       executable doesn't appear to be consistent
    if run.sweep_id:
        self._job_builder.disable = True

    self._server_messages = server_messages or []
    self._run = run

    if self._resume_state.resumed and is_rewinding:
        # this should not ever be possible to hit, since we check for
        # resumption above and raise an error if resumption is specified
        # twice.
        raise ValueError(
            "Cannot attempt to rewind and resume a run - only one of "
            "`resume` or `resume_from` can be specified."
        )

    self._update_resume_state(is_rewinding, inserted)
    self._run.starting_step = self._resume_state.step
    self._run.start_time.FromMicroseconds(int(adjusted_start * 1e6))
    self._run.config.CopyFrom(self._interface._make_config(config_dict))
    if self._resume_state.summary is not None:
        resumed_summary = self._interface._make_summary_from_dict(
            self._resume_state.summary
        )
        self._run.summary.CopyFrom(resumed_summary)

    storage_id = server_run.get("id")
    if storage_id:
        self._run.storage_id = storage_id

    server_run_name = server_run.get("name")
    if server_run_name:
        self._api.set_current_run_id(server_run_name)

    display_name = server_run.get("displayName")
    if display_name:
        self._run.display_name = display_name

    project = server_run.get("project")
    # TODO: remove self._api.set_settings, and make self._project a property?
    if project:
        project_name = project.get("name")
        if project_name:
            self._run.project = project_name
            self._project = project_name
            self._api_settings["project"] = project_name
            self._api.set_setting("project", project_name)

        entity = project.get("entity")
        entity_name = entity.get("name") if entity else None
        if entity_name:
            self._run.entity = entity_name
            self._entity = entity_name
            self._api_settings["entity"] = entity_name
            self._api.set_setting("entity", entity_name)

    sweep_id = server_run.get("sweepName")
    if sweep_id:
        self._run.sweep_id = sweep_id

    if os.getenv("SPELL_RUN_URL"):
        self._sync_spell()
    return server_run
|
1116
|
+
|
1117
|
+
def _start_run_threads(self, file_dir: Optional[str] = None) -> None:
    """Start the file stream, file pusher and directory watcher for the run.

    Args:
        file_dir: Passed through to the ``DirWatcher``.
    """
    assert self._run  # self._run is configured by caller
    self._fs = file_stream.FileStreamApi(
        self._api,
        self._run.run_id,
        self._run.start_time.ToMicroseconds() / 1e6,
        timeout=self._settings.x_file_stream_timeout_seconds or 0,
        settings=self._api_settings,
    )

    # Ensure the streaming polices have the proper offsets
    resume_state = self._resume_state
    self._fs.set_file_policy("wandb-summary.json", file_stream.SummaryFilePolicy())
    self._fs.set_file_policy(
        "wandb-history.jsonl",
        file_stream.JsonlFilePolicy(start_chunk_id=resume_state.history),
    )
    self._fs.set_file_policy(
        "wandb-events.jsonl",
        file_stream.JsonlFilePolicy(start_chunk_id=resume_state.events),
    )
    self._fs.set_file_policy(
        "output.log",
        file_stream.CRDedupeFilePolicy(start_chunk_id=resume_state.output),
    )

    # hack to merge run_settings and self._settings object together
    # so that fields like entity or project are available to be attached to Sentry events.
    sentry_tags = dict(self._settings)
    sentry_tags.update(message_to_dict(self._run))
    wandb._sentry.configure_scope(tags=sentry_tags, process_context="internal")

    self._fs.start()
    self._pusher = FilePusher(self._api, self._fs, settings=self._settings)
    self._dir_watcher = DirWatcher(self._settings, self._pusher, file_dir)
    logger.info(
        "run started: %s with start time %s",
        self._run.run_id,
        self._run.start_time.ToMicroseconds() / 1e6,
    )
|
1156
|
+
|
1157
|
+
def _save_history(self, history_dict: Dict[str, Any]) -> None:
|
1158
|
+
if self._fs:
|
1159
|
+
self._fs.push(filenames.HISTORY_FNAME, json.dumps(history_dict))
|
1160
|
+
|
1161
|
+
def send_history(self, record: "Record") -> None:
    """Convert a history record to a dict and save it."""
    row = proto_util.dict_from_proto_list(record.history.item)
    self._save_history(row)
|
1165
|
+
|
1166
|
+
def _update_summary_record(self, summary: "SummaryRecord") -> None:
    """Replace the cached summary with this record's updates and re-publish."""
    self._cached_summary = proto_util.dict_from_proto_list(summary.update)
    self._update_summary()
|
1170
|
+
|
1171
|
+
def send_summary(self, record: "Record") -> None:
    """Apply the summary carried directly on the record."""
    summary = record.summary
    self._update_summary_record(summary)
|
1173
|
+
|
1174
|
+
def send_request_summary_record(self, record: "Record") -> None:
    """Apply the summary carried inside a summary-record request."""
    summary = record.request.summary_record.summary
    self._update_summary_record(summary)
|
1176
|
+
|
1177
|
+
def _update_summary(self) -> None:
    """Merge the cached summary into the consolidated one and publish it.

    The merged summary is pushed to the file stream (when present), written
    to the summary file in the run's files dir, and queued for saving.
    """
    pending = self._cached_summary.copy()
    # The "_wandb" entry is owned by the sender's metadata summary.
    pending.pop("_wandb", None)
    if self._metadata_summary:
        pending["_wandb"] = self._metadata_summary

    # merge with consolidated summary
    self._consolidated_summary.update(pending)
    json_summary = json.dumps(self._consolidated_summary)
    if self._fs:
        self._fs.push(filenames.SUMMARY_FNAME, json_summary)

    # TODO(jhr): we should only write this at the end of the script
    summary_path = os.path.join(self._settings.files_dir, filenames.SUMMARY_FNAME)
    with open(summary_path, "w") as summary_file:
        summary_file.write(json_summary)
    self._save_file(interface.GlobStr(filenames.SUMMARY_FNAME))
|
1192
|
+
|
1193
|
+
def send_stats(self, record: "Record") -> None:
    """Push a system-stats record to the events file stream.

    Non-SYSTEM stats are ignored, as is everything received before the file
    stream and run exist.
    """
    stats = record.stats
    if stats.stats_type != wandb_internal_pb2.StatsRecord.StatsType.SYSTEM:
        return
    if not self._fs or not self._run:
        return

    now_us = stats.timestamp.ToMicroseconds()
    start_us = self._run.start_time.ToMicroseconds()

    system_values = {}
    for item in stats.item:
        try:
            system_values[item.key] = json.loads(item.value_json)
        except json.JSONDecodeError:
            # Skip the undecodable item but keep the rest of the sample.
            logger.error("error decoding stats json: %s", item.value_json)

    row: Dict[str, Any] = {"system": system_values}
    self._flatten(row)
    row["_wandb"] = True
    row["_timestamp"] = now_us / 1e6
    row["_runtime"] = (now_us - start_us) / 1e6
    self._fs.push(filenames.EVENTS_FNAME, json.dumps(row))
    # TODO(jhr): check fs.push results?
|
1216
|
+
|
1217
|
+
def _output_raw_finish(self) -> None:
|
1218
|
+
for stream, output_raw in self._output_raw_streams.items():
|
1219
|
+
output_raw._stopped.set()
|
1220
|
+
|
1221
|
+
# shut down threads
|
1222
|
+
output_raw._writer_thr.join(timeout=5)
|
1223
|
+
if output_raw._writer_thr.is_alive():
|
1224
|
+
logger.info("processing output...")
|
1225
|
+
output_raw._writer_thr.join()
|
1226
|
+
output_raw._reader_thr.join()
|
1227
|
+
|
1228
|
+
# flush output buffers and files
|
1229
|
+
self._output_raw_flush(stream)
|
1230
|
+
self._output_raw_streams = {}
|
1231
|
+
if self._output_raw_file:
|
1232
|
+
self._output_raw_file.close()
|
1233
|
+
self._output_raw_file = None
|
1234
|
+
|
1235
|
+
def _output_raw_writer_thread(self, stream: "StreamLiterals") -> None:
|
1236
|
+
while True:
|
1237
|
+
output_raw = self._output_raw_streams[stream]
|
1238
|
+
if output_raw._queue.empty():
|
1239
|
+
if output_raw._stopped.is_set():
|
1240
|
+
return
|
1241
|
+
time.sleep(0.5)
|
1242
|
+
continue
|
1243
|
+
data = []
|
1244
|
+
while not output_raw._queue.empty():
|
1245
|
+
data.append(output_raw._queue.get())
|
1246
|
+
if output_raw._stopped.is_set() and sum(map(len, data)) > 100000:
|
1247
|
+
logger.warning("Terminal output too large. Logging without processing.")
|
1248
|
+
self._output_raw_flush(stream)
|
1249
|
+
for line in data:
|
1250
|
+
self._output_raw_flush(stream, line)
|
1251
|
+
# TODO: lets mark that this happened in telemetry
|
1252
|
+
return
|
1253
|
+
try:
|
1254
|
+
output_raw._emulator.write("".join(data))
|
1255
|
+
except Exception as e:
|
1256
|
+
logger.warning(f"problem writing to output_raw emulator: {e}")
|
1257
|
+
|
1258
|
+
def _output_raw_reader_thread(self, stream: "StreamLiterals") -> None:
|
1259
|
+
output_raw = self._output_raw_streams[stream]
|
1260
|
+
while not (output_raw._stopped.is_set() and output_raw._queue.empty()):
|
1261
|
+
self._output_raw_flush(stream)
|
1262
|
+
time.sleep(_OUTPUT_MIN_CALLBACK_INTERVAL)
|
1263
|
+
|
1264
|
+
def _output_raw_flush(
|
1265
|
+
self, stream: "StreamLiterals", data: Optional[str] = None
|
1266
|
+
) -> None:
|
1267
|
+
if data is None:
|
1268
|
+
output_raw = self._output_raw_streams[stream]
|
1269
|
+
try:
|
1270
|
+
data = output_raw._emulator.read()
|
1271
|
+
except Exception as e:
|
1272
|
+
logger.warning(f"problem reading from output_raw emulator: {e}")
|
1273
|
+
if data:
|
1274
|
+
self._send_output_line(stream, data)
|
1275
|
+
if self._output_raw_file:
|
1276
|
+
self._output_raw_file.write(data.encode("utf-8"))
|
1277
|
+
|
1278
|
+
def send_request_python_packages(self, record: "Record") -> None:
    """Write the reported packages as sorted ``name==version`` lines.

    The list goes to the requirements file in the run's files dir.
    """
    import os

    from wandb.sdk.lib.filenames import REQUIREMENTS_FNAME

    pinned = [
        f"{r.name}=={r.version}" for r in record.request.python_packages.package
    ]
    pinned.sort()
    requirements_path = os.path.join(self._settings.files_dir, REQUIREMENTS_FNAME)
    with open(requirements_path, "w") as requirements_file:
        requirements_file.write("\n".join(pinned))
|
1288
|
+
|
1289
|
+
def send_output(self, record: "Record") -> None:
    """Forward one console output line; dropped when no file stream exists."""
    if not self._fs:
        return
    out = record.output
    is_stderr = out.output_type == wandb_internal_pb2.OutputRecord.OutputType.STDERR
    stream: StreamLiterals = "stderr" if is_stderr else "stdout"
    self._send_output_line(stream, out.line)
|
1298
|
+
|
1299
|
+
def send_output_raw(self, record: "Record") -> None:
    """Queue raw console output on its per-stream processor.

    On first use of a stream, the processor is created and started, and the
    shared console output file is opened if it is not open yet. Nothing
    happens when no file stream exists.
    """
    if not self._fs:
        return
    out = record.output_raw
    is_stderr = out.output_type == wandb_internal_pb2.OutputRawRecord.OutputType.STDERR
    stream: StreamLiterals = "stderr" if is_stderr else "stdout"
    line = out.line

    output_raw = self._output_raw_streams.get(stream)
    if not output_raw:
        output_raw = _OutputRawStream(stream=stream, sm=self)
        self._output_raw_streams[stream] = output_raw

        # open the console output file shared between both streams
        if not self._output_raw_file:
            output_log_path = os.path.join(
                self._settings.files_dir, filenames.OUTPUT_FNAME
            )
            opened_file = None
            try:
                opened_file = filesystem.CRDedupedFile(open(output_log_path, "wb"))
            except OSError as e:
                # Carry on without the local copy of the output.
                logger.warning(f"could not open output_raw_file: {e}")
            if opened_file:
                self._output_raw_file = opened_file
        output_raw.start()

    output_raw._queue.put(line)
|
1330
|
+
|
1331
|
+
def _send_output_line(self, stream: "StreamLiterals", line: str) -> None:
    """Combined writer for raw and non raw output lines.

    This is combined because they are both post emulator.

    Text that does not end with a newline is accumulated per stream in
    ``self._partial_output``; a fragment that starts with a carriage return
    discards whatever was accumulated for that stream first. When a
    newline-terminated chunk arrives it is emitted as
    ``"{prefix}{timestamp} {buffered}{line}"`` where ``prefix`` is
    ``"ERROR "`` for stderr and empty otherwise, pushed to ``self._fs`` when
    one is active, and the stream's buffer is cleared.

    Args:
        stream: Name of the console stream the text came from.
        line: Text to buffer or emit.
    """
    # Local import: ``timezone`` is only needed here, and the timestamp is
    # built from an aware UTC datetime because ``datetime.utcfromtimestamp``
    # is deprecated as of Python 3.12.
    from datetime import timezone

    if not line.endswith("\n"):
        self._partial_output.setdefault(stream, "")
        if line.startswith("\r"):
            # TODO: maybe we shouldnt just drop this, what if there was some \ns in the partial
            # that should probably be the check instead of not line.endswith(\n")
            # logger.info(f"Dropping data {self._partial_output[stream]}")
            self._partial_output[stream] = ""
        self._partial_output[stream] += line
        # TODO(jhr): how do we make sure this gets flushed?
        # we might need this for other stuff like telemetry
        return

    prepend = "ERROR " if stream == "stderr" else ""
    # TODO(jhr): use time from timestamp proto
    # TODO(jhr): do we need to make sure we write full lines?
    # seems to be some issues with line breaks
    cur_time = time.time()
    # Dropping tzinfo keeps the ISO text free of a "+00:00" suffix, i.e. the
    # same format the naive UTC timestamp produced before.
    utc_now = datetime.fromtimestamp(cur_time, tz=timezone.utc).replace(tzinfo=None)
    timestamp = utc_now.isoformat() + " "
    prev_str = self._partial_output.get(stream, "")
    line = f"{prepend}{timestamp}{prev_str}{line}"
    if self._fs:
        self._fs.push(filenames.OUTPUT_FNAME, line)
    self._partial_output[stream] = ""
|
1360
|
+
|
1361
|
+
def _update_config(self) -> None:
    """Flag the config as needing a debounce pass.

    Only sets ``self._config_needs_debounce``; nothing is sent from here.
    """
    self._config_needs_debounce = True
|
1363
|
+
|
1364
|
+
def send_config(self, record: "Record") -> None:
    """Fold a config record into the consolidated config and flag an update.

    Args:
        record: Record whose ``config`` field holds the config changes.
    """
    config_update = record.config
    self._consolidated_config.update_from_proto(config_update)
    self._update_config()
|
1367
|
+
|
1368
|
+
def send_metric(self, record: "Record") -> None:
    """Record a metric definition in the config metric tables.

    Metrics carrying a ``glob_name`` are logged and ignored. Otherwise the
    incoming definition either replaces (``_control.overwrite``) or is merged
    into the definition stored under the same name. If the definition names
    a ``step_metric`` that already has an index, a copy is encoded with that
    name replaced by ``step_metric_index``. The encoded dict is written to
    the metric's existing slot in ``self._config_metric_pbdict_list`` or
    appended as a new slot, and a config update is flagged.

    Args:
        record: Record whose ``metric`` field holds the definition.
    """
    incoming = record.metric
    if incoming.glob_name:
        logger.warning("Seen metric with glob (shouldn't happen)")
        return

    # merge or overwrite
    stored = self._config_metric_dict.get(
        incoming.name, wandb_internal_pb2.MetricRecord()
    )
    combine = stored.CopyFrom if incoming._control.overwrite else stored.MergeFrom
    combine(incoming)
    self._config_metric_dict[incoming.name] = stored

    # convert step_metric to index
    to_encode = stored
    if stored.step_metric:
        step_slot = self._config_metric_index_dict.get(stored.step_metric)
        if step_slot is not None:
            # work on a copy so the stored definition keeps its step_metric
            scratch = wandb_internal_pb2.Record()
            scratch.metric.CopyFrom(stored)
            to_encode = scratch.metric

            to_encode.ClearField("step_metric")
            # the index is written one-based
            to_encode.step_metric_index = step_slot + 1

    encoded: Dict[int, Any] = proto_util.proto_encode_to_dict(to_encode)
    slot = self._config_metric_index_dict.get(to_encode.name)
    if slot is None:
        self._config_metric_index_dict[to_encode.name] = len(
            self._config_metric_pbdict_list
        )
        self._config_metric_pbdict_list.append(encoded)
    else:
        self._config_metric_pbdict_list[slot] = encoded
    self._update_config()
|
1406
|
+
|
1407
|
+
def _update_telemetry_record(self, telemetry: telemetry.TelemetryRecord) -> None:
    """Merge a telemetry record into the accumulated one and flag an update.

    Args:
        telemetry: Telemetry fields to merge into ``self._telemetry_obj``.
    """
    accumulated = self._telemetry_obj
    accumulated.MergeFrom(telemetry)
    self._update_config()
|
1410
|
+
|
1411
|
+
def send_telemetry(self, record: "Record") -> None:
    """Merge the telemetry carried directly on ``record``.

    Args:
        record: Record whose ``telemetry`` field is merged.
    """
    payload = record.telemetry
    self._update_telemetry_record(payload)
|
1413
|
+
|
1414
|
+
def send_request_telemetry_record(self, record: "Record") -> None:
    """Merge the telemetry carried inside a telemetry-record request.

    Args:
        record: Record whose ``request.telemetry_record.telemetry`` field is
            merged.
    """
    payload = record.request.telemetry_record.telemetry
    self._update_telemetry_record(payload)
|
1416
|
+
|
1417
|
+
def _save_file(
    self, fname: interface.GlobStr, policy: "interface.PolicyName" = "end"
) -> None:
    """Register a save policy for a file glob with the directory watcher.

    The request is always logged; it is only applied when a directory
    watcher exists.

    Args:
        fname: Glob of the file(s) to save.
        policy: Save policy to apply; defaults to ``"end"``.
    """
    logger.info("saving file %s with policy %s", fname, policy)
    watcher = self._dir_watcher
    if not watcher:
        return
    watcher.update_policy(fname, policy)
|
1423
|
+
|
1424
|
+
def send_files(self, record: "Record") -> None:
    """Apply the save policy of every file entry in a files record.

    Args:
        record: Record whose ``files.files`` entries each carry a ``path``
            and a ``policy``.
    """
    for entry in record.files.files:
        # TODO(jhr): fix paths with directories
        glob = interface.GlobStr(entry.path)
        policy = interface.file_enum_to_policy(entry.policy)
        self._save_file(glob, policy)
|
1431
|
+
|
1432
|
+
def send_header(self, record: "Record") -> None:
    """Accept a header record; intentionally a no-op."""
|
1434
|
+
|
1435
|
+
def send_footer(self, record: "Record") -> None:
    """Accept a footer record; intentionally a no-op."""
|
1437
|
+
|
1438
|
+
def send_tbrecord(self, record: "Record") -> None:
    """Accept a tbrecord; intentionally a no-op here.

    tbrecord watching threads are handled by handler.py.
    """
|
1441
|
+
|
1442
|
+
def send_request_link_artifact(self, record: "Record") -> None:
    """Link an artifact to a portfolio and respond with the outcome.

    The link call is only made when an artifact id (client or server), a
    portfolio name, an entity and a project are all present. A failing call
    is logged and reported through the result's error message rather than
    raised. A result is always sent back.

    Args:
        record: Request record carrying ``request.link_artifact``.

    Raises:
        ValueError: If the record's control has neither ``req_resp`` nor
            ``mailbox_slot`` set.
    """
    control = record.control
    if not control.req_resp and not control.mailbox_slot:
        raise ValueError(
            f"Expected either `req_resp` or `mailbox_slot`, got: {record.control!r}"
        )
    result = proto_util._result_from_record(record)

    link = record.request.link_artifact
    client_id, server_id = link.client_id, link.server_id
    portfolio_name = link.portfolio_name
    entity, project = link.portfolio_entity, link.portfolio_project
    aliases = link.portfolio_aliases
    organization = link.portfolio_organization
    logger.debug(
        f"link_artifact params - client_id={client_id}, server_id={server_id}, "
        f"portfolio_name={portfolio_name}, entity={entity}, project={project}, "
        f"organization={organization}"
    )

    has_artifact_id = client_id or server_id
    if has_artifact_id and portfolio_name and entity and project:
        try:
            self._api.link_artifact(
                client_id,
                server_id,
                portfolio_name,
                entity,
                project,
                aliases,
                organization,
            )
        except Exception as e:
            org_or_entity = organization or entity
            # NOTE(review): the error is stored on ``log_artifact_response``
            # although this handles a link request - confirm this is the
            # field the requester actually reads.
            result.response.log_artifact_response.error_message = (
                f"error linking artifact to "
                f'"{org_or_entity}/{project}/{portfolio_name}"; error: {e}'
            )
            logger.warning("Failed to link artifact to portfolio: %s", e)
    self._respond_result(result)
|
1480
|
+
|
1481
|
+
def send_use_artifact(self, record: "Record") -> None:
    """Pretend to send a used artifact.

    This function doesn't actually send anything, it is just used internally.
    It only updates the job builder: a used artifact with a partial job name
    has its id registered as the partial source, while a ``"job"`` artifact
    without one disables the job builder.

    Args:
        record: Record whose ``use_artifact`` field describes the artifact.
    """
    used = record.use_artifact

    if used.partial.job_name:
        # job is partial, let job builder rebuild job, set job source dict
        self._job_builder.set_partial_source_id(used.id)
    elif used.type == "job":
        self._job_builder.disable = True
|
1493
|
+
|
1494
|
+
def send_request_log_artifact(self, record: "Record") -> None:
    """Log an artifact on request and respond with its id or an error.

    Any exception raised while sending (including an empty send result) is
    converted into the response's ``error_message``; a result is always sent
    back.

    Args:
        record: Request record carrying ``request.log_artifact``; its
            control must have ``req_resp`` set.
    """
    assert record.control.req_resp
    result = proto_util._result_from_record(record)
    request = record.request.log_artifact
    artifact = request.artifact
    history_step = request.history_step
    response = result.response.log_artifact_response

    try:
        saved = self._send_artifact(artifact, history_step)
        assert saved, "Unable to send artifact"
        response.artifact_id = saved["id"]
        logger.info(f"logged artifact {artifact.name} - {saved}")
    except Exception as e:
        response.error_message = (
            f'error logging artifact "{artifact.type}/{artifact.name}": {e}'
        )

    self._respond_result(result)
|
1511
|
+
|
1512
|
+
def send_artifact(self, record: "Record") -> None:
    """Send the artifact carried on ``record``, logging instead of raising.

    Args:
        record: Record whose ``artifact`` field is sent.
    """
    artifact = record.artifact
    try:
        outcome = self._send_artifact(artifact)
    except Exception as e:
        failure = 'send_artifact: failed for artifact "{}/{}": {}'.format(
            artifact.type, artifact.name, e
        )
        logger.error(failure)
        return
    # NOTE: an exception raised while formatting/logging the success line
    # would previously have been caught as well; attribute reads and logging
    # are assumed not to raise here.
    logger.info(f"sent artifact {artifact.name} - {outcome}")
|
1523
|
+
|
1524
|
+
def _send_artifact(
    self, artifact: "ArtifactRecord", history_step: Optional[int] = None
) -> Optional[Dict]:
    """Save an artifact through an ``ArtifactSaver`` and return the result.

    Distributed artifacts (those with a ``distributed_id``) are refused with
    a warning when the server's max CLI version is unknown or older than
    0.10.16. After saving, the job builder is told about the server
    artifact and the manifest file named on the record, if any, is removed.

    Args:
        artifact: Artifact record to save.
        history_step: Optional history step passed through to the saver.

    Returns:
        Whatever ``ArtifactSaver.save`` returns, or ``None`` when a
        distributed artifact is refused.
    """
    from wandb.util import parse_version

    assert self._pusher
    saver = ArtifactSaver(
        api=self._api,
        digest=artifact.digest,
        manifest_json=_manifest_json_from_proto(artifact.manifest),
        file_pusher=self._pusher,
        is_user_created=artifact.user_created,
    )

    if artifact.distributed_id:
        server_max = self._max_cli_version()
        if server_max is None or parse_version(server_max) < parse_version(
            "0.10.16"
        ):
            logger.warning(
                "This W&B Server doesn't support distributed artifacts, "
                "have your administrator install wandb/local >= 0.9.37"
            )
            return None

    metadata = None
    if artifact.metadata:
        metadata = json.loads(artifact.metadata)

    saved = saver.save(
        type=artifact.type,
        name=artifact.name,
        client_id=artifact.client_id,
        sequence_client_id=artifact.sequence_client_id,
        metadata=metadata,
        ttl_duration_seconds=artifact.ttl_duration_seconds or None,
        description=artifact.description or None,
        aliases=artifact.aliases,
        tags=artifact.tags,
        use_after_commit=artifact.use_after_commit,
        distributed_id=artifact.distributed_id,
        finalize=artifact.finalize,
        incremental=artifact.incremental_beta1,
        history_step=history_step,
        base_id=artifact.base_id or None,
    )

    self._job_builder._handle_server_artifact(saved, artifact)

    manifest_path = artifact.manifest.manifest_file_path
    if manifest_path:
        # The manifest file may already be gone; that is not an error.
        with contextlib.suppress(FileNotFoundError):
            os.remove(manifest_path)
    return saved
|
1574
|
+
|
1575
|
+
def send_alert(self, record: "Record") -> None:
    """Send a scriptable run alert when the server supports alerts.

    A warning is logged instead when the server's max CLI version is unknown
    or older than 0.10.9. Failures of the notify call are logged, not raised.

    Args:
        record: Record whose ``alert`` field carries title, text, level and
            wait duration.
    """
    from wandb.util import parse_version

    alert = record.alert
    server_max = self._max_cli_version()
    if server_max is None or parse_version(server_max) < parse_version("0.10.9"):
        logger.warning(
            "This W&B server doesn't support alerts, "
            "have your administrator install wandb/local >= 0.9.31"
        )
        return

    try:
        self._api.notify_scriptable_run_alert(
            title=alert.title,
            text=alert.text,
            level=alert.level,
            wait_duration=alert.wait_duration,
        )
    except Exception as e:
        logger.error(f"send_alert: failed for alert {alert.title!r}: {e}")
|
1597
|
+
|
1598
|
+
def finish(self) -> None:
    """Shut the sender down.

    Finishes raw output handling, then the directory watcher, the file
    pusher (finished and joined) and the file stream (finished with
    ``self._exit_code``), clearing each reference once done, and finally
    ends the sentry session.
    """
    logger.info("shutting down sender")
    # if self._tb_watcher:
    #     self._tb_watcher.finish()
    self._output_raw_finish()

    watcher = self._dir_watcher
    if watcher:
        watcher.finish()
        self._dir_watcher = None

    pusher = self._pusher
    if pusher:
        pusher.finish()
        pusher.join()
        self._pusher = None

    file_stream = self._fs
    if file_stream:
        file_stream.finish(self._exit_code)
        self._fs = None

    wandb._sentry.end_session()
|
1614
|
+
|
1615
|
+
def _max_cli_version(self) -> Optional[str]:
    """Return the server-reported maximum CLI version, if there is one.

    Returns:
        The ``max_cli_version`` string found under ``cliVersionInfo`` in the
        server info, or ``None`` when that section is missing or null, or
        when the value is not a string.
    """
    server_info = self.get_server_info()
    # ``dict.get(key, {})`` only falls back when the key is absent. A key
    # that is present with a null value must be treated as missing too,
    # otherwise the chained lookup raises AttributeError.
    cli_version_info = server_info.get("cliVersionInfo") or {}
    max_cli_version = cli_version_info.get("max_cli_version", None)
    if not isinstance(max_cli_version, str):
        return None
    return max_cli_version
|
1623
|
+
|
1624
|
+
def get_viewer_server_info(self) -> None:
    """Fetch and cache viewer and server info unless both are cached.

    A single API call returns both values; it is skipped only when both
    cached values are already truthy.
    """
    if self._cached_server_info and self._cached_viewer:
        return
    viewer, server_info = self._api.viewer_server_info()
    self._cached_viewer = viewer
    self._cached_server_info = server_info
|
1628
|
+
|
1629
|
+
def get_viewer_info(self) -> Dict[str, Any]:
    """Return the cached viewer info, fetching it first when not cached."""
    if self._cached_viewer:
        return self._cached_viewer
    self.get_viewer_server_info()
    return self._cached_viewer
|
1633
|
+
|
1634
|
+
def get_server_info(self) -> Dict[str, Any]:
    """Return the cached server info, fetching it first when not cached."""
    if self._cached_server_info:
        return self._cached_server_info
    self.get_viewer_server_info()
    return self._cached_server_info
|
1638
|
+
|
1639
|
+
def get_local_info(self) -> "LocalInfo":
    """Queries the server to get the local version information.

    First, we perform an introspection, if it returns empty we deduce that the
    docker image is out-of-date. Otherwise, we use the returned values to deduce the
    state of the local server.

    Returns:
        A ``LocalInfo`` message. In offline mode, or when the server reports
        a null ``latestLocalVersionInfo``, it is marked as not out of date
        and no version is set.
    """
    info = wandb_internal_pb2.LocalInfo()
    if self._settings._offline:
        info.out_of_date = False
        return info

    # Assuming the query is successful if the result is empty it indicates that
    # the backend is out of date since it doesn't have the desired field
    version_info = self.get_server_info().get("latestLocalVersionInfo", {})
    if version_info is None:
        info.out_of_date = False
        return info

    info.out_of_date = version_info.get("outOfDate", True)
    info.version = version_info.get("latestVersionString", "latest")
    return info
|
1665
|
+
|
1666
|
+
def _flush_job(self) -> None:
    """Build the job artifact for this run and publish it.

    Does nothing when the job builder is disabled or the run is offline.
    Otherwise the builder receives the non-internal config and a copy of the
    cached summary without its ``"_wandb"`` key. If it produces an artifact
    and a run is known, the artifact is converted to a proto, tagged with the
    run's identity and the ``"latest"`` alias plus the builder's aliases,
    flagged as user created / use-after-commit / finalized, and published.
    """
    builder = self._job_builder
    if builder.disable or self._settings._offline:
        return

    builder.set_config(self._consolidated_config.non_internal_config())
    summary = self._cached_summary.copy()
    summary.pop("_wandb", None)
    builder.set_summary(summary)

    artifact = builder.build(api=self._api)
    if artifact is None or self._run is None:
        return

    proto = self._interface._make_artifact(artifact)
    run = self._run
    proto.run_id = run.run_id
    proto.project = run.project
    proto.entity = run.entity
    # TODO: this should be removed when the latest tag is handled
    # by the backend (WB-12116)
    proto.aliases.append("latest")
    # add docker image tag
    for extra_alias in self._job_builder._aliases:
        proto.aliases.append(extra_alias)

    proto.user_created = True
    proto.use_after_commit = True
    proto.finalize = True

    self._interface._publish_artifact(proto)
|
1692
|
+
|
1693
|
+
def __next__(self) -> "Record":
    """Block until a record is available on the record queue and return it."""
    record = self._record_q.get(block=True)
    return record


# Same callable exposed under the name ``next``.
next = __next__
|