wandb 0.18.2__py3-none-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (827)
  1. package_readme.md +89 -0
  2. wandb/__init__.py +245 -0
  3. wandb/__init__.pyi +1139 -0
  4. wandb/__main__.py +3 -0
  5. wandb/_globals.py +19 -0
  6. wandb/agents/__init__.py +0 -0
  7. wandb/agents/pyagent.py +363 -0
  8. wandb/analytics/__init__.py +3 -0
  9. wandb/analytics/sentry.py +266 -0
  10. wandb/apis/__init__.py +48 -0
  11. wandb/apis/attrs.py +40 -0
  12. wandb/apis/importers/__init__.py +1 -0
  13. wandb/apis/importers/internals/internal.py +385 -0
  14. wandb/apis/importers/internals/protocols.py +99 -0
  15. wandb/apis/importers/internals/util.py +78 -0
  16. wandb/apis/importers/mlflow.py +254 -0
  17. wandb/apis/importers/validation.py +108 -0
  18. wandb/apis/importers/wandb.py +1603 -0
  19. wandb/apis/internal.py +232 -0
  20. wandb/apis/normalize.py +89 -0
  21. wandb/apis/paginator.py +81 -0
  22. wandb/apis/public/__init__.py +34 -0
  23. wandb/apis/public/api.py +1305 -0
  24. wandb/apis/public/artifacts.py +1090 -0
  25. wandb/apis/public/const.py +4 -0
  26. wandb/apis/public/files.py +195 -0
  27. wandb/apis/public/history.py +149 -0
  28. wandb/apis/public/jobs.py +659 -0
  29. wandb/apis/public/projects.py +154 -0
  30. wandb/apis/public/query_generator.py +166 -0
  31. wandb/apis/public/reports.py +469 -0
  32. wandb/apis/public/runs.py +914 -0
  33. wandb/apis/public/sweeps.py +240 -0
  34. wandb/apis/public/teams.py +198 -0
  35. wandb/apis/public/users.py +136 -0
  36. wandb/apis/reports/__init__.py +1 -0
  37. wandb/apis/reports/v1/__init__.py +8 -0
  38. wandb/apis/reports/v2/__init__.py +8 -0
  39. wandb/apis/workspaces/__init__.py +8 -0
  40. wandb/beta/workflows.py +288 -0
  41. wandb/bin/nvidia_gpu_stats +0 -0
  42. wandb/bin/wandb-core +0 -0
  43. wandb/cli/__init__.py +0 -0
  44. wandb/cli/cli.py +3004 -0
  45. wandb/data_types.py +63 -0
  46. wandb/docker/__init__.py +342 -0
  47. wandb/docker/auth.py +436 -0
  48. wandb/docker/wandb-entrypoint.sh +33 -0
  49. wandb/docker/www_authenticate.py +94 -0
  50. wandb/env.py +514 -0
  51. wandb/errors/__init__.py +17 -0
  52. wandb/errors/errors.py +37 -0
  53. wandb/errors/term.py +103 -0
  54. wandb/errors/util.py +57 -0
  55. wandb/errors/warnings.py +2 -0
  56. wandb/filesync/__init__.py +0 -0
  57. wandb/filesync/dir_watcher.py +403 -0
  58. wandb/filesync/stats.py +100 -0
  59. wandb/filesync/step_checksum.py +142 -0
  60. wandb/filesync/step_prepare.py +179 -0
  61. wandb/filesync/step_upload.py +290 -0
  62. wandb/filesync/upload_job.py +142 -0
  63. wandb/integration/__init__.py +0 -0
  64. wandb/integration/catboost/__init__.py +5 -0
  65. wandb/integration/catboost/catboost.py +178 -0
  66. wandb/integration/cohere/__init__.py +3 -0
  67. wandb/integration/cohere/cohere.py +21 -0
  68. wandb/integration/cohere/resolver.py +347 -0
  69. wandb/integration/diffusers/__init__.py +3 -0
  70. wandb/integration/diffusers/autologger.py +76 -0
  71. wandb/integration/diffusers/pipeline_resolver.py +50 -0
  72. wandb/integration/diffusers/resolvers/__init__.py +9 -0
  73. wandb/integration/diffusers/resolvers/multimodal.py +882 -0
  74. wandb/integration/diffusers/resolvers/utils.py +102 -0
  75. wandb/integration/fastai/__init__.py +249 -0
  76. wandb/integration/gym/__init__.py +105 -0
  77. wandb/integration/huggingface/__init__.py +3 -0
  78. wandb/integration/huggingface/huggingface.py +18 -0
  79. wandb/integration/huggingface/resolver.py +213 -0
  80. wandb/integration/keras/__init__.py +11 -0
  81. wandb/integration/keras/callbacks/__init__.py +5 -0
  82. wandb/integration/keras/callbacks/metrics_logger.py +136 -0
  83. wandb/integration/keras/callbacks/model_checkpoint.py +195 -0
  84. wandb/integration/keras/callbacks/tables_builder.py +226 -0
  85. wandb/integration/keras/keras.py +1091 -0
  86. wandb/integration/kfp/__init__.py +6 -0
  87. wandb/integration/kfp/helpers.py +28 -0
  88. wandb/integration/kfp/kfp_patch.py +324 -0
  89. wandb/integration/kfp/wandb_logging.py +182 -0
  90. wandb/integration/langchain/__init__.py +3 -0
  91. wandb/integration/langchain/wandb_tracer.py +48 -0
  92. wandb/integration/lightgbm/__init__.py +239 -0
  93. wandb/integration/lightning/__init__.py +0 -0
  94. wandb/integration/lightning/fabric/__init__.py +3 -0
  95. wandb/integration/lightning/fabric/logger.py +762 -0
  96. wandb/integration/magic.py +556 -0
  97. wandb/integration/metaflow/__init__.py +3 -0
  98. wandb/integration/metaflow/metaflow.py +383 -0
  99. wandb/integration/openai/__init__.py +3 -0
  100. wandb/integration/openai/fine_tuning.py +480 -0
  101. wandb/integration/openai/openai.py +22 -0
  102. wandb/integration/openai/resolver.py +240 -0
  103. wandb/integration/prodigy/__init__.py +3 -0
  104. wandb/integration/prodigy/prodigy.py +299 -0
  105. wandb/integration/sacred/__init__.py +117 -0
  106. wandb/integration/sagemaker/__init__.py +12 -0
  107. wandb/integration/sagemaker/auth.py +28 -0
  108. wandb/integration/sagemaker/config.py +49 -0
  109. wandb/integration/sagemaker/files.py +3 -0
  110. wandb/integration/sagemaker/resources.py +34 -0
  111. wandb/integration/sb3/__init__.py +3 -0
  112. wandb/integration/sb3/sb3.py +153 -0
  113. wandb/integration/sklearn/__init__.py +37 -0
  114. wandb/integration/sklearn/calculate/__init__.py +32 -0
  115. wandb/integration/sklearn/calculate/calibration_curves.py +125 -0
  116. wandb/integration/sklearn/calculate/class_proportions.py +68 -0
  117. wandb/integration/sklearn/calculate/confusion_matrix.py +93 -0
  118. wandb/integration/sklearn/calculate/decision_boundaries.py +40 -0
  119. wandb/integration/sklearn/calculate/elbow_curve.py +55 -0
  120. wandb/integration/sklearn/calculate/feature_importances.py +67 -0
  121. wandb/integration/sklearn/calculate/learning_curve.py +64 -0
  122. wandb/integration/sklearn/calculate/outlier_candidates.py +69 -0
  123. wandb/integration/sklearn/calculate/residuals.py +86 -0
  124. wandb/integration/sklearn/calculate/silhouette.py +118 -0
  125. wandb/integration/sklearn/calculate/summary_metrics.py +62 -0
  126. wandb/integration/sklearn/plot/__init__.py +35 -0
  127. wandb/integration/sklearn/plot/classifier.py +329 -0
  128. wandb/integration/sklearn/plot/clusterer.py +146 -0
  129. wandb/integration/sklearn/plot/regressor.py +121 -0
  130. wandb/integration/sklearn/plot/shared.py +91 -0
  131. wandb/integration/sklearn/utils.py +183 -0
  132. wandb/integration/tensorboard/__init__.py +10 -0
  133. wandb/integration/tensorboard/log.py +355 -0
  134. wandb/integration/tensorboard/monkeypatch.py +185 -0
  135. wandb/integration/tensorflow/__init__.py +5 -0
  136. wandb/integration/tensorflow/estimator_hook.py +54 -0
  137. wandb/integration/torch/__init__.py +0 -0
  138. wandb/integration/torch/wandb_torch.py +554 -0
  139. wandb/integration/ultralytics/__init__.py +11 -0
  140. wandb/integration/ultralytics/bbox_utils.py +208 -0
  141. wandb/integration/ultralytics/callback.py +524 -0
  142. wandb/integration/ultralytics/classification_utils.py +83 -0
  143. wandb/integration/ultralytics/mask_utils.py +202 -0
  144. wandb/integration/ultralytics/pose_utils.py +103 -0
  145. wandb/integration/xgboost/__init__.py +11 -0
  146. wandb/integration/xgboost/xgboost.py +189 -0
  147. wandb/integration/yolov8/__init__.py +0 -0
  148. wandb/integration/yolov8/yolov8.py +284 -0
  149. wandb/jupyter.py +515 -0
  150. wandb/magic.py +3 -0
  151. wandb/mpmain/__init__.py +0 -0
  152. wandb/mpmain/__main__.py +1 -0
  153. wandb/old/__init__.py +0 -0
  154. wandb/old/core.py +53 -0
  155. wandb/old/settings.py +173 -0
  156. wandb/old/summary.py +440 -0
  157. wandb/plot/__init__.py +19 -0
  158. wandb/plot/bar.py +45 -0
  159. wandb/plot/confusion_matrix.py +100 -0
  160. wandb/plot/histogram.py +39 -0
  161. wandb/plot/line.py +43 -0
  162. wandb/plot/line_series.py +88 -0
  163. wandb/plot/pr_curve.py +136 -0
  164. wandb/plot/roc_curve.py +118 -0
  165. wandb/plot/scatter.py +32 -0
  166. wandb/plot/utils.py +183 -0
  167. wandb/plot/viz.py +123 -0
  168. wandb/proto/__init__.py +0 -0
  169. wandb/proto/v3/__init__.py +0 -0
  170. wandb/proto/v3/wandb_base_pb2.py +55 -0
  171. wandb/proto/v3/wandb_internal_pb2.py +1608 -0
  172. wandb/proto/v3/wandb_server_pb2.py +208 -0
  173. wandb/proto/v3/wandb_settings_pb2.py +112 -0
  174. wandb/proto/v3/wandb_telemetry_pb2.py +106 -0
  175. wandb/proto/v4/__init__.py +0 -0
  176. wandb/proto/v4/wandb_base_pb2.py +30 -0
  177. wandb/proto/v4/wandb_internal_pb2.py +360 -0
  178. wandb/proto/v4/wandb_server_pb2.py +63 -0
  179. wandb/proto/v4/wandb_settings_pb2.py +45 -0
  180. wandb/proto/v4/wandb_telemetry_pb2.py +41 -0
  181. wandb/proto/v5/wandb_base_pb2.py +31 -0
  182. wandb/proto/v5/wandb_internal_pb2.py +361 -0
  183. wandb/proto/v5/wandb_server_pb2.py +64 -0
  184. wandb/proto/v5/wandb_settings_pb2.py +46 -0
  185. wandb/proto/v5/wandb_telemetry_pb2.py +42 -0
  186. wandb/proto/wandb_base_pb2.py +10 -0
  187. wandb/proto/wandb_deprecated.py +53 -0
  188. wandb/proto/wandb_generate_deprecated.py +34 -0
  189. wandb/proto/wandb_generate_proto.py +49 -0
  190. wandb/proto/wandb_internal_pb2.py +16 -0
  191. wandb/proto/wandb_server_pb2.py +10 -0
  192. wandb/proto/wandb_settings_pb2.py +10 -0
  193. wandb/proto/wandb_telemetry_pb2.py +10 -0
  194. wandb/py.typed +0 -0
  195. wandb/sdk/__init__.py +37 -0
  196. wandb/sdk/artifacts/__init__.py +0 -0
  197. wandb/sdk/artifacts/_validators.py +90 -0
  198. wandb/sdk/artifacts/artifact.py +2389 -0
  199. wandb/sdk/artifacts/artifact_download_logger.py +43 -0
  200. wandb/sdk/artifacts/artifact_file_cache.py +253 -0
  201. wandb/sdk/artifacts/artifact_instance_cache.py +17 -0
  202. wandb/sdk/artifacts/artifact_manifest.py +74 -0
  203. wandb/sdk/artifacts/artifact_manifest_entry.py +249 -0
  204. wandb/sdk/artifacts/artifact_manifests/__init__.py +0 -0
  205. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +92 -0
  206. wandb/sdk/artifacts/artifact_saver.py +269 -0
  207. wandb/sdk/artifacts/artifact_state.py +11 -0
  208. wandb/sdk/artifacts/artifact_ttl.py +7 -0
  209. wandb/sdk/artifacts/exceptions.py +57 -0
  210. wandb/sdk/artifacts/staging.py +25 -0
  211. wandb/sdk/artifacts/storage_handler.py +62 -0
  212. wandb/sdk/artifacts/storage_handlers/__init__.py +0 -0
  213. wandb/sdk/artifacts/storage_handlers/azure_handler.py +208 -0
  214. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +228 -0
  215. wandb/sdk/artifacts/storage_handlers/http_handler.py +114 -0
  216. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +141 -0
  217. wandb/sdk/artifacts/storage_handlers/multi_handler.py +56 -0
  218. wandb/sdk/artifacts/storage_handlers/s3_handler.py +300 -0
  219. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +72 -0
  220. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +135 -0
  221. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +74 -0
  222. wandb/sdk/artifacts/storage_layout.py +6 -0
  223. wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
  224. wandb/sdk/artifacts/storage_policies/register.py +1 -0
  225. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +378 -0
  226. wandb/sdk/artifacts/storage_policy.py +72 -0
  227. wandb/sdk/backend/__init__.py +0 -0
  228. wandb/sdk/backend/backend.py +222 -0
  229. wandb/sdk/data_types/__init__.py +0 -0
  230. wandb/sdk/data_types/_dtypes.py +914 -0
  231. wandb/sdk/data_types/_private.py +10 -0
  232. wandb/sdk/data_types/audio.py +165 -0
  233. wandb/sdk/data_types/base_types/__init__.py +0 -0
  234. wandb/sdk/data_types/base_types/json_metadata.py +55 -0
  235. wandb/sdk/data_types/base_types/media.py +315 -0
  236. wandb/sdk/data_types/base_types/wb_value.py +272 -0
  237. wandb/sdk/data_types/bokeh.py +70 -0
  238. wandb/sdk/data_types/graph.py +405 -0
  239. wandb/sdk/data_types/helper_types/__init__.py +0 -0
  240. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +295 -0
  241. wandb/sdk/data_types/helper_types/classes.py +159 -0
  242. wandb/sdk/data_types/helper_types/image_mask.py +235 -0
  243. wandb/sdk/data_types/histogram.py +96 -0
  244. wandb/sdk/data_types/html.py +115 -0
  245. wandb/sdk/data_types/image.py +845 -0
  246. wandb/sdk/data_types/molecule.py +241 -0
  247. wandb/sdk/data_types/object_3d.py +474 -0
  248. wandb/sdk/data_types/plotly.py +82 -0
  249. wandb/sdk/data_types/saved_model.py +446 -0
  250. wandb/sdk/data_types/table.py +1204 -0
  251. wandb/sdk/data_types/trace_tree.py +438 -0
  252. wandb/sdk/data_types/utils.py +229 -0
  253. wandb/sdk/data_types/video.py +247 -0
  254. wandb/sdk/integration_utils/__init__.py +0 -0
  255. wandb/sdk/integration_utils/auto_logging.py +239 -0
  256. wandb/sdk/integration_utils/data_logging.py +475 -0
  257. wandb/sdk/interface/__init__.py +0 -0
  258. wandb/sdk/interface/constants.py +4 -0
  259. wandb/sdk/interface/interface.py +972 -0
  260. wandb/sdk/interface/interface_queue.py +59 -0
  261. wandb/sdk/interface/interface_relay.py +53 -0
  262. wandb/sdk/interface/interface_shared.py +537 -0
  263. wandb/sdk/interface/interface_sock.py +61 -0
  264. wandb/sdk/interface/message_future.py +27 -0
  265. wandb/sdk/interface/message_future_poll.py +50 -0
  266. wandb/sdk/interface/router.py +118 -0
  267. wandb/sdk/interface/router_queue.py +44 -0
  268. wandb/sdk/interface/router_relay.py +39 -0
  269. wandb/sdk/interface/router_sock.py +36 -0
  270. wandb/sdk/interface/summary_record.py +67 -0
  271. wandb/sdk/internal/__init__.py +0 -0
  272. wandb/sdk/internal/context.py +89 -0
  273. wandb/sdk/internal/datastore.py +297 -0
  274. wandb/sdk/internal/file_pusher.py +181 -0
  275. wandb/sdk/internal/file_stream.py +695 -0
  276. wandb/sdk/internal/flow_control.py +263 -0
  277. wandb/sdk/internal/handler.py +901 -0
  278. wandb/sdk/internal/internal.py +417 -0
  279. wandb/sdk/internal/internal_api.py +4358 -0
  280. wandb/sdk/internal/internal_util.py +100 -0
  281. wandb/sdk/internal/job_builder.py +629 -0
  282. wandb/sdk/internal/profiler.py +78 -0
  283. wandb/sdk/internal/progress.py +83 -0
  284. wandb/sdk/internal/run.py +25 -0
  285. wandb/sdk/internal/sample.py +70 -0
  286. wandb/sdk/internal/sender.py +1686 -0
  287. wandb/sdk/internal/sender_config.py +197 -0
  288. wandb/sdk/internal/settings_static.py +90 -0
  289. wandb/sdk/internal/system/__init__.py +0 -0
  290. wandb/sdk/internal/system/assets/__init__.py +27 -0
  291. wandb/sdk/internal/system/assets/aggregators.py +37 -0
  292. wandb/sdk/internal/system/assets/asset_registry.py +20 -0
  293. wandb/sdk/internal/system/assets/cpu.py +163 -0
  294. wandb/sdk/internal/system/assets/disk.py +210 -0
  295. wandb/sdk/internal/system/assets/gpu.py +416 -0
  296. wandb/sdk/internal/system/assets/gpu_amd.py +239 -0
  297. wandb/sdk/internal/system/assets/gpu_apple.py +177 -0
  298. wandb/sdk/internal/system/assets/interfaces.py +207 -0
  299. wandb/sdk/internal/system/assets/ipu.py +177 -0
  300. wandb/sdk/internal/system/assets/memory.py +166 -0
  301. wandb/sdk/internal/system/assets/network.py +125 -0
  302. wandb/sdk/internal/system/assets/open_metrics.py +299 -0
  303. wandb/sdk/internal/system/assets/tpu.py +154 -0
  304. wandb/sdk/internal/system/assets/trainium.py +399 -0
  305. wandb/sdk/internal/system/env_probe_helpers.py +13 -0
  306. wandb/sdk/internal/system/system_info.py +249 -0
  307. wandb/sdk/internal/system/system_monitor.py +229 -0
  308. wandb/sdk/internal/tb_watcher.py +518 -0
  309. wandb/sdk/internal/thread_local_settings.py +18 -0
  310. wandb/sdk/internal/writer.py +206 -0
  311. wandb/sdk/launch/__init__.py +14 -0
  312. wandb/sdk/launch/_launch.py +330 -0
  313. wandb/sdk/launch/_launch_add.py +255 -0
  314. wandb/sdk/launch/_project_spec.py +566 -0
  315. wandb/sdk/launch/agent/__init__.py +5 -0
  316. wandb/sdk/launch/agent/agent.py +924 -0
  317. wandb/sdk/launch/agent/config.py +296 -0
  318. wandb/sdk/launch/agent/job_status_tracker.py +53 -0
  319. wandb/sdk/launch/agent/run_queue_item_file_saver.py +45 -0
  320. wandb/sdk/launch/builder/__init__.py +0 -0
  321. wandb/sdk/launch/builder/abstract.py +156 -0
  322. wandb/sdk/launch/builder/build.py +297 -0
  323. wandb/sdk/launch/builder/context_manager.py +235 -0
  324. wandb/sdk/launch/builder/docker_builder.py +177 -0
  325. wandb/sdk/launch/builder/kaniko_builder.py +595 -0
  326. wandb/sdk/launch/builder/noop.py +58 -0
  327. wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +188 -0
  328. wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
  329. wandb/sdk/launch/create_job.py +528 -0
  330. wandb/sdk/launch/environment/abstract.py +29 -0
  331. wandb/sdk/launch/environment/aws_environment.py +322 -0
  332. wandb/sdk/launch/environment/azure_environment.py +105 -0
  333. wandb/sdk/launch/environment/gcp_environment.py +335 -0
  334. wandb/sdk/launch/environment/local_environment.py +66 -0
  335. wandb/sdk/launch/errors.py +19 -0
  336. wandb/sdk/launch/git_reference.py +109 -0
  337. wandb/sdk/launch/inputs/files.py +148 -0
  338. wandb/sdk/launch/inputs/internal.py +315 -0
  339. wandb/sdk/launch/inputs/manage.py +113 -0
  340. wandb/sdk/launch/inputs/schema.py +39 -0
  341. wandb/sdk/launch/loader.py +249 -0
  342. wandb/sdk/launch/registry/abstract.py +48 -0
  343. wandb/sdk/launch/registry/anon.py +29 -0
  344. wandb/sdk/launch/registry/azure_container_registry.py +124 -0
  345. wandb/sdk/launch/registry/elastic_container_registry.py +192 -0
  346. wandb/sdk/launch/registry/google_artifact_registry.py +219 -0
  347. wandb/sdk/launch/registry/local_registry.py +67 -0
  348. wandb/sdk/launch/runner/__init__.py +0 -0
  349. wandb/sdk/launch/runner/abstract.py +195 -0
  350. wandb/sdk/launch/runner/kubernetes_monitor.py +474 -0
  351. wandb/sdk/launch/runner/kubernetes_runner.py +963 -0
  352. wandb/sdk/launch/runner/local_container.py +301 -0
  353. wandb/sdk/launch/runner/local_process.py +78 -0
  354. wandb/sdk/launch/runner/sagemaker_runner.py +426 -0
  355. wandb/sdk/launch/runner/vertex_runner.py +230 -0
  356. wandb/sdk/launch/sweeps/__init__.py +39 -0
  357. wandb/sdk/launch/sweeps/scheduler.py +742 -0
  358. wandb/sdk/launch/sweeps/scheduler_sweep.py +91 -0
  359. wandb/sdk/launch/sweeps/utils.py +316 -0
  360. wandb/sdk/launch/utils.py +746 -0
  361. wandb/sdk/launch/wandb_reference.py +138 -0
  362. wandb/sdk/lib/__init__.py +5 -0
  363. wandb/sdk/lib/_settings_toposort_generate.py +159 -0
  364. wandb/sdk/lib/_settings_toposort_generated.py +250 -0
  365. wandb/sdk/lib/_wburls_generate.py +25 -0
  366. wandb/sdk/lib/_wburls_generated.py +22 -0
  367. wandb/sdk/lib/apikey.py +273 -0
  368. wandb/sdk/lib/capped_dict.py +26 -0
  369. wandb/sdk/lib/config_util.py +101 -0
  370. wandb/sdk/lib/credentials.py +141 -0
  371. wandb/sdk/lib/deprecate.py +42 -0
  372. wandb/sdk/lib/disabled.py +29 -0
  373. wandb/sdk/lib/exit_hooks.py +54 -0
  374. wandb/sdk/lib/file_stream_utils.py +118 -0
  375. wandb/sdk/lib/filenames.py +64 -0
  376. wandb/sdk/lib/filesystem.py +372 -0
  377. wandb/sdk/lib/fsm.py +174 -0
  378. wandb/sdk/lib/gitlib.py +239 -0
  379. wandb/sdk/lib/gql_request.py +65 -0
  380. wandb/sdk/lib/handler_util.py +21 -0
  381. wandb/sdk/lib/hashutil.py +84 -0
  382. wandb/sdk/lib/import_hooks.py +275 -0
  383. wandb/sdk/lib/ipython.py +146 -0
  384. wandb/sdk/lib/json_util.py +80 -0
  385. wandb/sdk/lib/lazyloader.py +63 -0
  386. wandb/sdk/lib/mailbox.py +460 -0
  387. wandb/sdk/lib/module.py +69 -0
  388. wandb/sdk/lib/paths.py +106 -0
  389. wandb/sdk/lib/preinit.py +42 -0
  390. wandb/sdk/lib/printer.py +313 -0
  391. wandb/sdk/lib/proto_util.py +90 -0
  392. wandb/sdk/lib/redirect.py +845 -0
  393. wandb/sdk/lib/reporting.py +99 -0
  394. wandb/sdk/lib/retry.py +289 -0
  395. wandb/sdk/lib/run_moment.py +78 -0
  396. wandb/sdk/lib/runid.py +12 -0
  397. wandb/sdk/lib/server.py +52 -0
  398. wandb/sdk/lib/service_connection.py +216 -0
  399. wandb/sdk/lib/service_token.py +94 -0
  400. wandb/sdk/lib/sock_client.py +295 -0
  401. wandb/sdk/lib/sparkline.py +45 -0
  402. wandb/sdk/lib/telemetry.py +100 -0
  403. wandb/sdk/lib/timed_input.py +133 -0
  404. wandb/sdk/lib/timer.py +19 -0
  405. wandb/sdk/lib/tracelog.py +255 -0
  406. wandb/sdk/lib/wburls.py +46 -0
  407. wandb/sdk/service/__init__.py +0 -0
  408. wandb/sdk/service/_startup_debug.py +22 -0
  409. wandb/sdk/service/port_file.py +53 -0
  410. wandb/sdk/service/server.py +116 -0
  411. wandb/sdk/service/server_sock.py +276 -0
  412. wandb/sdk/service/service.py +242 -0
  413. wandb/sdk/service/streams.py +417 -0
  414. wandb/sdk/verify/__init__.py +0 -0
  415. wandb/sdk/verify/verify.py +501 -0
  416. wandb/sdk/wandb_alerts.py +12 -0
  417. wandb/sdk/wandb_config.py +322 -0
  418. wandb/sdk/wandb_helper.py +54 -0
  419. wandb/sdk/wandb_init.py +1266 -0
  420. wandb/sdk/wandb_login.py +349 -0
  421. wandb/sdk/wandb_metric.py +110 -0
  422. wandb/sdk/wandb_require.py +97 -0
  423. wandb/sdk/wandb_require_helpers.py +44 -0
  424. wandb/sdk/wandb_run.py +4236 -0
  425. wandb/sdk/wandb_settings.py +2001 -0
  426. wandb/sdk/wandb_setup.py +409 -0
  427. wandb/sdk/wandb_summary.py +150 -0
  428. wandb/sdk/wandb_sweep.py +119 -0
  429. wandb/sdk/wandb_sync.py +81 -0
  430. wandb/sdk/wandb_watch.py +144 -0
  431. wandb/sklearn.py +35 -0
  432. wandb/sync/__init__.py +3 -0
  433. wandb/sync/sync.py +443 -0
  434. wandb/trigger.py +29 -0
  435. wandb/util.py +1956 -0
  436. wandb/vendor/__init__.py +0 -0
  437. wandb/vendor/gql-0.2.0/setup.py +40 -0
  438. wandb/vendor/gql-0.2.0/tests/__init__.py +0 -0
  439. wandb/vendor/gql-0.2.0/tests/starwars/__init__.py +0 -0
  440. wandb/vendor/gql-0.2.0/tests/starwars/fixtures.py +96 -0
  441. wandb/vendor/gql-0.2.0/tests/starwars/schema.py +146 -0
  442. wandb/vendor/gql-0.2.0/tests/starwars/test_dsl.py +293 -0
  443. wandb/vendor/gql-0.2.0/tests/starwars/test_query.py +355 -0
  444. wandb/vendor/gql-0.2.0/tests/starwars/test_validation.py +171 -0
  445. wandb/vendor/gql-0.2.0/tests/test_client.py +31 -0
  446. wandb/vendor/gql-0.2.0/tests/test_transport.py +89 -0
  447. wandb/vendor/gql-0.2.0/wandb_gql/__init__.py +4 -0
  448. wandb/vendor/gql-0.2.0/wandb_gql/client.py +75 -0
  449. wandb/vendor/gql-0.2.0/wandb_gql/dsl.py +152 -0
  450. wandb/vendor/gql-0.2.0/wandb_gql/gql.py +10 -0
  451. wandb/vendor/gql-0.2.0/wandb_gql/transport/__init__.py +0 -0
  452. wandb/vendor/gql-0.2.0/wandb_gql/transport/http.py +6 -0
  453. wandb/vendor/gql-0.2.0/wandb_gql/transport/local_schema.py +15 -0
  454. wandb/vendor/gql-0.2.0/wandb_gql/transport/requests.py +46 -0
  455. wandb/vendor/gql-0.2.0/wandb_gql/utils.py +21 -0
  456. wandb/vendor/graphql-core-1.1/setup.py +86 -0
  457. wandb/vendor/graphql-core-1.1/wandb_graphql/__init__.py +287 -0
  458. wandb/vendor/graphql-core-1.1/wandb_graphql/error/__init__.py +6 -0
  459. wandb/vendor/graphql-core-1.1/wandb_graphql/error/base.py +42 -0
  460. wandb/vendor/graphql-core-1.1/wandb_graphql/error/format_error.py +11 -0
  461. wandb/vendor/graphql-core-1.1/wandb_graphql/error/located_error.py +29 -0
  462. wandb/vendor/graphql-core-1.1/wandb_graphql/error/syntax_error.py +36 -0
  463. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/__init__.py +26 -0
  464. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/base.py +311 -0
  465. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executor.py +398 -0
  466. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/__init__.py +0 -0
  467. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/asyncio.py +53 -0
  468. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/gevent.py +22 -0
  469. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/process.py +32 -0
  470. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/sync.py +7 -0
  471. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/thread.py +35 -0
  472. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/utils.py +6 -0
  473. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/__init__.py +0 -0
  474. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/executor.py +66 -0
  475. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/fragment.py +252 -0
  476. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/resolver.py +151 -0
  477. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/utils.py +7 -0
  478. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/middleware.py +57 -0
  479. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/values.py +145 -0
  480. wandb/vendor/graphql-core-1.1/wandb_graphql/graphql.py +60 -0
  481. wandb/vendor/graphql-core-1.1/wandb_graphql/language/__init__.py +0 -0
  482. wandb/vendor/graphql-core-1.1/wandb_graphql/language/ast.py +1349 -0
  483. wandb/vendor/graphql-core-1.1/wandb_graphql/language/base.py +19 -0
  484. wandb/vendor/graphql-core-1.1/wandb_graphql/language/lexer.py +435 -0
  485. wandb/vendor/graphql-core-1.1/wandb_graphql/language/location.py +30 -0
  486. wandb/vendor/graphql-core-1.1/wandb_graphql/language/parser.py +779 -0
  487. wandb/vendor/graphql-core-1.1/wandb_graphql/language/printer.py +193 -0
  488. wandb/vendor/graphql-core-1.1/wandb_graphql/language/source.py +18 -0
  489. wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor.py +222 -0
  490. wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor_meta.py +82 -0
  491. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/__init__.py +0 -0
  492. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/cached_property.py +17 -0
  493. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/contain_subset.py +28 -0
  494. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/default_ordered_dict.py +40 -0
  495. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/ordereddict.py +8 -0
  496. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/pair_set.py +43 -0
  497. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/version.py +78 -0
  498. wandb/vendor/graphql-core-1.1/wandb_graphql/type/__init__.py +67 -0
  499. wandb/vendor/graphql-core-1.1/wandb_graphql/type/definition.py +619 -0
  500. wandb/vendor/graphql-core-1.1/wandb_graphql/type/directives.py +132 -0
  501. wandb/vendor/graphql-core-1.1/wandb_graphql/type/introspection.py +440 -0
  502. wandb/vendor/graphql-core-1.1/wandb_graphql/type/scalars.py +131 -0
  503. wandb/vendor/graphql-core-1.1/wandb_graphql/type/schema.py +100 -0
  504. wandb/vendor/graphql-core-1.1/wandb_graphql/type/typemap.py +145 -0
  505. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/__init__.py +0 -0
  506. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/assert_valid_name.py +9 -0
  507. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_from_value.py +65 -0
  508. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_code.py +49 -0
  509. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_dict.py +24 -0
  510. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/base.py +75 -0
  511. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_ast_schema.py +291 -0
  512. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_client_schema.py +250 -0
  513. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/concat_ast.py +9 -0
  514. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/extend_schema.py +357 -0
  515. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_field_def.py +27 -0
  516. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_operation_ast.py +21 -0
  517. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/introspection_query.py +90 -0
  518. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_literal_value.py +67 -0
  519. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_value.py +66 -0
  520. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/quoted_or_list.py +21 -0
  521. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/schema_printer.py +168 -0
  522. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/suggestion_list.py +56 -0
  523. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_comparators.py +69 -0
  524. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_from_ast.py +21 -0
  525. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_info.py +149 -0
  526. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/value_from_ast.py +69 -0
  527. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/__init__.py +4 -0
  528. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/__init__.py +79 -0
  529. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/arguments_of_correct_type.py +24 -0
  530. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/base.py +8 -0
  531. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/default_values_of_correct_type.py +44 -0
  532. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fields_on_correct_type.py +113 -0
  533. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fragments_on_composite_types.py +33 -0
  534. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_argument_names.py +70 -0
  535. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_directives.py +97 -0
  536. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_fragment_names.py +19 -0
  537. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_type_names.py +43 -0
  538. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/lone_anonymous_operation.py +23 -0
  539. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_fragment_cycles.py +59 -0
  540. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_undefined_variables.py +36 -0
  541. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_fragments.py +38 -0
  542. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_variables.py +37 -0
  543. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/overlapping_fields_can_be_merged.py +529 -0
  544. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/possible_fragment_spreads.py +44 -0
  545. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/provided_non_null_arguments.py +46 -0
  546. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/scalar_leafs.py +33 -0
  547. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_argument_names.py +32 -0
  548. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_fragment_names.py +28 -0
  549. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_input_field_names.py +33 -0
  550. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_operation_names.py +31 -0
  551. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_variable_names.py +27 -0
  552. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_are_input_types.py +21 -0
  553. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_in_allowed_position.py +53 -0
  554. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/validation.py +158 -0
  555. wandb/vendor/promise-2.3.0/conftest.py +30 -0
  556. wandb/vendor/promise-2.3.0/setup.py +64 -0
  557. wandb/vendor/promise-2.3.0/tests/__init__.py +0 -0
  558. wandb/vendor/promise-2.3.0/tests/conftest.py +8 -0
  559. wandb/vendor/promise-2.3.0/tests/test_awaitable.py +32 -0
  560. wandb/vendor/promise-2.3.0/tests/test_awaitable_35.py +47 -0
  561. wandb/vendor/promise-2.3.0/tests/test_benchmark.py +116 -0
  562. wandb/vendor/promise-2.3.0/tests/test_complex_threads.py +23 -0
  563. wandb/vendor/promise-2.3.0/tests/test_dataloader.py +452 -0
  564. wandb/vendor/promise-2.3.0/tests/test_dataloader_awaitable_35.py +99 -0
  565. wandb/vendor/promise-2.3.0/tests/test_dataloader_extra.py +65 -0
  566. wandb/vendor/promise-2.3.0/tests/test_extra.py +670 -0
  567. wandb/vendor/promise-2.3.0/tests/test_issues.py +132 -0
  568. wandb/vendor/promise-2.3.0/tests/test_promise_list.py +70 -0
  569. wandb/vendor/promise-2.3.0/tests/test_spec.py +584 -0
  570. wandb/vendor/promise-2.3.0/tests/test_thread_safety.py +115 -0
  571. wandb/vendor/promise-2.3.0/tests/utils.py +3 -0
  572. wandb/vendor/promise-2.3.0/wandb_promise/__init__.py +38 -0
  573. wandb/vendor/promise-2.3.0/wandb_promise/async_.py +135 -0
  574. wandb/vendor/promise-2.3.0/wandb_promise/compat.py +32 -0
  575. wandb/vendor/promise-2.3.0/wandb_promise/dataloader.py +326 -0
  576. wandb/vendor/promise-2.3.0/wandb_promise/iterate_promise.py +12 -0
  577. wandb/vendor/promise-2.3.0/wandb_promise/promise.py +848 -0
  578. wandb/vendor/promise-2.3.0/wandb_promise/promise_list.py +151 -0
  579. wandb/vendor/promise-2.3.0/wandb_promise/pyutils/__init__.py +0 -0
  580. wandb/vendor/promise-2.3.0/wandb_promise/pyutils/version.py +83 -0
  581. wandb/vendor/promise-2.3.0/wandb_promise/schedulers/__init__.py +0 -0
  582. wandb/vendor/promise-2.3.0/wandb_promise/schedulers/asyncio.py +22 -0
  583. wandb/vendor/promise-2.3.0/wandb_promise/schedulers/gevent.py +21 -0
  584. wandb/vendor/promise-2.3.0/wandb_promise/schedulers/immediate.py +27 -0
  585. wandb/vendor/promise-2.3.0/wandb_promise/schedulers/thread.py +18 -0
  586. wandb/vendor/promise-2.3.0/wandb_promise/utils.py +56 -0
  587. wandb/vendor/pygments/__init__.py +90 -0
  588. wandb/vendor/pygments/cmdline.py +568 -0
  589. wandb/vendor/pygments/console.py +74 -0
  590. wandb/vendor/pygments/filter.py +74 -0
  591. wandb/vendor/pygments/filters/__init__.py +350 -0
  592. wandb/vendor/pygments/formatter.py +95 -0
  593. wandb/vendor/pygments/formatters/__init__.py +153 -0
  594. wandb/vendor/pygments/formatters/_mapping.py +85 -0
  595. wandb/vendor/pygments/formatters/bbcode.py +109 -0
  596. wandb/vendor/pygments/formatters/html.py +851 -0
  597. wandb/vendor/pygments/formatters/img.py +600 -0
  598. wandb/vendor/pygments/formatters/irc.py +182 -0
  599. wandb/vendor/pygments/formatters/latex.py +482 -0
  600. wandb/vendor/pygments/formatters/other.py +160 -0
  601. wandb/vendor/pygments/formatters/rtf.py +147 -0
  602. wandb/vendor/pygments/formatters/svg.py +153 -0
  603. wandb/vendor/pygments/formatters/terminal.py +136 -0
  604. wandb/vendor/pygments/formatters/terminal256.py +309 -0
  605. wandb/vendor/pygments/lexer.py +871 -0
  606. wandb/vendor/pygments/lexers/__init__.py +329 -0
  607. wandb/vendor/pygments/lexers/_asy_builtins.py +1645 -0
  608. wandb/vendor/pygments/lexers/_cl_builtins.py +232 -0
  609. wandb/vendor/pygments/lexers/_cocoa_builtins.py +72 -0
  610. wandb/vendor/pygments/lexers/_csound_builtins.py +1346 -0
  611. wandb/vendor/pygments/lexers/_lasso_builtins.py +5327 -0
  612. wandb/vendor/pygments/lexers/_lua_builtins.py +295 -0
  613. wandb/vendor/pygments/lexers/_mapping.py +500 -0
  614. wandb/vendor/pygments/lexers/_mql_builtins.py +1172 -0
  615. wandb/vendor/pygments/lexers/_openedge_builtins.py +2547 -0
  616. wandb/vendor/pygments/lexers/_php_builtins.py +4756 -0
  617. wandb/vendor/pygments/lexers/_postgres_builtins.py +621 -0
  618. wandb/vendor/pygments/lexers/_scilab_builtins.py +3094 -0
  619. wandb/vendor/pygments/lexers/_sourcemod_builtins.py +1163 -0
  620. wandb/vendor/pygments/lexers/_stan_builtins.py +532 -0
  621. wandb/vendor/pygments/lexers/_stata_builtins.py +419 -0
  622. wandb/vendor/pygments/lexers/_tsql_builtins.py +1004 -0
  623. wandb/vendor/pygments/lexers/_vim_builtins.py +1939 -0
  624. wandb/vendor/pygments/lexers/actionscript.py +240 -0
  625. wandb/vendor/pygments/lexers/agile.py +24 -0
  626. wandb/vendor/pygments/lexers/algebra.py +221 -0
  627. wandb/vendor/pygments/lexers/ambient.py +76 -0
  628. wandb/vendor/pygments/lexers/ampl.py +87 -0
  629. wandb/vendor/pygments/lexers/apl.py +101 -0
  630. wandb/vendor/pygments/lexers/archetype.py +318 -0
  631. wandb/vendor/pygments/lexers/asm.py +641 -0
  632. wandb/vendor/pygments/lexers/automation.py +374 -0
  633. wandb/vendor/pygments/lexers/basic.py +500 -0
  634. wandb/vendor/pygments/lexers/bibtex.py +160 -0
  635. wandb/vendor/pygments/lexers/business.py +612 -0
  636. wandb/vendor/pygments/lexers/c_cpp.py +252 -0
  637. wandb/vendor/pygments/lexers/c_like.py +541 -0
  638. wandb/vendor/pygments/lexers/capnproto.py +78 -0
  639. wandb/vendor/pygments/lexers/chapel.py +102 -0
  640. wandb/vendor/pygments/lexers/clean.py +288 -0
  641. wandb/vendor/pygments/lexers/compiled.py +34 -0
  642. wandb/vendor/pygments/lexers/configs.py +833 -0
  643. wandb/vendor/pygments/lexers/console.py +114 -0
  644. wandb/vendor/pygments/lexers/crystal.py +393 -0
  645. wandb/vendor/pygments/lexers/csound.py +366 -0
  646. wandb/vendor/pygments/lexers/css.py +689 -0
  647. wandb/vendor/pygments/lexers/d.py +251 -0
  648. wandb/vendor/pygments/lexers/dalvik.py +125 -0
  649. wandb/vendor/pygments/lexers/data.py +555 -0
  650. wandb/vendor/pygments/lexers/diff.py +165 -0
  651. wandb/vendor/pygments/lexers/dotnet.py +691 -0
  652. wandb/vendor/pygments/lexers/dsls.py +878 -0
  653. wandb/vendor/pygments/lexers/dylan.py +289 -0
  654. wandb/vendor/pygments/lexers/ecl.py +125 -0
  655. wandb/vendor/pygments/lexers/eiffel.py +65 -0
  656. wandb/vendor/pygments/lexers/elm.py +121 -0
  657. wandb/vendor/pygments/lexers/erlang.py +533 -0
  658. wandb/vendor/pygments/lexers/esoteric.py +277 -0
  659. wandb/vendor/pygments/lexers/ezhil.py +69 -0
  660. wandb/vendor/pygments/lexers/factor.py +344 -0
  661. wandb/vendor/pygments/lexers/fantom.py +250 -0
  662. wandb/vendor/pygments/lexers/felix.py +273 -0
  663. wandb/vendor/pygments/lexers/forth.py +177 -0
  664. wandb/vendor/pygments/lexers/fortran.py +205 -0
  665. wandb/vendor/pygments/lexers/foxpro.py +428 -0
  666. wandb/vendor/pygments/lexers/functional.py +21 -0
  667. wandb/vendor/pygments/lexers/go.py +101 -0
  668. wandb/vendor/pygments/lexers/grammar_notation.py +213 -0
  669. wandb/vendor/pygments/lexers/graph.py +80 -0
  670. wandb/vendor/pygments/lexers/graphics.py +553 -0
  671. wandb/vendor/pygments/lexers/haskell.py +843 -0
  672. wandb/vendor/pygments/lexers/haxe.py +936 -0
  673. wandb/vendor/pygments/lexers/hdl.py +382 -0
  674. wandb/vendor/pygments/lexers/hexdump.py +103 -0
  675. wandb/vendor/pygments/lexers/html.py +602 -0
  676. wandb/vendor/pygments/lexers/idl.py +270 -0
  677. wandb/vendor/pygments/lexers/igor.py +288 -0
  678. wandb/vendor/pygments/lexers/inferno.py +96 -0
  679. wandb/vendor/pygments/lexers/installers.py +322 -0
  680. wandb/vendor/pygments/lexers/int_fiction.py +1343 -0
  681. wandb/vendor/pygments/lexers/iolang.py +63 -0
  682. wandb/vendor/pygments/lexers/j.py +146 -0
  683. wandb/vendor/pygments/lexers/javascript.py +1525 -0
  684. wandb/vendor/pygments/lexers/julia.py +333 -0
  685. wandb/vendor/pygments/lexers/jvm.py +1573 -0
  686. wandb/vendor/pygments/lexers/lisp.py +2621 -0
  687. wandb/vendor/pygments/lexers/make.py +202 -0
  688. wandb/vendor/pygments/lexers/markup.py +595 -0
  689. wandb/vendor/pygments/lexers/math.py +21 -0
  690. wandb/vendor/pygments/lexers/matlab.py +663 -0
  691. wandb/vendor/pygments/lexers/ml.py +769 -0
  692. wandb/vendor/pygments/lexers/modeling.py +358 -0
  693. wandb/vendor/pygments/lexers/modula2.py +1561 -0
  694. wandb/vendor/pygments/lexers/monte.py +204 -0
  695. wandb/vendor/pygments/lexers/ncl.py +894 -0
  696. wandb/vendor/pygments/lexers/nimrod.py +159 -0
  697. wandb/vendor/pygments/lexers/nit.py +64 -0
  698. wandb/vendor/pygments/lexers/nix.py +136 -0
  699. wandb/vendor/pygments/lexers/oberon.py +105 -0
  700. wandb/vendor/pygments/lexers/objective.py +504 -0
  701. wandb/vendor/pygments/lexers/ooc.py +85 -0
  702. wandb/vendor/pygments/lexers/other.py +41 -0
  703. wandb/vendor/pygments/lexers/parasail.py +79 -0
  704. wandb/vendor/pygments/lexers/parsers.py +835 -0
  705. wandb/vendor/pygments/lexers/pascal.py +644 -0
  706. wandb/vendor/pygments/lexers/pawn.py +199 -0
  707. wandb/vendor/pygments/lexers/perl.py +620 -0
  708. wandb/vendor/pygments/lexers/php.py +267 -0
  709. wandb/vendor/pygments/lexers/praat.py +294 -0
  710. wandb/vendor/pygments/lexers/prolog.py +306 -0
  711. wandb/vendor/pygments/lexers/python.py +939 -0
  712. wandb/vendor/pygments/lexers/qvt.py +152 -0
  713. wandb/vendor/pygments/lexers/r.py +453 -0
  714. wandb/vendor/pygments/lexers/rdf.py +270 -0
  715. wandb/vendor/pygments/lexers/rebol.py +431 -0
  716. wandb/vendor/pygments/lexers/resource.py +85 -0
  717. wandb/vendor/pygments/lexers/rnc.py +67 -0
  718. wandb/vendor/pygments/lexers/roboconf.py +82 -0
  719. wandb/vendor/pygments/lexers/robotframework.py +560 -0
  720. wandb/vendor/pygments/lexers/ruby.py +519 -0
  721. wandb/vendor/pygments/lexers/rust.py +220 -0
  722. wandb/vendor/pygments/lexers/sas.py +228 -0
  723. wandb/vendor/pygments/lexers/scripting.py +1222 -0
  724. wandb/vendor/pygments/lexers/shell.py +794 -0
  725. wandb/vendor/pygments/lexers/smalltalk.py +195 -0
  726. wandb/vendor/pygments/lexers/smv.py +79 -0
  727. wandb/vendor/pygments/lexers/snobol.py +83 -0
  728. wandb/vendor/pygments/lexers/special.py +103 -0
  729. wandb/vendor/pygments/lexers/sql.py +681 -0
  730. wandb/vendor/pygments/lexers/stata.py +108 -0
  731. wandb/vendor/pygments/lexers/supercollider.py +90 -0
  732. wandb/vendor/pygments/lexers/tcl.py +145 -0
  733. wandb/vendor/pygments/lexers/templates.py +2283 -0
  734. wandb/vendor/pygments/lexers/testing.py +207 -0
  735. wandb/vendor/pygments/lexers/text.py +25 -0
  736. wandb/vendor/pygments/lexers/textedit.py +169 -0
  737. wandb/vendor/pygments/lexers/textfmts.py +297 -0
  738. wandb/vendor/pygments/lexers/theorem.py +458 -0
  739. wandb/vendor/pygments/lexers/trafficscript.py +54 -0
  740. wandb/vendor/pygments/lexers/typoscript.py +226 -0
  741. wandb/vendor/pygments/lexers/urbi.py +133 -0
  742. wandb/vendor/pygments/lexers/varnish.py +190 -0
  743. wandb/vendor/pygments/lexers/verification.py +111 -0
  744. wandb/vendor/pygments/lexers/web.py +24 -0
  745. wandb/vendor/pygments/lexers/webmisc.py +988 -0
  746. wandb/vendor/pygments/lexers/whiley.py +116 -0
  747. wandb/vendor/pygments/lexers/x10.py +69 -0
  748. wandb/vendor/pygments/modeline.py +44 -0
  749. wandb/vendor/pygments/plugin.py +68 -0
  750. wandb/vendor/pygments/regexopt.py +92 -0
  751. wandb/vendor/pygments/scanner.py +105 -0
  752. wandb/vendor/pygments/sphinxext.py +158 -0
  753. wandb/vendor/pygments/style.py +155 -0
  754. wandb/vendor/pygments/styles/__init__.py +80 -0
  755. wandb/vendor/pygments/styles/abap.py +29 -0
  756. wandb/vendor/pygments/styles/algol.py +63 -0
  757. wandb/vendor/pygments/styles/algol_nu.py +63 -0
  758. wandb/vendor/pygments/styles/arduino.py +98 -0
  759. wandb/vendor/pygments/styles/autumn.py +65 -0
  760. wandb/vendor/pygments/styles/borland.py +51 -0
  761. wandb/vendor/pygments/styles/bw.py +49 -0
  762. wandb/vendor/pygments/styles/colorful.py +81 -0
  763. wandb/vendor/pygments/styles/default.py +73 -0
  764. wandb/vendor/pygments/styles/emacs.py +72 -0
  765. wandb/vendor/pygments/styles/friendly.py +72 -0
  766. wandb/vendor/pygments/styles/fruity.py +42 -0
  767. wandb/vendor/pygments/styles/igor.py +29 -0
  768. wandb/vendor/pygments/styles/lovelace.py +97 -0
  769. wandb/vendor/pygments/styles/manni.py +75 -0
  770. wandb/vendor/pygments/styles/monokai.py +106 -0
  771. wandb/vendor/pygments/styles/murphy.py +80 -0
  772. wandb/vendor/pygments/styles/native.py +65 -0
  773. wandb/vendor/pygments/styles/paraiso_dark.py +125 -0
  774. wandb/vendor/pygments/styles/paraiso_light.py +125 -0
  775. wandb/vendor/pygments/styles/pastie.py +75 -0
  776. wandb/vendor/pygments/styles/perldoc.py +69 -0
  777. wandb/vendor/pygments/styles/rainbow_dash.py +89 -0
  778. wandb/vendor/pygments/styles/rrt.py +33 -0
  779. wandb/vendor/pygments/styles/sas.py +44 -0
  780. wandb/vendor/pygments/styles/stata.py +40 -0
  781. wandb/vendor/pygments/styles/tango.py +141 -0
  782. wandb/vendor/pygments/styles/trac.py +63 -0
  783. wandb/vendor/pygments/styles/vim.py +63 -0
  784. wandb/vendor/pygments/styles/vs.py +38 -0
  785. wandb/vendor/pygments/styles/xcode.py +51 -0
  786. wandb/vendor/pygments/token.py +213 -0
  787. wandb/vendor/pygments/unistring.py +217 -0
  788. wandb/vendor/pygments/util.py +388 -0
  789. wandb/vendor/pynvml/__init__.py +0 -0
  790. wandb/vendor/pynvml/pynvml.py +4779 -0
  791. wandb/vendor/watchdog_0_9_0/wandb_watchdog/__init__.py +17 -0
  792. wandb/vendor/watchdog_0_9_0/wandb_watchdog/events.py +615 -0
  793. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/__init__.py +98 -0
  794. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/api.py +369 -0
  795. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents.py +172 -0
  796. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents2.py +239 -0
  797. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify.py +218 -0
  798. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_buffer.py +81 -0
  799. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_c.py +575 -0
  800. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/kqueue.py +730 -0
  801. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/polling.py +145 -0
  802. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/read_directory_changes.py +133 -0
  803. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/winapi.py +348 -0
  804. wandb/vendor/watchdog_0_9_0/wandb_watchdog/patterns.py +265 -0
  805. wandb/vendor/watchdog_0_9_0/wandb_watchdog/tricks/__init__.py +174 -0
  806. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/__init__.py +151 -0
  807. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/bricks.py +249 -0
  808. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/compat.py +29 -0
  809. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/decorators.py +198 -0
  810. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/delayed_queue.py +88 -0
  811. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/dirsnapshot.py +293 -0
  812. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/echo.py +157 -0
  813. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/event_backport.py +41 -0
  814. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/importlib2.py +40 -0
  815. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/platform.py +57 -0
  816. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/unicode_paths.py +64 -0
  817. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/win32stat.py +123 -0
  818. wandb/vendor/watchdog_0_9_0/wandb_watchdog/version.py +28 -0
  819. wandb/vendor/watchdog_0_9_0/wandb_watchdog/watchmedo.py +577 -0
  820. wandb/wandb_agent.py +588 -0
  821. wandb/wandb_controller.py +721 -0
  822. wandb/wandb_run.py +9 -0
  823. wandb-0.18.2.dist-info/METADATA +213 -0
  824. wandb-0.18.2.dist-info/RECORD +827 -0
  825. wandb-0.18.2.dist-info/WHEEL +5 -0
  826. wandb-0.18.2.dist-info/entry_points.txt +3 -0
  827. wandb-0.18.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1686 @@
1
+ """sender."""
2
+
3
+ import contextlib
4
+ import gzip
5
+ import json
6
+ import logging
7
+ import os
8
+ import queue
9
+ import sys
10
+ import threading
11
+ import time
12
+ import traceback
13
+ from collections import defaultdict
14
+ from datetime import datetime
15
+ from queue import Queue
16
+ from typing import (
17
+ TYPE_CHECKING,
18
+ Any,
19
+ Dict,
20
+ Generator,
21
+ List,
22
+ Optional,
23
+ Tuple,
24
+ Type,
25
+ Union,
26
+ )
27
+
28
+ import requests
29
+
30
+ import wandb
31
+ from wandb import util
32
+ from wandb.errors import CommError, UsageError
33
+ from wandb.errors.util import ProtobufErrorHandler
34
+ from wandb.filesync.dir_watcher import DirWatcher
35
+ from wandb.proto import wandb_internal_pb2
36
+ from wandb.sdk.artifacts.artifact_saver import ArtifactSaver
37
+ from wandb.sdk.interface import interface
38
+ from wandb.sdk.interface.interface_queue import InterfaceQueue
39
+ from wandb.sdk.internal import (
40
+ context,
41
+ datastore,
42
+ file_stream,
43
+ internal_api,
44
+ sender_config,
45
+ )
46
+ from wandb.sdk.internal.file_pusher import FilePusher
47
+ from wandb.sdk.internal.job_builder import JobBuilder
48
+ from wandb.sdk.internal.settings_static import SettingsStatic
49
+ from wandb.sdk.lib import (
50
+ config_util,
51
+ filenames,
52
+ filesystem,
53
+ proto_util,
54
+ redirect,
55
+ telemetry,
56
+ tracelog,
57
+ )
58
+ from wandb.sdk.lib.mailbox import ContextCancelledError
59
+ from wandb.sdk.lib.proto_util import message_to_dict
60
+
61
+ if sys.version_info >= (3, 8):
62
+ from typing import Literal
63
+ else:
64
+ from typing_extensions import Literal
65
+
66
+ if TYPE_CHECKING:
67
+ from wandb.proto.wandb_internal_pb2 import (
68
+ ArtifactManifest,
69
+ ArtifactManifestEntry,
70
+ ArtifactRecord,
71
+ HttpResponse,
72
+ LocalInfo,
73
+ Record,
74
+ Result,
75
+ RunExitResult,
76
+ RunRecord,
77
+ SummaryRecord,
78
+ )
79
+
80
+ StreamLiterals = Literal["stdout", "stderr"]
81
+
82
+
83
+ logger = logging.getLogger(__name__)
84
+
85
+
86
+ _OUTPUT_MIN_CALLBACK_INTERVAL = 2 # seconds
87
+
88
+
89
+ def _framework_priority() -> Generator[Tuple[str, str], None, None]:
90
+ yield from [
91
+ ("lightgbm", "lightgbm"),
92
+ ("catboost", "catboost"),
93
+ ("xgboost", "xgboost"),
94
+ ("transformers_huggingface", "huggingface"), # backwards compatibility
95
+ ("transformers", "huggingface"),
96
+ ("pytorch_ignite", "ignite"), # backwards compatibility
97
+ ("ignite", "ignite"),
98
+ ("pytorch_lightning", "lightning"),
99
+ ("fastai", "fastai"),
100
+ ("torch", "torch"),
101
+ ("keras", "keras"),
102
+ ("tensorflow", "tensorflow"),
103
+ ("sklearn", "sklearn"),
104
+ ]
105
+
106
+
107
+ def _manifest_json_from_proto(manifest: "ArtifactManifest") -> Dict:
108
+ if manifest.version == 1:
109
+ if manifest.manifest_file_path:
110
+ contents = {}
111
+ with gzip.open(manifest.manifest_file_path, "rt") as f:
112
+ for line in f:
113
+ entry_json = json.loads(line)
114
+ path = entry_json.pop("path")
115
+ contents[path] = entry_json
116
+ else:
117
+ contents = {
118
+ content.path: _manifest_entry_from_proto(content)
119
+ for content in manifest.contents
120
+ }
121
+ else:
122
+ raise ValueError(f"unknown artifact manifest version: {manifest.version}")
123
+
124
+ return {
125
+ "version": manifest.version,
126
+ "storagePolicy": manifest.storage_policy,
127
+ "storagePolicyConfig": {
128
+ config.key: json.loads(config.value_json)
129
+ for config in manifest.storage_policy_config
130
+ },
131
+ "contents": contents,
132
+ }
133
+
134
+
135
+ def _manifest_entry_from_proto(entry: "ArtifactManifestEntry") -> Dict:
136
+ birth_artifact_id = entry.birth_artifact_id if entry.birth_artifact_id else None
137
+ return {
138
+ "digest": entry.digest,
139
+ "birthArtifactID": birth_artifact_id,
140
+ "ref": entry.ref if entry.ref else None,
141
+ "size": entry.size if entry.size is not None else None,
142
+ "local_path": entry.local_path if entry.local_path else None,
143
+ "skip_cache": entry.skip_cache,
144
+ "extra": {extra.key: json.loads(extra.value_json) for extra in entry.extra},
145
+ }
146
+
147
+
148
class ResumeState:
    """Mutable holder for the values tracked when a run is resumed.

    A fresh instance starts with ``resumed`` False, every counter at zero and
    every optional field set to ``None``.
    """

    resumed: bool
    step: int
    history: int
    events: int
    output: int
    runtime: float
    wandb_runtime: Optional[int]
    summary: Optional[Dict[str, Any]]
    config: Optional[Dict[str, Any]]
    tags: Optional[List[str]]

    def __init__(self) -> None:
        """Initialise every field to its empty value."""
        self.resumed = False
        self.step = 0
        self.history = 0
        self.events = 0
        self.output = 0
        self.runtime = 0
        # wandb_runtime is the canonical runtime (stored in summary._wandb.runtime)
        self.wandb_runtime = None
        self.summary = None
        self.config = None
        self.tags = None

    def __str__(self) -> str:
        """Render as ``ResumeState(name=value,...)`` in attribute-creation order."""
        rendered = ",".join(f"{name}={value}" for name, value in vars(self).items())
        return f"ResumeState({rendered})"
176
+
177
+
178
class _OutputRawStream:
    """Per-stream state for raw console output.

    Holds a stop event, a queue, a terminal emulator and two daemon threads
    whose targets are the owning ``SendManager``'s
    ``_output_raw_writer_thread`` and ``_output_raw_reader_thread`` methods.
    """

    _stopped: threading.Event
    _queue: queue.Queue
    _emulator: redirect.TerminalEmulator
    _writer_thr: threading.Thread
    _reader_thr: threading.Thread

    def __init__(self, stream: str, sm: "SendManager"):
        """Create the state and the (not yet started) threads.

        Args:
            stream: Stream name; passed to both thread targets as the
                ``stream`` keyword and used in the thread names.
            sm: The SendManager providing the thread target methods.
        """
        self._stopped = threading.Event()
        self._queue = queue.Queue()
        self._emulator = redirect.TerminalEmulator()

        thread_kwargs = {"stream": stream}
        self._writer_thr = threading.Thread(
            target=sm._output_raw_writer_thread,
            kwargs=dict(thread_kwargs),
            daemon=True,
            name=f"OutRawWr-{stream}",
        )
        self._reader_thr = threading.Thread(
            target=sm._output_raw_reader_thread,
            kwargs=dict(thread_kwargs),
            daemon=True,
            name=f"OutRawRd-{stream}",
        )

    def start(self) -> None:
        """Start the writer thread, then the reader thread."""
        for thread in (self._writer_thr, self._reader_thr):
            thread.start()
205
+
206
+
207
+ class SendManager:
208
+ UPDATE_CONFIG_TIME: int = 30
209
+ UPDATE_STATUS_TIME: int = 5
210
+
211
+ _settings: SettingsStatic
212
+ _record_q: "Queue[Record]"
213
+ _result_q: "Queue[Result]"
214
+ _interface: InterfaceQueue
215
+ _api_settings: Dict[str, str]
216
+ _partial_output: Dict[str, str]
217
+ _context_keeper: context.ContextKeeper
218
+
219
+ _telemetry_obj: telemetry.TelemetryRecord
220
+ _fs: Optional["file_stream.FileStreamApi"]
221
+ _run: Optional["RunRecord"]
222
+ _entity: Optional[str]
223
+ _project: Optional[str]
224
+ _dir_watcher: Optional["DirWatcher"]
225
+ _pusher: Optional["FilePusher"]
226
+ _record_exit: Optional["Record"]
227
+ _exit_result: Optional["RunExitResult"]
228
+ _resume_state: ResumeState
229
+ _rewind_response: Optional[Dict[str, Any]]
230
+ _cached_server_info: Dict[str, Any]
231
+ _cached_viewer: Dict[str, Any]
232
+ _server_messages: List[Dict[str, Any]]
233
+ _ds: Optional[datastore.DataStore]
234
+ _output_raw_streams: Dict["StreamLiterals", _OutputRawStream]
235
+ _output_raw_file: Optional[filesystem.CRDedupedFile]
236
+ _send_record_num: int
237
+ _send_end_offset: int
238
+ _debounce_config_time: float
239
+ _debounce_status_time: float
240
+
241
def __init__(
    self,
    settings: SettingsStatic,
    record_q: "Queue[Record]",
    result_q: "Queue[Result]",
    interface: InterfaceQueue,
    context_keeper: context.ContextKeeper,
) -> None:
    """Store the collaborators and reset all sender state.

    Args:
        settings: Static settings; also passed to the internal API client
            and to the job builder.
        record_q: Queue of records (its size is what ``len(self)`` reports).
        result_q: Queue that results are put on.
        interface: Interface used to build and publish requests.
        context_keeper: Keeper used to look up and release contexts.
    """
    # Collaborators handed in by the caller.
    self._settings = settings
    self._record_q = record_q
    self._result_q = result_q
    self._interface = interface
    self._context_keeper = context_keeper

    # Transaction-log reader and progress counters.
    self._ds = None
    self._send_record_num = 0
    self._send_end_offset = 0

    # Created later; None until then.
    self._fs = None
    self._pusher = None
    self._dir_watcher = None

    # State updated by login
    self._entity = None
    self._flags = None

    # State updated by wandb.init
    self._run = None
    self._project = None

    # keep track of config from key/val updates
    self._consolidated_config = sender_config.ConfigState()

    self._start_time: int = 0
    self._telemetry_obj = telemetry.TelemetryRecord()
    self._config_metric_pbdict_list: List[Dict[int, Any]] = []
    # A defaultdict without a default factory, kept as in the original.
    self._metadata_summary: Dict[str, Any] = defaultdict()
    self._cached_summary: Dict[str, Any] = {}
    self._config_metric_index_dict: Dict[str, int] = {}
    self._config_metric_dict: Dict[str, wandb_internal_pb2.MetricRecord] = {}
    self._consolidated_summary: Dict[str, Any] = {}

    self._cached_server_info = {}
    self._cached_viewer = {}
    self._server_messages = []

    # State updated by resuming
    self._resume_state = ResumeState()
    self._rewind_response = None

    # State added when run_exit is initiated and complete
    self._record_exit = None
    self._exit_result = None

    self._api = internal_api.Api(
        default_settings=settings, retry_callback=self.retry_callback
    )
    self._api_settings = {}

    # queue filled by retry_callback
    self._retry_q: Queue[HttpResponse] = queue.Queue()

    # do we need to debounce?
    self._config_needs_debounce: bool = False

    # TODO(jhr): do something better, why do we need to send full lines?
    self._partial_output = {}

    self._exit_code = 0

    # internal vars for handling raw console output
    self._output_raw_streams = {}
    self._output_raw_file = None

    # job builder
    self._job_builder = JobBuilder(settings)

    # Both debounce timers start from the same monotonic timestamp.
    now = time.monotonic()
    self._debounce_config_time = now
    self._debounce_status_time = now
321
+
322
@classmethod
def setup(
    cls,
    root_dir: str,
    resume: Union[None, bool, str],
) -> "SendManager":
    """Set up a standalone SendManager.

    Currently, we're using this primarily for `sync.py`.

    Args:
        root_dir: Root directory of the run; the files directory is
            ``<root_dir>/files``.
        resume: Value forwarded to the ``resume`` setting.

    Returns:
        A new instance of ``cls`` wired to freshly created record and
        result queues, a queue interface and a context keeper.
    """
    files_dir = os.path.join(root_dir, "files")
    settings = wandb.Settings(
        files_dir=files_dir,
        root_dir=root_dir,
        # _start_time=0,
        resume=resume,
        # ignore_globs=(),
        _sync=True,
        disable_job_creation=False,
        _file_stream_timeout_seconds=0,
    )
    record_q: Queue[Record] = queue.Queue()
    result_q: Queue[Result] = queue.Queue()
    publish_interface = InterfaceQueue(record_q=record_q)
    context_keeper = context.ContextKeeper()
    # Build through ``cls`` so that a subclass calling ``setup`` gets an
    # instance of itself rather than of the base class.
    return cls(
        settings=SettingsStatic(settings.to_proto()),
        record_q=record_q,
        result_q=result_q,
        interface=publish_interface,
        context_keeper=context_keeper,
    )
354
+
355
+ def __len__(self) -> int:
356
+ return self._record_q.qsize()
357
+
358
+ def __enter__(self) -> "SendManager":
359
+ return self
360
+
361
+ def __exit__(
362
+ self,
363
+ exc_type: Optional[Type[BaseException]],
364
+ exc_value: Optional[BaseException],
365
+ exc_traceback: Optional[traceback.TracebackException],
366
+ ) -> Literal[False]:
367
+ while self:
368
+ data = next(self)
369
+ self.send(data)
370
+ self.finish()
371
+ return False
372
+
373
def retry_callback(self, status: int, response_text: str) -> None:
    """Record a retried HTTP response on the retry queue.

    Args:
        status: HTTP status code, stored as ``http_status_code``.
        response_text: Response body, stored as ``http_response_text``.
    """
    http_response = wandb_internal_pb2.HttpResponse()
    http_response.http_status_code = status
    http_response.http_response_text = response_text
    self._retry_q.put(http_response)
378
+
379
def send(self, record: "Record") -> None:
    """Dispatch a record to the matching ``send_<record_type>`` method.

    Updates the record-number and end-offset bookkeeping first, then runs
    the handler with the record's API context installed. The local context
    is always cleared afterwards.

    Args:
        record: The record to process; its ``record_type`` oneof selects
            the handler.
    """
    self._update_record_num(record.num)
    self._update_end_offset(record.control.end_offset)

    kind = record.WhichOneof("record_type")
    assert kind
    handler_name = "send_" + kind
    handler = getattr(self, handler_name, None)
    # Don't log output to reduce log noise
    unlogged_kinds = {"output", "request", "output_raw"}
    if kind not in unlogged_kinds:
        logger.debug(f"send: {kind}")
    assert handler, f"unknown send handler: {handler_name}"

    ctx_id = context.context_id_from_record(record)
    api_ctx = self._context_keeper.get(ctx_id)
    try:
        self._api.set_local_context(api_ctx)
        handler(record)
    except ContextCancelledError:
        # A cancelled record is logged and its context released; the error
        # is not propagated.
        logger.debug(f"Record cancelled: {kind}")
        self._context_keeper.release(ctx_id)
    finally:
        self._api.clear_local_context()
402
+
403
def send_preempting(self, _: "Record") -> None:
    """Enqueue a preempting notice on the file stream, if one exists.

    The record argument is ignored.
    """
    if not self._fs:
        return
    self._fs.enqueue_preempting()
406
+
407
def send_request_sender_mark(self, _: "Record") -> None:
    """Report status unconditionally; the record argument is ignored."""
    force = True
    self._maybe_report_status(always=force)
409
+
410
def send_request(self, record: "Record") -> None:
    """Dispatch a request record to the matching ``send_request_<type>`` method.

    Args:
        record: Record whose ``request.request_type`` oneof selects the
            handler.
    """
    kind = record.request.WhichOneof("request_type")
    assert kind
    handler_name = "send_request_" + kind
    handler = getattr(self, handler_name, None)
    # network_status requests are not logged.
    if kind != "network_status":
        logger.debug(f"send_request: {kind}")
    assert handler, f"unknown handle: {handler_name}"
    handler(record)
419
+
420
def _respond_result(self, result: "Result") -> None:
    """Trace a result, release its context and put it on the result queue.

    Args:
        result: The result to deliver.
    """
    out_queue = self._result_q
    tracelog.log_message_queue(result, out_queue)
    ctx_id = context.context_id_from_result(result)
    self._context_keeper.release(ctx_id)
    out_queue.put(result)
425
+
426
+ def _flatten(self, dictionary: Dict) -> None:
427
+ if isinstance(dictionary, dict):
428
+ for k, v in list(dictionary.items()):
429
+ if isinstance(v, dict):
430
+ self._flatten(v)
431
+ dictionary.pop(k)
432
+ for k2, v2 in v.items():
433
+ dictionary[k + "." + k2] = v2
434
+
435
+ def _update_record_num(self, record_num: int) -> None:
436
+ if not record_num:
437
+ return
438
+ # Currently how we handle offline mode and syncing is not
439
+ # compatible with this assertion due to how the exit record
440
+ # is (mis)handled:
441
+ # - using "always_send" in offline mode to trigger defer
442
+ # state machine
443
+ # - skipping the exit record in `wandb sync` mode so that
444
+ # it is always executed as the last record
445
+ if not self._settings._offline and not self._settings._sync:
446
+ assert record_num == self._send_record_num + 1
447
+ self._send_record_num = record_num
448
+
449
+ def _update_end_offset(self, end_offset: int) -> None:
450
+ if not end_offset:
451
+ return
452
+ self._send_end_offset = end_offset
453
+
454
def send_request_sender_read(self, record: "Record") -> None:
    """Replay records from the sync file through ``send``.

    The datastore is opened on ``settings.sync_file`` on first use and
    reused afterwards. Records are read starting at the request's
    ``start_offset`` until the datastore offset reaches ``final_offset``.

    Args:
        record: Record carrying the ``sender_read`` request.
    """
    if self._ds is None:
        self._ds = datastore.DataStore()
        self._ds.open_for_scan(self._settings.sync_file)

    # TODO(cancel_paused): implement cancel_set logic
    # The idea is that there is an active request to cancel a
    # message that is being read from the transaction log below

    read_request = record.request.sender_read
    start_offset = read_request.start_offset
    final_offset = read_request.final_offset
    self._ds.seek(start_offset)

    offset_reached = 0
    while offset_reached < final_offset:
        payload = self._ds.scan_data()
        assert payload
        offset_reached = self._ds.get_offset()

        replayed = wandb_internal_pb2.Record()
        replayed.ParseFromString(payload)
        self._update_end_offset(offset_reached)
        self.send(replayed)

    # make sure we perform deferred operations
    self.debounce()

    # make sure that we always update the writer for every sender read request
    self._maybe_report_status(always=True)
483
+
484
def send_request_stop_status(self, record: "Record") -> None:
    """Answer a stop-status request.

    ``run_should_stop`` defaults to False. The API is only asked when the
    entity, project, run and run id are all known; a failure of that call
    is logged as a warning and the default is kept.

    Args:
        record: The request record the result is derived from.
    """
    result = proto_util._result_from_record(record)
    stop_status = result.response.stop_status_response
    stop_status.run_should_stop = False

    run_is_known = self._entity and self._project and self._run and self._run.run_id
    if run_is_known:
        try:
            stop_status.run_should_stop = self._api.check_stop_requested(
                self._project, self._entity, self._run.run_id
            )
        except Exception as e:
            logger.warning("Failed to check stop requested status: %s", e)
    self._respond_result(result)
496
+
497
+ def _maybe_update_config(self, always: bool = False) -> None:
498
+ time_now = time.monotonic()
499
+ if (
500
+ not always
501
+ and time_now < self._debounce_config_time + self.UPDATE_CONFIG_TIME
502
+ ):
503
+ return
504
+ if self._config_needs_debounce:
505
+ self._debounce_config()
506
+ self._debounce_config_time = time_now
507
+
508
def _maybe_report_status(self, always: bool = False) -> None:
    """Publish a status report request, rate-limited by ``UPDATE_STATUS_TIME``.

    The report carries the current send record number and sent offset, plus
    the wall-clock time of the report as its sync time.

    Args:
        always: When true, the rate-limit check is skipped.
    """
    now = time.monotonic()
    if not always:
        next_allowed = self._debounce_status_time + self.UPDATE_STATUS_TIME
        if now < next_allowed:
            return
    self._debounce_status_time = now

    report = wandb_internal_pb2.StatusReportRequest(
        record_num=self._send_record_num,
        sent_offset=self._send_end_offset,
    )
    # sync_time is wall-clock time; the rate limit above uses the monotonic clock.
    wall_clock = time.time()
    report.sync_time.FromMicroseconds(int(wall_clock * 1e6))
    request = self._interface._make_request(status_report=report)
    self._interface._publish(request)
525
+
526
def debounce(self, final: bool = False) -> None:
    """Run the rate-limited status report and config update steps, in that order.

    Args:
        final: Forwarded as ``always`` to both steps, bypassing their rate limits.
    """
    for step in (self._maybe_report_status, self._maybe_update_config):
        step(always=final)
529
+
530
def _debounce_config(self) -> None:
    """Send the current backend config via upsert and save it locally.

    The upsert is skipped when no run is set; the config is saved and the
    pending flag is cleared in either case.
    """
    backend_config = self._config_backend_dict()
    run = self._run
    # TODO(jhr): check result of upsert_run?
    if run:
        self._api.upsert_run(
            name=run.run_id,
            config=backend_config,
            **self._api_settings,  # type: ignore
        )
    self._config_save(backend_config)
    self._config_needs_debounce = False
541
+
542
def send_request_network_status(self, record: "Record") -> None:
    """Reply with every entry currently waiting in the retry queue.

    The queue is drained without blocking until it reports empty. Any other
    exception raised while draining is logged and draining continues.
    """
    result = proto_util._result_from_record(record)
    network_status = result.response.network_status_response

    drained = False
    while not drained:
        try:
            network_status.network_responses.append(self._retry_q.get_nowait())
        except queue.Empty:
            drained = True
        except Exception as err:
            logger.warning(f"Error emptying retry queue: {err}")

    self._respond_result(result)
553
+
554
def send_request_login(self, record: "Record") -> None:
    """Re-authenticate, remember the viewer's entity and optionally reply.

    A result is only sent back when ``record.control.req_resp`` is set; it
    carries the active entity when the viewer info contained one.
    """
    # TODO: do something with api_key or anonymous?
    # TODO: return an error if we aren't logged in?
    self._api.reauth()
    viewer = self.get_viewer_info()
    server_info = self.get_server_info()
    if server_info:
        logger.info(f"Login server info: {server_info}")

    self._entity = viewer.get("entity")
    if not record.control.req_resp:
        return

    result = proto_util._result_from_record(record)
    if self._entity:
        result.response.login_response.active_entity = self._entity
    self._respond_result(result)
570
+
571
def send_exit(self, record: "Record") -> None:
    """Record the exit, store the runtime in the summary and start deferring.

    The exit record is kept so the final defer state can answer it, the exit
    code is stored, and the reported runtime is written to the metadata
    summary before the summary is refreshed.
    """
    # track where the exit came from
    self._record_exit = record

    exit_info = record.exit
    self._exit_code = exit_info.exit_code
    logger.info("handling exit code: %s", exit_info.exit_code)
    reported_runtime = exit_info.runtime
    logger.info("handling runtime: %s", exit_info.runtime)
    self._metadata_summary["runtime"] = reported_runtime
    self._update_summary()

    # We need to give the request queue a chance to empty between states
    # so use handle_request_defer as a state machine.
    logger.info("send defer")
    self._interface.publish_defer()
587
+
588
def send_final(self, record: "Record") -> None:
    """Accept a final record; nothing is done with it here."""
    return None
590
+
591
def _flush_run(self) -> None:
    """Do nothing; this is the hook called for the FLUSH_RUN defer state."""
    return None
593
+
594
def send_request_status_report(self, record: "Record") -> None:
    """Accept a status report request without acting on it."""
    # todo? this is just a noop to please wandb sync
    return None
597
+
598
def send_request_defer(self, record: "Record") -> None:  # noqa: C901
    """Handle one state of the shutdown (defer) state machine.

    Every state except ``END`` performs its own flush work, if any, and then
    publishes a defer request for the next state. ``END`` stores the exit
    result and answers the remembered exit record when it has a mailbox slot.

    Raises:
        AssertionError: If the state is not one of the known defer states.
    """
    defer = record.request.defer
    state = defer.state
    logger.info(f"handle sender defer: {state}")

    def advance() -> None:
        """Publish a defer request for the state following the current one."""
        following = defer.state + 1
        logger.info(f"send defer: {following}")
        self._interface.publish_defer(following)

    if state == defer.END:
        finished = wandb_internal_pb2.RunExitResult()

        # mark exit done in case we are polling on exit
        self._exit_result = finished

        # Report response to mailbox
        if self._record_exit and self._record_exit.control.mailbox_slot:
            result = proto_util._result_from_record(self._record_exit)
            result.exit_result.CopyFrom(finished)
            self._respond_result(result)
        return

    # NOTE: these states are handled in handler.py:handle_request_defer()
    handled_elsewhere = (
        defer.FLUSH_STATS,
        defer.FLUSH_PARTIAL_HISTORY,
        defer.FLUSH_TB,
        defer.FLUSH_SUM,
    )

    if state == defer.BEGIN or state in handled_elsewhere:
        advance()
    elif state == defer.FLUSH_RUN:
        self._flush_run()
        advance()
    elif state == defer.FLUSH_DEBOUNCER:
        self.debounce(final=True)
        advance()
    elif state == defer.FLUSH_OUTPUT:
        self._output_raw_finish()
        advance()
    elif state == defer.FLUSH_JOB:
        self._flush_job()
        advance()
    elif state == defer.FLUSH_DIR:
        if self._dir_watcher:
            self._dir_watcher.finish()
            self._dir_watcher = None
        advance()
    elif state == defer.FLUSH_FP:
        if self._pusher:
            # FilePusher generates some events for FileStreamApi, so we
            # need to wait for pusher to finish before going to the next
            # state to ensure that filestream gets all the events that we
            # want before telling it to finish up
            self._pusher.finish(advance)
        else:
            advance()
    elif state == defer.JOIN_FP:
        if self._pusher:
            self._pusher.join()
        advance()
    elif state == defer.FLUSH_FS:
        if self._fs:
            # TODO(jhr): now is a good time to output pending output lines
            self._fs.finish(self._exit_code)
            self._fs = None
        advance()
    elif state == defer.FLUSH_FINAL:
        self._interface.publish_final()
        self._interface.publish_footer()
        advance()
    else:
        raise AssertionError("unknown state")
681
+
682
def send_request_poll_exit(self, record: "Record") -> None:
    """Reply to an exit poll with upload progress and, if known, the exit result.

    Nothing is sent when the record asks for neither a response nor a mailbox
    slot. Upload statistics and per-category file counts are filled in only
    while a file pusher exists; ``done`` and the exit result are filled in
    once an exit result has been stored.
    """
    control = record.control
    if not (control.req_resp or control.mailbox_slot):
        return

    result = proto_util._result_from_record(record)

    pusher = self._pusher
    if pusher:
        _alive, status = pusher.get_status()
        counts = pusher.file_counts_by_category()
        poll = result.response.poll_exit_response
        for field in ("uploaded_bytes", "total_bytes", "deduped_bytes"):
            setattr(poll.pusher_stats, field, getattr(status, field))
        for category in ("wandb", "media", "artifact", "other"):
            setattr(poll.file_counts, f"{category}_count", getattr(counts, category))

    if self._exit_result:
        poll = result.response.poll_exit_response
        poll.done = True
        poll.exit_result.CopyFrom(self._exit_result)

    self._respond_result(result)
705
+
706
def _setup_resume(
    self, run: "RunRecord"
) -> Optional["wandb_internal_pb2.ErrorInfo"]:
    """Queries the backend for a run; fail if the settings are incompatible.

    When the run exists and has been initialized, the resume state (runtime,
    step, line counts, config, summary, tags) is filled in from the backend's
    answer. If the stored tails cannot be parsed and resuming is not
    mandatory, the values parsed so far are used and the rest keep their
    empty defaults.

    Args:
        run: The run record to look up; its entity falls back to the
            sender's entity when empty.

    Returns:
        ``None`` when resuming is disabled, not applicable, or was set up;
        otherwise a usage ``ErrorInfo`` describing why the ``resume`` setting
        cannot be honoured.
    """
    if not self._settings.resume:
        return None

    # TODO: This causes a race, we need to make the upsert atomically
    # only create or update depending on the resume config
    # we use the runs entity if set, otherwise fallback to users entity
    # todo: ensure entity is not None as self._entity is Optional[str]
    entity = run.entity or self._entity
    logger.info(
        "checking resume status for %s/%s/%s", entity, run.project, run.run_id
    )
    resume_status = self._api.run_resume_status(
        entity=entity,  # type: ignore
        project_name=run.project,
        name=run.run_id,
    )
    # No resume status = run does not exist; No t key in wandbConfig = run exists but hasn't been inited
    if not resume_status or '"t":' not in resume_status.get("wandbConfig", ""):
        if self._settings.resume == "must":
            error = wandb_internal_pb2.ErrorInfo()
            error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
            error.message = (
                "You provided an invalid value for the `resume` argument."
                f" The value 'must' is not a valid option for resuming a run ({run.run_id}) that has not been initialized."
                " Please check your inputs and try again with a valid run ID."
                " If you are trying to start a new run, please omit the `resume` argument or use `resume='allow'`."
            )
            return error
        return None

    #
    # handle cases where we have resume_status
    #
    if self._settings.resume == "never":
        error = wandb_internal_pb2.ErrorInfo()
        error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
        error.message = (
            "You provided an invalid value for the `resume` argument."
            f" The value 'never' is not a valid option for resuming a run ({run.run_id}) that already exists."
            " Please check your inputs and try again with a valid value for the `resume` argument."
        )
        return error

    # Every value read after the try block gets its default here, so a parse
    # failure part-way through cannot leave a name unbound.
    history = {}
    config = {}
    summary = {}
    tags = []
    events_rt = 0
    history_rt = 0
    try:
        # The tails are JSON lists of JSON-encoded rows. Decode into separate
        # names so `history` stays a dict when the tail is empty or a row
        # fails to decode.
        history_tail = json.loads(resume_status["historyTail"])
        if history_tail:
            history = json.loads(history_tail[-1])
            history_rt = history.get("_runtime", 0)
        events_tail = json.loads(resume_status["eventsTail"])
        if events_tail:
            last_event = json.loads(events_tail[-1])
            events_rt = last_event.get("_runtime", 0)
        config = json.loads(resume_status["config"] or "{}")
        summary = json.loads(resume_status["summaryMetrics"] or "{}")
        new_runtime = summary.get("_wandb", {}).get("runtime", None)
        if new_runtime is not None:
            self._resume_state.wandb_runtime = new_runtime
        tags = resume_status.get("tags") or []

    except (IndexError, ValueError) as e:
        logger.error("unable to load resume tails", exc_info=e)
        if self._settings.resume == "must":
            error = wandb_internal_pb2.ErrorInfo()
            error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
            error.message = "resume='must' but could not resume ({}) ".format(
                run.run_id
            )
            return error

    # TODO: Do we need to restore config / summary?
    # System metrics runtime is usually greater than history
    self._resume_state.runtime = max(events_rt, history_rt)
    last_step = history.get("_step", 0)
    history_line_count = resume_status["historyLineCount"]
    self._resume_state.step = last_step + 1 if history_line_count > 0 else last_step
    self._resume_state.history = history_line_count
    self._resume_state.events = resume_status["eventsLineCount"]
    self._resume_state.output = resume_status["logLineCount"]
    self._resume_state.config = config
    self._resume_state.summary = summary
    self._resume_state.tags = tags
    self._resume_state.resumed = True
    logger.info("configured resuming with: {}".format(self._resume_state))
    return None
800
+
801
def _telemetry_get_framework(self) -> str:
    """Get the framework name for the internal config structure.

    The imports recorded at finish are preferred over those recorded at init.
    The result is the name paired with the first flag from
    ``_framework_priority()`` that is set on those imports, or ``""`` when
    no imports were recorded or no flag is set.
    """
    telemetry_obj = self._telemetry_obj
    # detect framework by checking what is loaded
    if telemetry_obj.HasField("imports_finish"):
        loaded = telemetry_obj.imports_finish
    elif telemetry_obj.HasField("imports_init"):
        loaded = telemetry_obj.imports_init
    else:
        return ""

    for flag, framework_name in _framework_priority():
        if getattr(loaded, flag, False):
            return framework_name
    return ""
815
+
816
def _config_backend_dict(self) -> sender_config.BackendConfigDict:
    """Build the backend representation of the consolidated config.

    A fresh ``ConfigState`` is used when the consolidated config is falsy.
    """
    state = self._consolidated_config
    if not state:
        state = sender_config.ConfigState()

    return state.to_backend_dict(
        telemetry_record=self._telemetry_obj,
        framework=self._telemetry_get_framework(),
        start_time_millis=self._start_time,
        metric_pbdicts=self._config_metric_pbdict_list,
    )
825
+
826
def _config_save(
    self,
    config_value_dict: sender_config.BackendConfigDict,
) -> None:
    """Write the given backend config to ``config.yaml`` in the files directory."""
    target = os.path.join(self._settings.files_dir, "config.yaml")
    config_util.save_config_file_from_dict(target, config_value_dict)
832
+
833
def _sync_spell(self) -> None:
    """Sync this run with spell.

    Publishes the spell run URL into the run config and sends this run's URL
    to the spell API. Request failures are ignored; nothing happens when no
    run is set.
    """
    run = self._run
    if not run:
        return
    try:
        environ = os.environ
        self._interface.publish_config(
            key=("_wandb", "spell_url"), val=environ.get("SPELL_RUN_URL")
        )
        run_url = f"{self._api.app_url}/{run.entity}/{run.project}/runs/{run.run_id}"
        endpoint = environ.get("SPELL_API_URL", "https://api.spell.run") + "/wandb_url"
        payload = {"access_token": environ.get("WANDB_ACCESS_TOKEN"), "url": run_url}
        requests.put(endpoint, json=payload, timeout=2)
    except requests.RequestException:
        # TODO: do something if sync spell is not successful?
        pass
853
+
854
def _setup_fork(self, server_run: dict):
    """Set the resume state and the run up for forking.

    The first step is one past the ``fork_from`` value, and the history line
    count is taken from ``server_run`` (0 when absent). Only forking on the
    ``_step`` metric is accepted.
    """
    fork_point = self._settings.fork_from
    assert fork_point
    assert fork_point.metric == "_step"
    assert self._run

    starting_step = int(fork_point.value) + 1
    resume_state = self._resume_state
    resume_state.step = starting_step
    resume_state.history = server_run.get("historyLineCount", 0)

    forked_run = self._run
    forked_run.forked = True
    forked_run.starting_step = starting_step
863
+
864
def _load_rewind_state(self, run: "RunRecord"):
    """Ask the backend to rewind the run and load the resulting state.

    The rewind response is stored on the sender, and its history line count
    and config are copied into the resume state.

    Args:
        run: The run to rewind; empty entity/project values are sent as None.
    """
    assert self._settings.resume_from
    self._rewind_response = self._api.rewind_run(
        run_name=run.run_id,
        entity=run.entity or None,
        project=run.project or None,
        metric_name=self._settings.resume_from.metric,
        metric_value=self._settings.resume_from.value,
        program_path=self._settings.program or None,
    )
    self._resume_state.history = self._rewind_response.get("historyLineCount", 0)
    # A `.get` default only covers a missing key. Use `or` so a present but
    # null/empty config also falls back to an empty JSON object.
    self._resume_state.config = json.loads(
        self._rewind_response.get("config") or "{}"
    )
878
+
879
def _install_rewind_state(self):
    """Apply the rewind starting step to the resume state and the run.

    The first step is one past the ``resume_from`` value. Only rewinding on
    the ``_step`` metric is accepted, and a rewind response must be present.
    """
    rewind_point = self._settings.resume_from
    assert rewind_point
    assert rewind_point.metric == "_step"
    assert self._run
    assert self._rewind_response

    starting_step = int(rewind_point.value) + 1
    self._resume_state.step = starting_step

    # We set the fork flag here because rewind uses the forking
    # infrastructure under the hood. Setting `forked` here
    # ensures that run._step is properly set in the user process.
    rewound_run = self._run
    rewound_run.forked = True
    rewound_run.starting_step = starting_step
893
+
894
def _handle_error(
    self,
    record: "Record",
    error: "wandb_internal_pb2.ErrorInfo",
    run: "RunRecord",
) -> None:
    """Report a run error to the requester, or log it when nobody is waiting.

    A result carrying both the run and the error is sent when the record asks
    for a response or has a mailbox slot; otherwise the error message is
    logged.
    """
    control = record.control
    if not (control.req_resp or control.mailbox_slot):
        logger.error("Got error in async mode: %s", error.message)
        return

    result = proto_util._result_from_record(record)
    run_result = result.run_result
    run_result.run.CopyFrom(run)
    run_result.error.CopyFrom(error)
    self._respond_result(result)
907
+
908
def send_run(self, record: "Record", file_dir: Optional[str] = None) -> None:
    """Create or update the run described by ``record`` on the backend.

    The first run record (no run set yet) is treated as ``wandb.init``: the
    resume, rewind or fork options are applied, the run is upserted, and the
    file stream threads are started. Later run records only update the run.
    Any error is reported through ``_handle_error`` and stops processing of
    this record.

    Args:
        record: The record carrying the run to send.
        file_dir: Forwarded to ``_start_run_threads`` on the first run record.
    """
    run = record.run
    error = None
    is_wandb_init = self._run is None

    # save start time of a run
    self._start_time = int(run.start_time.ToMicroseconds() // 1e6)

    # update telemetry
    if run.telemetry:
        self._telemetry_obj.MergeFrom(run.telemetry)
    if self._settings._sync:
        self._telemetry_obj.feature.sync = True

    # build config dict
    config_value_dict: Optional[sender_config.BackendConfigDict] = None
    if run.config:
        self._consolidated_config.update_from_proto(run.config)
        config_value_dict = self._config_backend_dict()
        self._config_save(config_value_dict)

    do_fork = self._settings.fork_from is not None and is_wandb_init
    do_rewind = self._settings.resume_from is not None and is_wandb_init
    do_resume = bool(self._settings.resume)

    num_resume_options_set = sum([do_fork, do_rewind, do_resume])
    if num_resume_options_set > 1:
        error = wandb_internal_pb2.ErrorInfo()
        error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
        error.message = (
            "Multiple resume options specified. "
            "Please specify only one of `fork_from`, `resume`, or `resume_from`."
        )
        self._handle_error(record, error, run)
        # Stop here: continuing would either report this error a second time
        # or go on to upsert the run despite the usage error.
        return

    if is_wandb_init:
        # Ensure we have a project to query for status
        if run.project == "":
            run.project = util.auto_project_name(self._settings.program)
        # Only check resume status on `wandb.init`

        if do_resume:
            error = self._setup_resume(run)

        elif do_rewind:
            # _load_rewind_state has no return statement, so this is None.
            error = self._load_rewind_state(run)

    if error is not None:
        self._handle_error(record, error, run)
        return

    # Save the resumed config
    if self._resume_state.config is not None:
        self._consolidated_config.merge_resumed_config(
            config_util.dict_strip_value_dict(self._resume_state.config)
        )

        config_value_dict = self._config_backend_dict()
        self._config_save(config_value_dict)

    # handle empty config
    # TODO(jhr): consolidate the 4 ways config is built:
    # (passed config, empty config, resume config, send_config)
    if not config_value_dict:
        config_value_dict = self._config_backend_dict()
        self._config_save(config_value_dict)

    try:
        server_run = self._init_run(run, config_value_dict)
    except (CommError, UsageError) as e:
        logger.error(e, exc_info=True)
        error = ProtobufErrorHandler.from_exception(e)
        self._handle_error(record, error, run)
        return

    assert self._run  # self._run is configured in _init_run()

    if do_fork:
        # _setup_fork has no return statement, so this is None.
        error = self._setup_fork(server_run)

    if error is not None:
        self._handle_error(record, error, run)
        return

    if record.control.req_resp or record.control.mailbox_slot:
        result = proto_util._result_from_record(record)
        # TODO: we could do self._interface.publish_defer(resp) to notify
        # the handler not to actually perform server updates for this uuid
        # because the user process will send a summary update when we resume
        result.run_result.run.CopyFrom(self._run)
        self._respond_result(result)

    # Only spin up our threads on the first run message
    if is_wandb_init:
        self._start_run_threads(file_dir)
    else:
        logger.info("updated run: %s", self._run.run_id)
1005
+
1006
def _update_resume_state(self, is_rewinding: bool, inserted: bool):
    """Reflect the resume/rewind outcome on the run and in telemetry.

    Args:
        is_rewinding: Whether the run is being rewound.
        inserted: Whether the upsert created a new run.
    """
    run = self._run
    assert run
    resume_state = self._resume_state

    if resume_state.resumed:
        run.resumed = True
        if resume_state.wandb_runtime is not None:
            run.runtime = resume_state.wandb_runtime
        return

    if is_rewinding:
        # because is_rewinding is mutually exclusive with self._resume_state.resumed,
        # this block will always execute if is_rewinding is set
        self._install_rewind_state()
        return

    # If the user is not resuming, and we didn't insert on upsert_run then
    # it is likely that we are overwriting the run which we might want to
    # prevent in the future. This could be a false signal since an upsert_run
    # message which gets retried in the network could also show up as not
    # inserted.
    if not inserted:
        # no need to flush this, it will get updated eventually
        self._telemetry_obj.feature.maybe_run_overwrite = True
1025
+
1026
def _init_run(
    self,
    run: "RunRecord",
    config_dict: Optional[sender_config.BackendConfigDict],
) -> dict:
    """Upsert (or rewind) the run and copy the server's answer onto it.

    ``self._run`` is set to ``run``, then updated with the resume state and
    with the identifiers returned by the server (storage id, display name,
    project, entity, sweep). The project and entity are also stored on the
    sender and in the API settings.

    Args:
        run: The run record to initialize.
        config_dict: Backend config sent with the upsert and stored on the run.

    Returns:
        The server's run dict: the rewind response when rewinding, otherwise
        the upsert result.

    Raises:
        ValueError: If the run is both resumed and being rewound.
    """
    resume_state = self._resume_state

    # We subtract the previous runs runtime when resuming
    start_seconds = (run.start_time.ToMicroseconds() / 1e6) - resume_state.runtime
    # TODO: we don't check inserted currently, ultimately we should make
    # the upsert know the resume state and fail transactionally

    if resume_state and resume_state.tags and not run.tags:
        run.tags.extend(resume_state.tags)

    is_rewinding = bool(self._settings.resume_from)
    if is_rewinding:
        assert self._rewind_response
        server_run, inserted, server_messages = self._rewind_response, True, None
    else:
        server_run, inserted, server_messages = self._api.upsert_run(
            name=run.run_id,
            entity=run.entity or None,
            project=run.project or None,
            group=run.run_group or None,
            job_type=run.job_type or None,
            display_name=run.display_name or None,
            notes=run.notes or None,
            tags=run.tags[:] or None,
            config=config_dict or None,
            sweep_name=run.sweep_id or None,
            host=run.host or None,
            program_path=self._settings.program or None,
            repo=run.git.remote_url or None,
            commit=run.git.commit or None,
        )

    # TODO: we don't want to create jobs in sweeps, since the
    # executable doesn't appear to be consistent
    if run.sweep_id:
        self._job_builder.disable = True

    self._server_messages = server_messages or []
    self._run = run

    if self._resume_state.resumed and is_rewinding:
        # this should not ever be possible to hit, since we check for
        # resumption above and raise an error if resumption is specified
        # twice.
        raise ValueError(
            "Cannot attempt to rewind and resume a run - only one of "
            "`resume` or `resume_from` can be specified."
        )

    self._update_resume_state(is_rewinding, inserted)
    self._run.starting_step = self._resume_state.step
    self._run.start_time.FromMicroseconds(int(start_seconds * 1e6))
    self._run.config.CopyFrom(self._interface._make_config(config_dict))
    if self._resume_state.summary is not None:
        resumed_summary = self._interface._make_summary_from_dict(
            self._resume_state.summary
        )
        self._run.summary.CopyFrom(resumed_summary)

    # Copy over whatever identifiers the server returned; falsy values are skipped.
    storage_id = server_run.get("id")
    if storage_id:
        self._run.storage_id = storage_id

    server_run_name = server_run.get("name")
    if server_run_name:
        self._api.set_current_run_id(server_run_name)

    display_name = server_run.get("displayName")
    if display_name:
        self._run.display_name = display_name

    # TODO: remove self._api.set_settings, and make self._project a property?
    project = server_run.get("project")
    if project:
        project_name = project.get("name")
        if project_name:
            self._run.project = project_name
            self._project = project_name
            self._api_settings["project"] = project_name
            self._api.set_setting("project", project_name)

        entity = project.get("entity")
        entity_name = entity.get("name") if entity else None
        if entity_name:
            self._run.entity = entity_name
            self._entity = entity_name
            self._api_settings["entity"] = entity_name
            self._api.set_setting("entity", entity_name)

    sweep_id = server_run.get("sweepName")
    if sweep_id:
        self._run.sweep_id = sweep_id

    if os.getenv("SPELL_RUN_URL"):
        self._sync_spell()
    return server_run
1122
+
1123
def _start_run_threads(self, file_dir: Optional[str] = None) -> None:
    """Start the file stream, the file pusher and the directory watcher.

    The file stream policies start at the chunk offsets held in the resume
    state. The Sentry scope is tagged with the settings merged with the
    run's fields before the file stream starts.

    Args:
        file_dir: Passed to the directory watcher.
    """
    run = self._run
    assert run  # self._run is configured by caller
    resume_state = self._resume_state

    stream = file_stream.FileStreamApi(
        self._api,
        run.run_id,
        run.start_time.ToMicroseconds() / 1e6,
        timeout=self._settings._file_stream_timeout_seconds,
        settings=self._api_settings,
    )
    self._fs = stream

    # Ensure the streaming polices have the proper offsets
    stream.set_file_policy("wandb-summary.json", file_stream.SummaryFilePolicy())
    history_policy = file_stream.JsonlFilePolicy(start_chunk_id=resume_state.history)
    stream.set_file_policy("wandb-history.jsonl", history_policy)
    events_policy = file_stream.JsonlFilePolicy(start_chunk_id=resume_state.events)
    stream.set_file_policy("wandb-events.jsonl", events_policy)
    output_policy = file_stream.CRDedupeFilePolicy(start_chunk_id=resume_state.output)
    stream.set_file_policy("output.log", output_policy)

    # hack to merge run_settings and self._settings object together
    # so that fields like entity or project are available to be attached to Sentry events.
    run_fields = message_to_dict(run)
    sentry_tags = dict(self._settings)
    sentry_tags.update(run_fields)
    wandb._sentry.configure_scope(tags=sentry_tags, process_context="internal")

    stream.start()
    self._pusher = FilePusher(self._api, stream, settings=self._settings)
    self._dir_watcher = DirWatcher(self._settings, self._pusher, file_dir)
    logger.info(
        "run started: %s with start time %s",
        run.run_id,
        run.start_time.ToMicroseconds() / 1e6,
    )
1162
+
1163
def _save_history(self, history_dict: Dict[str, Any]) -> None:
    """Push one history row, JSON-encoded, to the file stream if it exists."""
    stream = self._fs
    if not stream:
        return
    stream.push(filenames.HISTORY_FNAME, json.dumps(history_dict))
1166
+
1167
def send_history(self, record: "Record") -> None:
    """Convert the record's history items to a dict and save it."""
    row = proto_util.dict_from_proto_list(record.history.item)
    self._save_history(row)
1171
+
1172
def _update_summary_record(self, summary: "SummaryRecord") -> None:
    """Replace the cached summary with the record's updates and republish it."""
    self._cached_summary = proto_util.dict_from_proto_list(summary.update)
    self._update_summary()
1176
+
1177
def send_summary(self, record: "Record") -> None:
    """Apply the summary carried directly on the record."""
    summary = record.summary
    self._update_summary_record(summary)
1179
+
1180
def send_request_summary_record(self, record: "Record") -> None:
    """Apply the summary carried inside a summary-record request."""
    summary = record.request.summary_record.summary
    self._update_summary_record(summary)
1182
+
1183
def _update_summary(self) -> None:
    """Merge the cached summary into the consolidated one and publish it.

    Any ``_wandb`` entry in the cached summary is replaced by the sender's
    metadata summary (when that is non-empty). The consolidated summary is
    pushed to the file stream when one exists, written to the summary file
    in the files directory, and that file is registered via ``_save_file``.
    """
    update = self._cached_summary.copy()
    update.pop("_wandb", None)
    metadata = self._metadata_summary
    if metadata:
        update["_wandb"] = metadata

    # merge with consolidated summary
    self._consolidated_summary.update(update)
    encoded = json.dumps(self._consolidated_summary)
    if self._fs:
        self._fs.push(filenames.SUMMARY_FNAME, encoded)

    # TODO(jhr): we should only write this at the end of the script
    target = os.path.join(self._settings.files_dir, filenames.SUMMARY_FNAME)
    with open(target, "w") as summary_file:
        summary_file.write(encoded)
    self._save_file(interface.GlobStr(filenames.SUMMARY_FNAME))
1198
+
1199
def send_stats(self, record: "Record") -> None:
    """Push one row of system stats to the events file.

    Records that are not of the SYSTEM stats type are ignored, as is
    everything while the file stream or the run is not set. Each item's JSON
    value is decoded and the row is flattened before the timestamp and the
    runtime relative to the run's start time are added.
    """
    stats = record.stats
    if stats.stats_type != wandb_internal_pb2.StatsRecord.StatsType.SYSTEM:
        return
    if not (self._fs and self._run):
        return

    sampled_us = stats.timestamp.ToMicroseconds()
    run_start_us = self._run.start_time.ToMicroseconds()
    system = {item.key: json.loads(item.value_json) for item in stats.item}

    row: Dict[str, Any] = {"system": system}
    self._flatten(row)
    row["_wandb"] = True
    row["_timestamp"] = sampled_us / 1e6
    row["_runtime"] = (sampled_us - run_start_us) / 1e6
    self._fs.push(filenames.EVENTS_FNAME, json.dumps(row))
    # TODO(jhr): check fs.push results?
1219
+
1220
def _output_raw_finish(self) -> None:
    """Stop every raw output stream, flush it, and close the raw output file.

    Each stream's writer thread gets five seconds to finish before an info
    message is logged and the join is repeated without a timeout.
    """
    for name, raw in self._output_raw_streams.items():
        raw._stopped.set()

        # shut down threads
        writer = raw._writer_thr
        writer.join(timeout=5)
        if writer.is_alive():
            logger.info("processing output...")
            writer.join()
        raw._reader_thr.join()

        # flush output buffers and files
        self._output_raw_flush(name)

    self._output_raw_streams = {}
    raw_file = self._output_raw_file
    if raw_file:
        raw_file.close()
        self._output_raw_file = None
1237
+
1238
def _output_raw_writer_thread(self, stream: "StreamLiterals") -> None:
    """Move queued raw output for ``stream`` into its emulator until stopped.

    The loop sleeps while the queue is empty and exits once the stream is
    stopped and empty. If the stream is already stopped and the drained
    chunks total more than 100000 characters, they bypass the emulator and
    are flushed directly, one chunk at a time, and the thread exits.
    """
    while True:
        raw = self._output_raw_streams[stream]
        pending = raw._queue
        if pending.empty():
            if raw._stopped.is_set():
                return
            time.sleep(0.5)
            continue

        chunks = []
        while not pending.empty():
            chunks.append(pending.get())

        if raw._stopped.is_set() and sum(map(len, chunks)) > 100000:
            logger.warning("Terminal output too large. Logging without processing.")
            # Flush what the emulator already holds, then the chunks themselves.
            self._output_raw_flush(stream)
            for chunk in chunks:
                self._output_raw_flush(stream, chunk)
            # TODO: lets mark that this happened in telemetry
            return

        try:
            raw._emulator.write("".join(chunks))
        except Exception as err:
            logger.warning(f"problem writing to output_raw emulator: {err}")
1260
+
1261
def _output_raw_reader_thread(self, stream: "StreamLiterals") -> None:
    """Flush the emulator output for ``stream`` at a fixed interval.

    The loop ends once the stream is stopped and its queue is empty.
    """
    raw = self._output_raw_streams[stream]
    while True:
        if raw._stopped.is_set() and raw._queue.empty():
            break
        self._output_raw_flush(stream)
        time.sleep(_OUTPUT_MIN_CALLBACK_INTERVAL)
1266
+
1267
def _output_raw_flush(
    self, stream: "StreamLiterals", data: Optional[str] = None
) -> None:
    """Send raw output for ``stream`` and mirror it to the raw output file.

    Args:
        stream: The stream the data belongs to.
        data: Text to flush. When ``None``, the text is read from the
            stream's emulator instead; a read failure is logged and nothing
            is flushed.
    """
    if data is None:
        emulator = self._output_raw_streams[stream]._emulator
        try:
            data = emulator.read()
        except Exception as err:
            logger.warning(f"problem reading from output_raw emulator: {err}")

    if not data:
        return

    self._send_output_line(stream, data)
    raw_file = self._output_raw_file
    if raw_file:
        raw_file.write(data.encode("utf-8"))
1280
+
1281
def send_request_python_packages(self, record: "Record") -> None:
    """Write the reported packages, sorted, to the requirements file.

    Each package becomes a ``name==version`` line in the requirements file
    inside the files directory.
    """
    import os

    from wandb.sdk.lib.filenames import REQUIREMENTS_FNAME

    packages = record.request.python_packages.package
    pinned = sorted([f"{pkg.name}=={pkg.version}" for pkg in packages])
    target = os.path.join(self._settings.files_dir, REQUIREMENTS_FNAME)
    with open(target, "w") as requirements_file:
        requirements_file.write("\n".join(pinned))
1291
+
1292
def send_output(self, record: "Record") -> None:
    """Forward one console output line; ignored while no file stream exists.

    The line is attributed to ``"stderr"`` when the record's output type is
    STDERR and to ``"stdout"`` otherwise.
    """
    if not self._fs:
        return
    output = record.output
    is_stderr = (
        output.output_type == wandb_internal_pb2.OutputRecord.OutputType.STDERR
    )
    stream: StreamLiterals = "stderr" if is_stderr else "stdout"
    self._send_output_line(stream, output.line)
1301
+
1302
def send_output_raw(self, record: "Record") -> None:
    """Queue one raw console output chunk; ignored while no file stream exists.

    The first chunk seen for a stream creates and starts that stream's raw
    output handler. It also opens the console output file if it is not open
    yet; failure to open the file is logged and output continues without it.
    """
    if not self._fs:
        return

    output = record.output_raw
    is_stderr = (
        output.output_type == wandb_internal_pb2.OutputRawRecord.OutputType.STDERR
    )
    stream: StreamLiterals = "stderr" if is_stderr else "stdout"
    chunk = output.line

    raw = self._output_raw_streams.get(stream)
    if not raw:
        raw = _OutputRawStream(stream=stream, sm=self)
        self._output_raw_streams[stream] = raw

        # open the console output file shared between both streams
        if not self._output_raw_file:
            log_path = os.path.join(self._settings.files_dir, filenames.OUTPUT_FNAME)
            opened = None
            try:
                opened = filesystem.CRDedupedFile(open(log_path, "wb"))
            except OSError as err:
                logger.warning(f"could not open output_raw_file: {err}")
            if opened:
                self._output_raw_file = opened
        raw.start()

    raw._queue.put(chunk)
1333
+
1334
def _send_output_line(self, stream: "StreamLiterals", line: str) -> None:
    """Combined writer for raw and non raw output lines.

    This is combined because they are both post emulator.

    Text that does not end in a newline is buffered per stream; a chunk that
    starts with a carriage return discards what was buffered before it. A
    newline-terminated line is sent to the file stream (when one exists),
    prefixed with ``"ERROR "`` for stderr, a naive UTC ISO timestamp, and any
    buffered text; the buffer is then cleared.

    Args:
        stream: ``"stdout"`` or ``"stderr"``.
        line: The text to buffer or send.
    """
    # Imported locally because this module's import block may not provide it.
    from datetime import timezone

    prepend = ""
    if stream == "stderr":
        prepend = "ERROR "
    if not line.endswith("\n"):
        self._partial_output.setdefault(stream, "")
        if line.startswith("\r"):
            # TODO: maybe we shouldnt just drop this, what if there was some \ns in the partial
            # that should probably be the check instead of not line.endswith(\n")
            # logger.info(f"Dropping data {self._partial_output[stream]}")
            self._partial_output[stream] = ""
        self._partial_output[stream] += line
        # TODO(jhr): how do we make sure this gets flushed?
        # we might need this for other stuff like telemetry
    else:
        # TODO(jhr): use time from timestamp proto
        # TODO(jhr): do we need to make sure we write full lines?
        # seems to be some issues with line breaks
        cur_time = time.time()
        # datetime.utcfromtimestamp() is deprecated since Python 3.12.
        # Convert with an explicit UTC zone, then drop tzinfo so the
        # ISO text has no "+00:00" suffix.
        utc_now = datetime.fromtimestamp(cur_time, tz=timezone.utc)
        timestamp = utc_now.replace(tzinfo=None).isoformat() + " "
        prev_str = self._partial_output.get(stream, "")
        line = f"{prepend}{timestamp}{prev_str}{line}"
        if self._fs:
            self._fs.push(filenames.OUTPUT_FNAME, line)
        self._partial_output[stream] = ""
1363
+
1364
+ def _update_config(self) -> None:
1365
+ self._config_needs_debounce = True
1366
+
1367
def send_config(self, record: "Record") -> None:
    """Apply the record's config proto to the consolidated config.

    Afterwards ``_update_config`` is called to flag the change.
    """
    config_proto = record.config
    self._consolidated_config.update_from_proto(config_proto)
    self._update_config()
1370
+
1371
def send_metric(self, record: "Record") -> None:
    """Record a metric definition in the config's metric list.

    The incoming metric is merged into (or, when its control flag says
    overwrite, copied over) the definition stored under the same name.
    If the metric names a step metric that already has a list index, a
    copy is encoded with ``step_metric_index`` (index + 1) instead of the
    name. The encoded dict then replaces the existing list entry for this
    metric or is appended as a new one. Metrics with a glob name are
    logged and ignored.
    """
    incoming = record.metric
    if incoming.glob_name:
        logger.warning("Seen metric with glob (shouldn't happen)")
        return

    # merge or overwrite
    stored = self._config_metric_dict.get(
        incoming.name, wandb_internal_pb2.MetricRecord()
    )
    combine = stored.CopyFrom if incoming._control.overwrite else stored.MergeFrom
    combine(incoming)
    self._config_metric_dict[incoming.name] = stored
    metric = stored

    # convert step_metric to index
    if metric.step_metric:
        step_idx = self._config_metric_index_dict.get(metric.step_metric)
        if step_idx is not None:
            # make a copy of this metric as we will be modifying it
            scratch = wandb_internal_pb2.Record()
            scratch.metric.CopyFrom(metric)
            metric = scratch.metric

            metric.ClearField("step_metric")
            metric.step_metric_index = step_idx + 1

    md: Dict[int, Any] = proto_util.proto_encode_to_dict(metric)
    existing_idx = self._config_metric_index_dict.get(metric.name)
    if existing_idx is None:
        self._config_metric_index_dict[metric.name] = len(
            self._config_metric_pbdict_list
        )
        self._config_metric_pbdict_list.append(md)
    else:
        self._config_metric_pbdict_list[existing_idx] = md
    self._update_config()
1409
+
1410
def _update_telemetry_record(self, telemetry: telemetry.TelemetryRecord) -> None:
    """Merge ``telemetry`` into ``self._telemetry_obj`` and flag the config change."""
    accumulated = self._telemetry_obj
    accumulated.MergeFrom(telemetry)
    self._update_config()
1413
+
1414
def send_telemetry(self, record: "Record") -> None:
    """Forward the record's telemetry payload to ``_update_telemetry_record``."""
    payload = record.telemetry
    self._update_telemetry_record(payload)
1416
+
1417
def send_request_telemetry_record(self, record: "Record") -> None:
    """Forward the telemetry carried by a telemetry-record request."""
    request = record.request.telemetry_record
    self._update_telemetry_record(request.telemetry)
1419
+
1420
def _save_file(
    self, fname: interface.GlobStr, policy: "interface.PolicyName" = "end"
) -> None:
    """Log the save request and pass the policy to the directory watcher.

    Nothing beyond the log line happens when no directory watcher is set.
    """
    logger.info("saving file %s with policy %s", fname, policy)
    watcher = self._dir_watcher
    if watcher:
        watcher.update_policy(fname, policy)
1426
+
1427
def send_files(self, record: "Record") -> None:
    """Call ``_save_file`` for every file entry in the record."""
    for entry in record.files.files:
        # TODO(jhr): fix paths with directories
        glob = interface.GlobStr(entry.path)
        policy = interface.file_enum_to_policy(entry.policy)
        self._save_file(glob, policy)
1434
+
1435
def send_header(self, record: "Record") -> None:
    """Accept a header record; intentionally a no-op."""
    return None
1437
+
1438
def send_footer(self, record: "Record") -> None:
    """Accept a footer record; intentionally a no-op."""
    return None
1440
+
1441
def send_tbrecord(self, record: "Record") -> None:
    """Accept a tbrecord; intentionally a no-op here.

    tbrecord watching threads are handled by handler.py.
    """
    return None
1444
+
1445
def send_request_link_artifact(self, record: "Record") -> None:
    """Link an artifact to a portfolio and respond with the result.

    The link call is attempted only when an artifact id (client or
    server), portfolio name, entity and project are all present. A
    failure of the link call is written to the result's error message and
    logged. The result is sent back via ``_respond_result`` in every case
    that gets past the control check.

    Raises:
        ValueError: If the record's control has neither ``req_resp`` nor
            ``mailbox_slot`` set.
    """
    control = record.control
    if not control.req_resp and not control.mailbox_slot:
        raise ValueError(
            f"Expected either `req_resp` or `mailbox_slot`, got: {record.control!r}"
        )

    result = proto_util._result_from_record(record)
    link = record.request.link_artifact
    client_id, server_id = link.client_id, link.server_id
    portfolio_name = link.portfolio_name
    entity, project = link.portfolio_entity, link.portfolio_project
    aliases = link.portfolio_aliases
    logger.debug(
        f"link_artifact params - client_id={client_id}, server_id={server_id}, pfolio={portfolio_name}, entity={entity}, project={project}"
    )

    has_artifact_id = bool(client_id or server_id)
    if has_artifact_id and portfolio_name and entity and project:
        try:
            self._api.link_artifact(
                client_id, server_id, portfolio_name, entity, project, aliases
            )
        except Exception as e:
            result.response.log_artifact_response.error_message = f'error linking artifact to "{entity}/{project}/{portfolio_name}"; error: {e}'
            logger.warning("Failed to link artifact to portfolio: %s", e)

    self._respond_result(result)
1470
+
1471
def send_use_artifact(self, record: "Record") -> None:
    """Pretend to send a used artifact.

    This function doesn't actually send anything, it is just used internally.
    A used artifact with a partial job name hands its id to the job
    builder; a used artifact of type "job" without one disables the job
    builder. Anything else is ignored.
    """
    use = record.use_artifact
    partial_job_name = use.partial.job_name

    if partial_job_name:
        # job is partial, let job builder rebuild job, set job source dict
        self._job_builder.set_partial_source_id(use.id)
    elif use.type == "job":
        self._job_builder.disable = True
1483
+
1484
def send_request_log_artifact(self, record: "Record") -> None:
    """Log the requested artifact and respond with its id or an error.

    On success the response carries the artifact id returned by
    ``_send_artifact``. Any exception, including an empty result, is
    converted into the response's ``error_message`` instead of
    propagating. The result is always sent via ``_respond_result``.
    """
    assert record.control.req_resp
    result = proto_util._result_from_record(record)
    request = record.request.log_artifact
    artifact = request.artifact
    history_step = request.history_step

    try:
        res = self._send_artifact(artifact, history_step)
        if not res:
            # Explicit raise rather than `assert`: asserts are stripped
            # under `python -O`, which would let a None result reach
            # `res["id"]` and report a misleading TypeError message.
            raise RuntimeError("Unable to send artifact")
        result.response.log_artifact_response.artifact_id = res["id"]
        logger.info(f"logged artifact {artifact.name} - {res}")
    except Exception as e:
        result.response.log_artifact_response.error_message = (
            f'error logging artifact "{artifact.type}/{artifact.name}": {e}'
        )

    self._respond_result(result)
1501
+
1502
def send_artifact(self, record: "Record") -> None:
    """Send the record's artifact; failures are logged, never raised."""
    artifact = record.artifact
    try:
        res = self._send_artifact(artifact)
    except Exception as e:
        logger.error(
            f'send_artifact: failed for artifact "{artifact.type}/{artifact.name}": {e}'
        )
    else:
        logger.info(f"sent artifact {artifact.name} - {res}")
1513
+
1514
def _send_artifact(
    self, artifact: "ArtifactRecord", history_step: Optional[int] = None
) -> Optional[Dict]:
    """Save an artifact through ``ArtifactSaver`` and return the save result.

    Returns ``None`` without saving when the artifact has a distributed id
    and the server's max CLI version is unknown or older than 0.10.16.
    After saving, the result is handed to the job builder and the manifest
    file, if a path was given, is removed (a missing file is ignored).
    """
    from wandb.util import parse_version

    assert self._pusher
    saver = ArtifactSaver(
        api=self._api,
        digest=artifact.digest,
        manifest_json=_manifest_json_from_proto(artifact.manifest),
        file_pusher=self._pusher,
        is_user_created=artifact.user_created,
    )

    if artifact.distributed_id:
        server_max = self._max_cli_version()
        unsupported = server_max is None or parse_version(
            server_max
        ) < parse_version("0.10.16")
        if unsupported:
            logger.warning(
                "This W&B Server doesn't support distributed artifacts, "
                "have your administrator install wandb/local >= 0.9.37"
            )
            return None

    metadata = None
    if artifact.metadata:
        metadata = json.loads(artifact.metadata)

    res = saver.save(
        type=artifact.type,
        name=artifact.name,
        client_id=artifact.client_id,
        sequence_client_id=artifact.sequence_client_id,
        metadata=metadata,
        ttl_duration_seconds=artifact.ttl_duration_seconds or None,
        description=artifact.description or None,
        aliases=artifact.aliases,
        tags=artifact.tags,
        use_after_commit=artifact.use_after_commit,
        distributed_id=artifact.distributed_id,
        finalize=artifact.finalize,
        incremental=artifact.incremental_beta1,
        history_step=history_step,
        base_id=artifact.base_id or None,
    )

    self._job_builder._handle_server_artifact(res, artifact)

    if artifact.manifest.manifest_file_path:
        try:
            os.remove(artifact.manifest.manifest_file_path)
        except FileNotFoundError:
            pass
    return res
1564
+
1565
def send_alert(self, record: "Record") -> None:
    """Send the record's alert to the server if the server supports alerts.

    A warning is logged instead when the server's max CLI version is
    unknown or older than 0.10.9. Errors from the notify call are logged,
    not raised.
    """
    from wandb.util import parse_version

    alert = record.alert
    server_max = self._max_cli_version()
    if server_max is None or parse_version(server_max) < parse_version("0.10.9"):
        logger.warning(
            "This W&B server doesn't support alerts, "
            "have your administrator install wandb/local >= 0.9.31"
        )
        return

    try:
        self._api.notify_scriptable_run_alert(
            title=alert.title,
            text=alert.text,
            level=alert.level,
            wait_duration=alert.wait_duration,
        )
    except Exception as e:
        logger.error(f"send_alert: failed for alert {alert.title!r}: {e}")
1587
+
1588
def finish(self) -> None:
    """Shut the sender down.

    In order: finish raw output, finish and drop the directory watcher,
    finish and join the pusher, finish the file stream with the stored
    exit code, then end the sentry session. Each component is skipped if
    it is not set.
    """
    logger.info("shutting down sender")
    # if self._tb_watcher:
    #     self._tb_watcher.finish()
    self._output_raw_finish()

    dir_watcher = self._dir_watcher
    if dir_watcher:
        dir_watcher.finish()
        self._dir_watcher = None

    pusher = self._pusher
    if pusher:
        pusher.finish()
        pusher.join()
        self._pusher = None

    file_stream = self._fs
    if file_stream:
        file_stream.finish(self._exit_code)
        self._fs = None

    wandb._sentry.end_session()
1604
+
1605
+ def _max_cli_version(self) -> Optional[str]:
1606
+ server_info = self.get_server_info()
1607
+ max_cli_version = server_info.get("cliVersionInfo", {}).get(
1608
+ "max_cli_version", None
1609
+ )
1610
+ if not isinstance(max_cli_version, str):
1611
+ return None
1612
+ return max_cli_version
1613
+
1614
def get_viewer_server_info(self) -> None:
    """Fetch viewer and server info from the API unless both are cached."""
    both_cached = self._cached_server_info and self._cached_viewer
    if not both_cached:
        viewer, server_info = self._api.viewer_server_info()
        self._cached_viewer = viewer
        self._cached_server_info = server_info
1618
+
1619
def get_viewer_info(self) -> Dict[str, Any]:
    """Return the cached viewer info, fetching it first when it is empty."""
    if self._cached_viewer:
        return self._cached_viewer
    self.get_viewer_server_info()
    return self._cached_viewer
1623
+
1624
def get_server_info(self) -> Dict[str, Any]:
    """Return the cached server info, fetching it first when it is empty."""
    if self._cached_server_info:
        return self._cached_server_info
    self.get_viewer_server_info()
    return self._cached_server_info
1628
+
1629
def get_local_info(self) -> "LocalInfo":
    """Queries the server to get the local version information.

    First, we perform an introspection, if it returns empty we deduce that the
    docker image is out-of-date. Otherwise, we use the returned values to deduce the
    state of the local server. In offline mode no query is made and the
    result is reported as not out of date.
    """
    local_info = wandb_internal_pb2.LocalInfo()
    if self._settings._offline:
        local_info.out_of_date = False
        return local_info

    # Assuming the query is successful if the result is empty it indicates that
    # the backend is out of date since it doesn't have the desired field
    version_info = self.get_server_info().get("latestLocalVersionInfo", {})
    if version_info is None:
        local_info.out_of_date = False
        return local_info

    local_info.out_of_date = version_info.get("outOfDate", True)
    local_info.version = version_info.get("latestVersionString", "latest")
    return local_info
1655
+
1656
+ def _flush_job(self) -> None:
1657
+ if self._job_builder.disable or self._settings._offline:
1658
+ return
1659
+ self._job_builder.set_config(self._consolidated_config.non_internal_config())
1660
+ summary_dict = self._cached_summary.copy()
1661
+ summary_dict.pop("_wandb", None)
1662
+ self._job_builder.set_summary(summary_dict)
1663
+
1664
+ artifact = self._job_builder.build(api=self._api)
1665
+ if artifact is not None and self._run is not None:
1666
+ proto_artifact = self._interface._make_artifact(artifact)
1667
+ proto_artifact.run_id = self._run.run_id
1668
+ proto_artifact.project = self._run.project
1669
+ proto_artifact.entity = self._run.entity
1670
+ # TODO: this should be removed when the latest tag is handled
1671
+ # by the backend (WB-12116)
1672
+ proto_artifact.aliases.append("latest")
1673
+ # add docker image tag
1674
+ for alias in self._job_builder._aliases:
1675
+ proto_artifact.aliases.append(alias)
1676
+
1677
+ proto_artifact.user_created = True
1678
+ proto_artifact.use_after_commit = True
1679
+ proto_artifact.finalize = True
1680
+
1681
+ self._interface._publish_artifact(proto_artifact)
1682
+
1683
def __next__(self) -> "Record":
    """Block until a record is available on the record queue and return it."""
    record_queue = self._record_q
    return record_queue.get(block=True)


# Alias so the method can also be called as ``next``.
next = __next__