wandb 0.21.2__py3-none-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (904):
  1. package_readme.md +97 -0
  2. wandb/__init__.py +248 -0
  3. wandb/__init__.pyi +1230 -0
  4. wandb/__main__.py +3 -0
  5. wandb/_iterutils.py +65 -0
  6. wandb/_pydantic/__init__.py +30 -0
  7. wandb/_pydantic/base.py +128 -0
  8. wandb/_pydantic/utils.py +80 -0
  9. wandb/_pydantic/v1_compat.py +284 -0
  10. wandb/agents/__init__.py +0 -0
  11. wandb/agents/pyagent.py +386 -0
  12. wandb/analytics/__init__.py +3 -0
  13. wandb/analytics/sentry.py +267 -0
  14. wandb/apis/__init__.py +48 -0
  15. wandb/apis/attrs.py +50 -0
  16. wandb/apis/importers/__init__.py +1 -0
  17. wandb/apis/importers/internals/internal.py +382 -0
  18. wandb/apis/importers/internals/protocols.py +103 -0
  19. wandb/apis/importers/internals/util.py +78 -0
  20. wandb/apis/importers/mlflow.py +254 -0
  21. wandb/apis/importers/validation.py +108 -0
  22. wandb/apis/importers/wandb.py +1608 -0
  23. wandb/apis/internal.py +239 -0
  24. wandb/apis/normalize.py +81 -0
  25. wandb/apis/paginator.py +138 -0
  26. wandb/apis/public/__init__.py +35 -0
  27. wandb/apis/public/api.py +2449 -0
  28. wandb/apis/public/artifacts.py +1046 -0
  29. wandb/apis/public/automations.py +85 -0
  30. wandb/apis/public/const.py +4 -0
  31. wandb/apis/public/files.py +402 -0
  32. wandb/apis/public/history.py +201 -0
  33. wandb/apis/public/integrations.py +203 -0
  34. wandb/apis/public/jobs.py +742 -0
  35. wandb/apis/public/projects.py +276 -0
  36. wandb/apis/public/query_generator.py +176 -0
  37. wandb/apis/public/registries/__init__.py +0 -0
  38. wandb/apis/public/registries/_freezable_list.py +179 -0
  39. wandb/apis/public/registries/_utils.py +138 -0
  40. wandb/apis/public/registries/registries_search.py +347 -0
  41. wandb/apis/public/registries/registry.py +358 -0
  42. wandb/apis/public/reports.py +595 -0
  43. wandb/apis/public/runs.py +1216 -0
  44. wandb/apis/public/sweeps.py +440 -0
  45. wandb/apis/public/teams.py +235 -0
  46. wandb/apis/public/users.py +177 -0
  47. wandb/apis/public/utils.py +210 -0
  48. wandb/apis/reports/__init__.py +1 -0
  49. wandb/apis/reports/v1/__init__.py +8 -0
  50. wandb/apis/reports/v2/__init__.py +8 -0
  51. wandb/apis/workspaces/__init__.py +8 -0
  52. wandb/automations/__init__.py +73 -0
  53. wandb/automations/_filters/__init__.py +40 -0
  54. wandb/automations/_filters/expressions.py +181 -0
  55. wandb/automations/_filters/operators.py +258 -0
  56. wandb/automations/_filters/run_metrics.py +330 -0
  57. wandb/automations/_generated/__init__.py +177 -0
  58. wandb/automations/_generated/create_automation.py +17 -0
  59. wandb/automations/_generated/create_generic_webhook_integration.py +43 -0
  60. wandb/automations/_generated/delete_automation.py +15 -0
  61. wandb/automations/_generated/enums.py +35 -0
  62. wandb/automations/_generated/fragments.py +358 -0
  63. wandb/automations/_generated/generic_webhook_integrations_by_entity.py +22 -0
  64. wandb/automations/_generated/get_automations.py +24 -0
  65. wandb/automations/_generated/get_automations_by_entity.py +26 -0
  66. wandb/automations/_generated/input_types.py +104 -0
  67. wandb/automations/_generated/integrations_by_entity.py +22 -0
  68. wandb/automations/_generated/operations.py +647 -0
  69. wandb/automations/_generated/slack_integrations_by_entity.py +22 -0
  70. wandb/automations/_generated/update_automation.py +17 -0
  71. wandb/automations/_utils.py +235 -0
  72. wandb/automations/_validators.py +165 -0
  73. wandb/automations/actions.py +218 -0
  74. wandb/automations/automations.py +85 -0
  75. wandb/automations/events.py +285 -0
  76. wandb/automations/integrations.py +45 -0
  77. wandb/automations/scopes.py +78 -0
  78. wandb/beta/workflows.py +324 -0
  79. wandb/bin/gpu_stats +0 -0
  80. wandb/bin/wandb-core +0 -0
  81. wandb/cli/__init__.py +0 -0
  82. wandb/cli/beta.py +175 -0
  83. wandb/cli/cli.py +2883 -0
  84. wandb/data_types.py +66 -0
  85. wandb/docker/__init__.py +290 -0
  86. wandb/docker/names.py +40 -0
  87. wandb/docker/wandb-entrypoint.sh +33 -0
  88. wandb/env.py +535 -0
  89. wandb/errors/__init__.py +17 -0
  90. wandb/errors/errors.py +40 -0
  91. wandb/errors/links.py +73 -0
  92. wandb/errors/term.py +415 -0
  93. wandb/errors/util.py +57 -0
  94. wandb/errors/warnings.py +2 -0
  95. wandb/filesync/__init__.py +0 -0
  96. wandb/filesync/dir_watcher.py +404 -0
  97. wandb/filesync/stats.py +100 -0
  98. wandb/filesync/step_checksum.py +142 -0
  99. wandb/filesync/step_prepare.py +179 -0
  100. wandb/filesync/step_upload.py +287 -0
  101. wandb/filesync/upload_job.py +142 -0
  102. wandb/integration/__init__.py +0 -0
  103. wandb/integration/catboost/__init__.py +5 -0
  104. wandb/integration/catboost/catboost.py +182 -0
  105. wandb/integration/cohere/__init__.py +3 -0
  106. wandb/integration/cohere/cohere.py +21 -0
  107. wandb/integration/cohere/resolver.py +347 -0
  108. wandb/integration/diffusers/__init__.py +3 -0
  109. wandb/integration/diffusers/autologger.py +76 -0
  110. wandb/integration/diffusers/pipeline_resolver.py +50 -0
  111. wandb/integration/diffusers/resolvers/__init__.py +9 -0
  112. wandb/integration/diffusers/resolvers/multimodal.py +881 -0
  113. wandb/integration/diffusers/resolvers/utils.py +102 -0
  114. wandb/integration/fastai/__init__.py +243 -0
  115. wandb/integration/gym/__init__.py +98 -0
  116. wandb/integration/huggingface/__init__.py +3 -0
  117. wandb/integration/huggingface/huggingface.py +18 -0
  118. wandb/integration/huggingface/resolver.py +213 -0
  119. wandb/integration/keras/__init__.py +11 -0
  120. wandb/integration/keras/callbacks/__init__.py +5 -0
  121. wandb/integration/keras/callbacks/metrics_logger.py +129 -0
  122. wandb/integration/keras/callbacks/model_checkpoint.py +188 -0
  123. wandb/integration/keras/callbacks/tables_builder.py +228 -0
  124. wandb/integration/keras/keras.py +1086 -0
  125. wandb/integration/kfp/__init__.py +6 -0
  126. wandb/integration/kfp/helpers.py +28 -0
  127. wandb/integration/kfp/kfp_patch.py +335 -0
  128. wandb/integration/kfp/wandb_logging.py +182 -0
  129. wandb/integration/langchain/__init__.py +3 -0
  130. wandb/integration/langchain/wandb_tracer.py +49 -0
  131. wandb/integration/lightgbm/__init__.py +239 -0
  132. wandb/integration/lightning/__init__.py +0 -0
  133. wandb/integration/lightning/fabric/__init__.py +3 -0
  134. wandb/integration/lightning/fabric/logger.py +763 -0
  135. wandb/integration/metaflow/__init__.py +9 -0
  136. wandb/integration/metaflow/data_pandas.py +74 -0
  137. wandb/integration/metaflow/data_pytorch.py +75 -0
  138. wandb/integration/metaflow/data_sklearn.py +76 -0
  139. wandb/integration/metaflow/errors.py +13 -0
  140. wandb/integration/metaflow/metaflow.py +327 -0
  141. wandb/integration/openai/__init__.py +3 -0
  142. wandb/integration/openai/fine_tuning.py +480 -0
  143. wandb/integration/openai/openai.py +22 -0
  144. wandb/integration/openai/resolver.py +240 -0
  145. wandb/integration/prodigy/__init__.py +3 -0
  146. wandb/integration/prodigy/prodigy.py +291 -0
  147. wandb/integration/sacred/__init__.py +117 -0
  148. wandb/integration/sagemaker/__init__.py +14 -0
  149. wandb/integration/sagemaker/auth.py +29 -0
  150. wandb/integration/sagemaker/config.py +58 -0
  151. wandb/integration/sagemaker/files.py +2 -0
  152. wandb/integration/sagemaker/resources.py +63 -0
  153. wandb/integration/sb3/__init__.py +3 -0
  154. wandb/integration/sb3/sb3.py +147 -0
  155. wandb/integration/sklearn/__init__.py +37 -0
  156. wandb/integration/sklearn/calculate/__init__.py +32 -0
  157. wandb/integration/sklearn/calculate/calibration_curves.py +125 -0
  158. wandb/integration/sklearn/calculate/class_proportions.py +68 -0
  159. wandb/integration/sklearn/calculate/confusion_matrix.py +93 -0
  160. wandb/integration/sklearn/calculate/decision_boundaries.py +40 -0
  161. wandb/integration/sklearn/calculate/elbow_curve.py +55 -0
  162. wandb/integration/sklearn/calculate/feature_importances.py +67 -0
  163. wandb/integration/sklearn/calculate/learning_curve.py +64 -0
  164. wandb/integration/sklearn/calculate/outlier_candidates.py +69 -0
  165. wandb/integration/sklearn/calculate/residuals.py +86 -0
  166. wandb/integration/sklearn/calculate/silhouette.py +118 -0
  167. wandb/integration/sklearn/calculate/summary_metrics.py +62 -0
  168. wandb/integration/sklearn/plot/__init__.py +35 -0
  169. wandb/integration/sklearn/plot/classifier.py +329 -0
  170. wandb/integration/sklearn/plot/clusterer.py +146 -0
  171. wandb/integration/sklearn/plot/regressor.py +121 -0
  172. wandb/integration/sklearn/plot/shared.py +91 -0
  173. wandb/integration/sklearn/utils.py +184 -0
  174. wandb/integration/tensorboard/__init__.py +10 -0
  175. wandb/integration/tensorboard/log.py +351 -0
  176. wandb/integration/tensorboard/monkeypatch.py +186 -0
  177. wandb/integration/tensorflow/__init__.py +5 -0
  178. wandb/integration/tensorflow/estimator_hook.py +54 -0
  179. wandb/integration/torch/__init__.py +0 -0
  180. wandb/integration/torch/wandb_torch.py +554 -0
  181. wandb/integration/ultralytics/__init__.py +11 -0
  182. wandb/integration/ultralytics/bbox_utils.py +215 -0
  183. wandb/integration/ultralytics/callback.py +528 -0
  184. wandb/integration/ultralytics/classification_utils.py +83 -0
  185. wandb/integration/ultralytics/mask_utils.py +202 -0
  186. wandb/integration/ultralytics/pose_utils.py +103 -0
  187. wandb/integration/weave/__init__.py +6 -0
  188. wandb/integration/weave/interface.py +49 -0
  189. wandb/integration/weave/weave.py +63 -0
  190. wandb/integration/xgboost/__init__.py +11 -0
  191. wandb/integration/xgboost/xgboost.py +189 -0
  192. wandb/integration/yolov8/__init__.py +0 -0
  193. wandb/integration/yolov8/yolov8.py +284 -0
  194. wandb/jupyter.py +538 -0
  195. wandb/mpmain/__init__.py +0 -0
  196. wandb/mpmain/__main__.py +1 -0
  197. wandb/old/__init__.py +0 -0
  198. wandb/old/core.py +53 -0
  199. wandb/old/settings.py +176 -0
  200. wandb/old/summary.py +438 -0
  201. wandb/plot/__init__.py +30 -0
  202. wandb/plot/bar.py +71 -0
  203. wandb/plot/confusion_matrix.py +185 -0
  204. wandb/plot/custom_chart.py +147 -0
  205. wandb/plot/histogram.py +66 -0
  206. wandb/plot/line.py +75 -0
  207. wandb/plot/line_series.py +173 -0
  208. wandb/plot/pr_curve.py +186 -0
  209. wandb/plot/roc_curve.py +163 -0
  210. wandb/plot/scatter.py +66 -0
  211. wandb/plot/utils.py +184 -0
  212. wandb/plot/viz.py +41 -0
  213. wandb/proto/__init__.py +0 -0
  214. wandb/proto/v3/__init__.py +0 -0
  215. wandb/proto/v3/wandb_base_pb2.py +55 -0
  216. wandb/proto/v3/wandb_internal_pb2.py +1728 -0
  217. wandb/proto/v3/wandb_server_pb2.py +228 -0
  218. wandb/proto/v3/wandb_settings_pb2.py +122 -0
  219. wandb/proto/v3/wandb_telemetry_pb2.py +106 -0
  220. wandb/proto/v4/__init__.py +0 -0
  221. wandb/proto/v4/wandb_base_pb2.py +30 -0
  222. wandb/proto/v4/wandb_internal_pb2.py +382 -0
  223. wandb/proto/v4/wandb_server_pb2.py +67 -0
  224. wandb/proto/v4/wandb_settings_pb2.py +47 -0
  225. wandb/proto/v4/wandb_telemetry_pb2.py +41 -0
  226. wandb/proto/v5/wandb_base_pb2.py +31 -0
  227. wandb/proto/v5/wandb_internal_pb2.py +383 -0
  228. wandb/proto/v5/wandb_server_pb2.py +68 -0
  229. wandb/proto/v5/wandb_settings_pb2.py +48 -0
  230. wandb/proto/v5/wandb_telemetry_pb2.py +42 -0
  231. wandb/proto/v6/wandb_base_pb2.py +41 -0
  232. wandb/proto/v6/wandb_internal_pb2.py +393 -0
  233. wandb/proto/v6/wandb_server_pb2.py +78 -0
  234. wandb/proto/v6/wandb_settings_pb2.py +58 -0
  235. wandb/proto/v6/wandb_telemetry_pb2.py +52 -0
  236. wandb/proto/wandb_base_pb2.py +12 -0
  237. wandb/proto/wandb_deprecated.py +59 -0
  238. wandb/proto/wandb_generate_deprecated.py +30 -0
  239. wandb/proto/wandb_generate_proto.py +49 -0
  240. wandb/proto/wandb_internal_pb2.py +18 -0
  241. wandb/proto/wandb_server_pb2.py +12 -0
  242. wandb/proto/wandb_settings_pb2.py +12 -0
  243. wandb/proto/wandb_telemetry_pb2.py +12 -0
  244. wandb/py.typed +0 -0
  245. wandb/sdk/__init__.py +37 -0
  246. wandb/sdk/artifacts/__init__.py +0 -0
  247. wandb/sdk/artifacts/_factories.py +17 -0
  248. wandb/sdk/artifacts/_generated/__init__.py +508 -0
  249. wandb/sdk/artifacts/_generated/add_aliases.py +21 -0
  250. wandb/sdk/artifacts/_generated/artifact_by_id.py +17 -0
  251. wandb/sdk/artifacts/_generated/artifact_by_name.py +22 -0
  252. wandb/sdk/artifacts/_generated/artifact_collection_membership_file_urls.py +43 -0
  253. wandb/sdk/artifacts/_generated/artifact_collection_membership_files.py +43 -0
  254. wandb/sdk/artifacts/_generated/artifact_created_by.py +47 -0
  255. wandb/sdk/artifacts/_generated/artifact_file_urls.py +22 -0
  256. wandb/sdk/artifacts/_generated/artifact_type.py +31 -0
  257. wandb/sdk/artifacts/_generated/artifact_used_by.py +43 -0
  258. wandb/sdk/artifacts/_generated/artifact_version_files.py +36 -0
  259. wandb/sdk/artifacts/_generated/artifact_via_membership_by_name.py +26 -0
  260. wandb/sdk/artifacts/_generated/create_artifact_collection_tag_assignments.py +36 -0
  261. wandb/sdk/artifacts/_generated/delete_aliases.py +21 -0
  262. wandb/sdk/artifacts/_generated/delete_artifact.py +28 -0
  263. wandb/sdk/artifacts/_generated/delete_artifact_collection_tag_assignments.py +25 -0
  264. wandb/sdk/artifacts/_generated/delete_artifact_portfolio.py +35 -0
  265. wandb/sdk/artifacts/_generated/delete_artifact_sequence.py +35 -0
  266. wandb/sdk/artifacts/_generated/enums.py +22 -0
  267. wandb/sdk/artifacts/_generated/fetch_artifact_manifest.py +38 -0
  268. wandb/sdk/artifacts/_generated/fetch_linked_artifacts.py +67 -0
  269. wandb/sdk/artifacts/_generated/fetch_registries.py +32 -0
  270. wandb/sdk/artifacts/_generated/fragments.py +459 -0
  271. wandb/sdk/artifacts/_generated/input_types.py +46 -0
  272. wandb/sdk/artifacts/_generated/link_artifact.py +27 -0
  273. wandb/sdk/artifacts/_generated/move_artifact_collection.py +35 -0
  274. wandb/sdk/artifacts/_generated/operations.py +1223 -0
  275. wandb/sdk/artifacts/_generated/project_artifact_collection.py +101 -0
  276. wandb/sdk/artifacts/_generated/project_artifact_collections.py +33 -0
  277. wandb/sdk/artifacts/_generated/project_artifact_type.py +24 -0
  278. wandb/sdk/artifacts/_generated/project_artifact_types.py +24 -0
  279. wandb/sdk/artifacts/_generated/project_artifacts.py +42 -0
  280. wandb/sdk/artifacts/_generated/registry_collections.py +34 -0
  281. wandb/sdk/artifacts/_generated/registry_versions.py +34 -0
  282. wandb/sdk/artifacts/_generated/run_input_artifacts.py +51 -0
  283. wandb/sdk/artifacts/_generated/run_output_artifacts.py +51 -0
  284. wandb/sdk/artifacts/_generated/unlink_artifact.py +25 -0
  285. wandb/sdk/artifacts/_generated/update_artifact.py +26 -0
  286. wandb/sdk/artifacts/_generated/update_artifact_portfolio.py +35 -0
  287. wandb/sdk/artifacts/_generated/update_artifact_sequence.py +35 -0
  288. wandb/sdk/artifacts/_graphql_fragments.py +19 -0
  289. wandb/sdk/artifacts/_internal_artifact.py +54 -0
  290. wandb/sdk/artifacts/_validators.py +309 -0
  291. wandb/sdk/artifacts/artifact.py +2702 -0
  292. wandb/sdk/artifacts/artifact_download_logger.py +45 -0
  293. wandb/sdk/artifacts/artifact_file_cache.py +251 -0
  294. wandb/sdk/artifacts/artifact_instance_cache.py +17 -0
  295. wandb/sdk/artifacts/artifact_manifest.py +76 -0
  296. wandb/sdk/artifacts/artifact_manifest_entry.py +258 -0
  297. wandb/sdk/artifacts/artifact_manifests/__init__.py +0 -0
  298. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +94 -0
  299. wandb/sdk/artifacts/artifact_saver.py +277 -0
  300. wandb/sdk/artifacts/artifact_state.py +13 -0
  301. wandb/sdk/artifacts/artifact_ttl.py +9 -0
  302. wandb/sdk/artifacts/exceptions.py +71 -0
  303. wandb/sdk/artifacts/staging.py +27 -0
  304. wandb/sdk/artifacts/storage_handler.py +62 -0
  305. wandb/sdk/artifacts/storage_handlers/__init__.py +0 -0
  306. wandb/sdk/artifacts/storage_handlers/azure_handler.py +214 -0
  307. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +224 -0
  308. wandb/sdk/artifacts/storage_handlers/http_handler.py +114 -0
  309. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +142 -0
  310. wandb/sdk/artifacts/storage_handlers/multi_handler.py +56 -0
  311. wandb/sdk/artifacts/storage_handlers/s3_handler.py +339 -0
  312. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +68 -0
  313. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +131 -0
  314. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +74 -0
  315. wandb/sdk/artifacts/storage_layout.py +8 -0
  316. wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
  317. wandb/sdk/artifacts/storage_policies/register.py +1 -0
  318. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +580 -0
  319. wandb/sdk/artifacts/storage_policy.py +75 -0
  320. wandb/sdk/backend/__init__.py +0 -0
  321. wandb/sdk/backend/backend.py +57 -0
  322. wandb/sdk/data_types/__init__.py +0 -0
  323. wandb/sdk/data_types/_dtypes.py +914 -0
  324. wandb/sdk/data_types/_private.py +10 -0
  325. wandb/sdk/data_types/audio.py +208 -0
  326. wandb/sdk/data_types/base_types/__init__.py +0 -0
  327. wandb/sdk/data_types/base_types/json_metadata.py +55 -0
  328. wandb/sdk/data_types/base_types/media.py +339 -0
  329. wandb/sdk/data_types/base_types/wb_value.py +295 -0
  330. wandb/sdk/data_types/bokeh.py +87 -0
  331. wandb/sdk/data_types/graph.py +439 -0
  332. wandb/sdk/data_types/helper_types/__init__.py +0 -0
  333. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +327 -0
  334. wandb/sdk/data_types/helper_types/classes.py +159 -0
  335. wandb/sdk/data_types/helper_types/image_mask.py +251 -0
  336. wandb/sdk/data_types/histogram.py +107 -0
  337. wandb/sdk/data_types/html.py +165 -0
  338. wandb/sdk/data_types/image.py +974 -0
  339. wandb/sdk/data_types/molecule.py +250 -0
  340. wandb/sdk/data_types/object_3d.py +495 -0
  341. wandb/sdk/data_types/plotly.py +95 -0
  342. wandb/sdk/data_types/saved_model.py +435 -0
  343. wandb/sdk/data_types/table.py +1468 -0
  344. wandb/sdk/data_types/table_decorators.py +108 -0
  345. wandb/sdk/data_types/trace_tree.py +440 -0
  346. wandb/sdk/data_types/utils.py +260 -0
  347. wandb/sdk/data_types/video.py +303 -0
  348. wandb/sdk/integration_utils/__init__.py +0 -0
  349. wandb/sdk/integration_utils/auto_logging.py +232 -0
  350. wandb/sdk/integration_utils/data_logging.py +475 -0
  351. wandb/sdk/interface/__init__.py +0 -0
  352. wandb/sdk/interface/constants.py +4 -0
  353. wandb/sdk/interface/interface.py +1056 -0
  354. wandb/sdk/interface/interface_queue.py +40 -0
  355. wandb/sdk/interface/interface_shared.py +471 -0
  356. wandb/sdk/interface/interface_sock.py +49 -0
  357. wandb/sdk/interface/summary_record.py +67 -0
  358. wandb/sdk/internal/__init__.py +0 -0
  359. wandb/sdk/internal/_generated/__init__.py +15 -0
  360. wandb/sdk/internal/_generated/enums.py +4 -0
  361. wandb/sdk/internal/_generated/input_types.py +4 -0
  362. wandb/sdk/internal/_generated/operations.py +15 -0
  363. wandb/sdk/internal/_generated/server_features_query.py +27 -0
  364. wandb/sdk/internal/context.py +89 -0
  365. wandb/sdk/internal/datastore.py +293 -0
  366. wandb/sdk/internal/file_pusher.py +177 -0
  367. wandb/sdk/internal/file_stream.py +686 -0
  368. wandb/sdk/internal/handler.py +854 -0
  369. wandb/sdk/internal/incremental_table_util.py +53 -0
  370. wandb/sdk/internal/internal_api.py +4723 -0
  371. wandb/sdk/internal/job_builder.py +639 -0
  372. wandb/sdk/internal/profiler.py +79 -0
  373. wandb/sdk/internal/progress.py +77 -0
  374. wandb/sdk/internal/run.py +27 -0
  375. wandb/sdk/internal/sample.py +70 -0
  376. wandb/sdk/internal/sender.py +1692 -0
  377. wandb/sdk/internal/sender_config.py +203 -0
  378. wandb/sdk/internal/settings_static.py +120 -0
  379. wandb/sdk/internal/tb_watcher.py +519 -0
  380. wandb/sdk/internal/thread_local_settings.py +18 -0
  381. wandb/sdk/launch/__init__.py +15 -0
  382. wandb/sdk/launch/_launch.py +331 -0
  383. wandb/sdk/launch/_launch_add.py +255 -0
  384. wandb/sdk/launch/_project_spec.py +565 -0
  385. wandb/sdk/launch/agent/__init__.py +5 -0
  386. wandb/sdk/launch/agent/agent.py +931 -0
  387. wandb/sdk/launch/agent/config.py +296 -0
  388. wandb/sdk/launch/agent/job_status_tracker.py +55 -0
  389. wandb/sdk/launch/agent/run_queue_item_file_saver.py +39 -0
  390. wandb/sdk/launch/builder/__init__.py +0 -0
  391. wandb/sdk/launch/builder/abstract.py +156 -0
  392. wandb/sdk/launch/builder/build.py +296 -0
  393. wandb/sdk/launch/builder/context_manager.py +235 -0
  394. wandb/sdk/launch/builder/docker_builder.py +177 -0
  395. wandb/sdk/launch/builder/kaniko_builder.py +595 -0
  396. wandb/sdk/launch/builder/noop.py +58 -0
  397. wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +188 -0
  398. wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
  399. wandb/sdk/launch/create_job.py +541 -0
  400. wandb/sdk/launch/environment/abstract.py +29 -0
  401. wandb/sdk/launch/environment/aws_environment.py +322 -0
  402. wandb/sdk/launch/environment/azure_environment.py +105 -0
  403. wandb/sdk/launch/environment/gcp_environment.py +334 -0
  404. wandb/sdk/launch/environment/local_environment.py +65 -0
  405. wandb/sdk/launch/errors.py +13 -0
  406. wandb/sdk/launch/git_reference.py +109 -0
  407. wandb/sdk/launch/inputs/files.py +148 -0
  408. wandb/sdk/launch/inputs/internal.py +314 -0
  409. wandb/sdk/launch/inputs/manage.py +113 -0
  410. wandb/sdk/launch/inputs/schema.py +40 -0
  411. wandb/sdk/launch/loader.py +249 -0
  412. wandb/sdk/launch/registry/abstract.py +48 -0
  413. wandb/sdk/launch/registry/anon.py +29 -0
  414. wandb/sdk/launch/registry/azure_container_registry.py +124 -0
  415. wandb/sdk/launch/registry/elastic_container_registry.py +192 -0
  416. wandb/sdk/launch/registry/google_artifact_registry.py +219 -0
  417. wandb/sdk/launch/registry/local_registry.py +65 -0
  418. wandb/sdk/launch/runner/__init__.py +0 -0
  419. wandb/sdk/launch/runner/abstract.py +185 -0
  420. wandb/sdk/launch/runner/kubernetes_monitor.py +473 -0
  421. wandb/sdk/launch/runner/kubernetes_runner.py +1285 -0
  422. wandb/sdk/launch/runner/local_container.py +301 -0
  423. wandb/sdk/launch/runner/local_process.py +78 -0
  424. wandb/sdk/launch/runner/sagemaker_runner.py +424 -0
  425. wandb/sdk/launch/runner/vertex_runner.py +225 -0
  426. wandb/sdk/launch/sweeps/__init__.py +37 -0
  427. wandb/sdk/launch/sweeps/scheduler.py +739 -0
  428. wandb/sdk/launch/sweeps/scheduler_sweep.py +90 -0
  429. wandb/sdk/launch/sweeps/utils.py +324 -0
  430. wandb/sdk/launch/utils.py +746 -0
  431. wandb/sdk/launch/wandb_reference.py +138 -0
  432. wandb/sdk/lib/__init__.py +5 -0
  433. wandb/sdk/lib/apikey.py +334 -0
  434. wandb/sdk/lib/asyncio_compat.py +213 -0
  435. wandb/sdk/lib/asyncio_manager.py +252 -0
  436. wandb/sdk/lib/capped_dict.py +26 -0
  437. wandb/sdk/lib/config_util.py +101 -0
  438. wandb/sdk/lib/console_capture.py +219 -0
  439. wandb/sdk/lib/credentials.py +141 -0
  440. wandb/sdk/lib/deprecate.py +27 -0
  441. wandb/sdk/lib/disabled.py +30 -0
  442. wandb/sdk/lib/exit_hooks.py +54 -0
  443. wandb/sdk/lib/file_stream_utils.py +118 -0
  444. wandb/sdk/lib/filenames.py +64 -0
  445. wandb/sdk/lib/filesystem.py +372 -0
  446. wandb/sdk/lib/fsm.py +165 -0
  447. wandb/sdk/lib/gitlib.py +240 -0
  448. wandb/sdk/lib/gql_request.py +65 -0
  449. wandb/sdk/lib/handler_util.py +21 -0
  450. wandb/sdk/lib/hashutil.py +106 -0
  451. wandb/sdk/lib/import_hooks.py +275 -0
  452. wandb/sdk/lib/interrupt.py +37 -0
  453. wandb/sdk/lib/ipython.py +126 -0
  454. wandb/sdk/lib/json_util.py +75 -0
  455. wandb/sdk/lib/lazyloader.py +63 -0
  456. wandb/sdk/lib/module.py +72 -0
  457. wandb/sdk/lib/paths.py +106 -0
  458. wandb/sdk/lib/preinit.py +42 -0
  459. wandb/sdk/lib/printer.py +571 -0
  460. wandb/sdk/lib/printer_asyncio.py +48 -0
  461. wandb/sdk/lib/progress.py +320 -0
  462. wandb/sdk/lib/proto_util.py +90 -0
  463. wandb/sdk/lib/redirect.py +876 -0
  464. wandb/sdk/lib/retry.py +395 -0
  465. wandb/sdk/lib/run_moment.py +82 -0
  466. wandb/sdk/lib/runid.py +12 -0
  467. wandb/sdk/lib/server.py +58 -0
  468. wandb/sdk/lib/service/ipc_support.py +13 -0
  469. wandb/sdk/lib/service/service_client.py +106 -0
  470. wandb/sdk/lib/service/service_connection.py +192 -0
  471. wandb/sdk/lib/service/service_port_file.py +105 -0
  472. wandb/sdk/lib/service/service_process.py +111 -0
  473. wandb/sdk/lib/service/service_token.py +181 -0
  474. wandb/sdk/lib/sparkline.py +44 -0
  475. wandb/sdk/lib/telemetry.py +100 -0
  476. wandb/sdk/lib/timed_input.py +133 -0
  477. wandb/sdk/lib/timer.py +19 -0
  478. wandb/sdk/lib/wb_logging.py +161 -0
  479. wandb/sdk/mailbox/__init__.py +23 -0
  480. wandb/sdk/mailbox/mailbox.py +143 -0
  481. wandb/sdk/mailbox/mailbox_handle.py +132 -0
  482. wandb/sdk/mailbox/response_handle.py +99 -0
  483. wandb/sdk/mailbox/wait_with_progress.py +100 -0
  484. wandb/sdk/projects/_generated/__init__.py +47 -0
  485. wandb/sdk/projects/_generated/delete_project.py +22 -0
  486. wandb/sdk/projects/_generated/enums.py +4 -0
  487. wandb/sdk/projects/_generated/fetch_registry.py +22 -0
  488. wandb/sdk/projects/_generated/fragments.py +41 -0
  489. wandb/sdk/projects/_generated/input_types.py +13 -0
  490. wandb/sdk/projects/_generated/operations.py +88 -0
  491. wandb/sdk/projects/_generated/rename_project.py +27 -0
  492. wandb/sdk/projects/_generated/upsert_registry_project.py +27 -0
  493. wandb/sdk/verify/__init__.py +0 -0
  494. wandb/sdk/verify/verify.py +555 -0
  495. wandb/sdk/wandb_alerts.py +12 -0
  496. wandb/sdk/wandb_config.py +323 -0
  497. wandb/sdk/wandb_helper.py +54 -0
  498. wandb/sdk/wandb_init.py +1581 -0
  499. wandb/sdk/wandb_login.py +332 -0
  500. wandb/sdk/wandb_metric.py +112 -0
  501. wandb/sdk/wandb_require.py +88 -0
  502. wandb/sdk/wandb_require_helpers.py +44 -0
  503. wandb/sdk/wandb_run.py +4088 -0
  504. wandb/sdk/wandb_settings.py +2105 -0
  505. wandb/sdk/wandb_setup.py +560 -0
  506. wandb/sdk/wandb_summary.py +150 -0
  507. wandb/sdk/wandb_sweep.py +120 -0
  508. wandb/sdk/wandb_sync.py +71 -0
  509. wandb/sdk/wandb_watch.py +146 -0
  510. wandb/sklearn.py +35 -0
  511. wandb/sync/__init__.py +3 -0
  512. wandb/sync/sync.py +452 -0
  513. wandb/trigger.py +29 -0
  514. wandb/util.py +2040 -0
  515. wandb/vendor/__init__.py +0 -0
  516. wandb/vendor/gql-0.2.0/setup.py +40 -0
  517. wandb/vendor/gql-0.2.0/tests/__init__.py +0 -0
  518. wandb/vendor/gql-0.2.0/tests/starwars/__init__.py +0 -0
  519. wandb/vendor/gql-0.2.0/tests/starwars/fixtures.py +96 -0
  520. wandb/vendor/gql-0.2.0/tests/starwars/schema.py +146 -0
  521. wandb/vendor/gql-0.2.0/tests/starwars/test_dsl.py +293 -0
  522. wandb/vendor/gql-0.2.0/tests/starwars/test_query.py +355 -0
  523. wandb/vendor/gql-0.2.0/tests/starwars/test_validation.py +171 -0
  524. wandb/vendor/gql-0.2.0/tests/test_client.py +31 -0
  525. wandb/vendor/gql-0.2.0/tests/test_transport.py +89 -0
  526. wandb/vendor/gql-0.2.0/wandb_gql/__init__.py +4 -0
  527. wandb/vendor/gql-0.2.0/wandb_gql/client.py +75 -0
  528. wandb/vendor/gql-0.2.0/wandb_gql/dsl.py +152 -0
  529. wandb/vendor/gql-0.2.0/wandb_gql/gql.py +10 -0
  530. wandb/vendor/gql-0.2.0/wandb_gql/transport/__init__.py +0 -0
  531. wandb/vendor/gql-0.2.0/wandb_gql/transport/http.py +6 -0
  532. wandb/vendor/gql-0.2.0/wandb_gql/transport/local_schema.py +15 -0
  533. wandb/vendor/gql-0.2.0/wandb_gql/transport/requests.py +46 -0
  534. wandb/vendor/gql-0.2.0/wandb_gql/utils.py +21 -0
  535. wandb/vendor/graphql-core-1.1/setup.py +86 -0
  536. wandb/vendor/graphql-core-1.1/wandb_graphql/__init__.py +287 -0
  537. wandb/vendor/graphql-core-1.1/wandb_graphql/error/__init__.py +6 -0
  538. wandb/vendor/graphql-core-1.1/wandb_graphql/error/base.py +42 -0
  539. wandb/vendor/graphql-core-1.1/wandb_graphql/error/format_error.py +11 -0
  540. wandb/vendor/graphql-core-1.1/wandb_graphql/error/located_error.py +29 -0
  541. wandb/vendor/graphql-core-1.1/wandb_graphql/error/syntax_error.py +36 -0
  542. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/__init__.py +26 -0
  543. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/base.py +311 -0
  544. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executor.py +398 -0
  545. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/__init__.py +0 -0
  546. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/asyncio.py +53 -0
  547. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/gevent.py +22 -0
  548. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/process.py +32 -0
  549. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/sync.py +7 -0
  550. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/thread.py +35 -0
  551. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/executors/utils.py +6 -0
  552. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/__init__.py +0 -0
  553. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/executor.py +66 -0
  554. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/fragment.py +252 -0
  555. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/resolver.py +151 -0
  556. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/experimental/utils.py +7 -0
  557. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/middleware.py +57 -0
  558. wandb/vendor/graphql-core-1.1/wandb_graphql/execution/values.py +145 -0
  559. wandb/vendor/graphql-core-1.1/wandb_graphql/graphql.py +60 -0
  560. wandb/vendor/graphql-core-1.1/wandb_graphql/language/__init__.py +0 -0
  561. wandb/vendor/graphql-core-1.1/wandb_graphql/language/ast.py +1349 -0
  562. wandb/vendor/graphql-core-1.1/wandb_graphql/language/base.py +19 -0
  563. wandb/vendor/graphql-core-1.1/wandb_graphql/language/lexer.py +435 -0
  564. wandb/vendor/graphql-core-1.1/wandb_graphql/language/location.py +30 -0
  565. wandb/vendor/graphql-core-1.1/wandb_graphql/language/parser.py +779 -0
  566. wandb/vendor/graphql-core-1.1/wandb_graphql/language/printer.py +193 -0
  567. wandb/vendor/graphql-core-1.1/wandb_graphql/language/source.py +18 -0
  568. wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor.py +222 -0
  569. wandb/vendor/graphql-core-1.1/wandb_graphql/language/visitor_meta.py +82 -0
  570. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/__init__.py +0 -0
  571. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/cached_property.py +17 -0
  572. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/contain_subset.py +28 -0
  573. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/default_ordered_dict.py +40 -0
  574. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/ordereddict.py +8 -0
  575. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/pair_set.py +43 -0
  576. wandb/vendor/graphql-core-1.1/wandb_graphql/pyutils/version.py +78 -0
  577. wandb/vendor/graphql-core-1.1/wandb_graphql/type/__init__.py +67 -0
  578. wandb/vendor/graphql-core-1.1/wandb_graphql/type/definition.py +619 -0
  579. wandb/vendor/graphql-core-1.1/wandb_graphql/type/directives.py +132 -0
  580. wandb/vendor/graphql-core-1.1/wandb_graphql/type/introspection.py +440 -0
  581. wandb/vendor/graphql-core-1.1/wandb_graphql/type/scalars.py +131 -0
  582. wandb/vendor/graphql-core-1.1/wandb_graphql/type/schema.py +100 -0
  583. wandb/vendor/graphql-core-1.1/wandb_graphql/type/typemap.py +145 -0
  584. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/__init__.py +0 -0
  585. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/assert_valid_name.py +9 -0
  586. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_from_value.py +65 -0
  587. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_code.py +49 -0
  588. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/ast_to_dict.py +24 -0
  589. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/base.py +75 -0
  590. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_ast_schema.py +291 -0
  591. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/build_client_schema.py +250 -0
  592. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/concat_ast.py +9 -0
  593. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/extend_schema.py +357 -0
  594. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_field_def.py +27 -0
  595. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/get_operation_ast.py +21 -0
  596. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/introspection_query.py +90 -0
  597. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_literal_value.py +67 -0
  598. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/is_valid_value.py +66 -0
  599. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/quoted_or_list.py +21 -0
  600. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/schema_printer.py +168 -0
  601. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/suggestion_list.py +56 -0
  602. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_comparators.py +69 -0
  603. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_from_ast.py +21 -0
  604. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/type_info.py +149 -0
  605. wandb/vendor/graphql-core-1.1/wandb_graphql/utils/value_from_ast.py +69 -0
  606. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/__init__.py +4 -0
  607. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/__init__.py +79 -0
  608. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/arguments_of_correct_type.py +24 -0
  609. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/base.py +8 -0
  610. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/default_values_of_correct_type.py +44 -0
  611. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fields_on_correct_type.py +113 -0
  612. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/fragments_on_composite_types.py +33 -0
  613. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_argument_names.py +70 -0
  614. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_directives.py +97 -0
  615. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_fragment_names.py +19 -0
  616. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/known_type_names.py +43 -0
  617. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/lone_anonymous_operation.py +23 -0
  618. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_fragment_cycles.py +59 -0
  619. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_undefined_variables.py +36 -0
  620. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_fragments.py +38 -0
  621. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/no_unused_variables.py +37 -0
  622. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/overlapping_fields_can_be_merged.py +529 -0
  623. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/possible_fragment_spreads.py +44 -0
  624. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/provided_non_null_arguments.py +46 -0
  625. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/scalar_leafs.py +33 -0
  626. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_argument_names.py +32 -0
  627. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_fragment_names.py +28 -0
  628. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_input_field_names.py +33 -0
  629. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_operation_names.py +31 -0
  630. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/unique_variable_names.py +27 -0
  631. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_are_input_types.py +21 -0
  632. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/rules/variables_in_allowed_position.py +53 -0
  633. wandb/vendor/graphql-core-1.1/wandb_graphql/validation/validation.py +158 -0
  634. wandb/vendor/promise-2.3.0/conftest.py +30 -0
  635. wandb/vendor/promise-2.3.0/setup.py +64 -0
  636. wandb/vendor/promise-2.3.0/tests/__init__.py +0 -0
  637. wandb/vendor/promise-2.3.0/tests/conftest.py +8 -0
  638. wandb/vendor/promise-2.3.0/tests/test_awaitable.py +32 -0
  639. wandb/vendor/promise-2.3.0/tests/test_awaitable_35.py +47 -0
  640. wandb/vendor/promise-2.3.0/tests/test_benchmark.py +116 -0
  641. wandb/vendor/promise-2.3.0/tests/test_complex_threads.py +23 -0
  642. wandb/vendor/promise-2.3.0/tests/test_dataloader.py +452 -0
  643. wandb/vendor/promise-2.3.0/tests/test_dataloader_awaitable_35.py +99 -0
  644. wandb/vendor/promise-2.3.0/tests/test_dataloader_extra.py +65 -0
  645. wandb/vendor/promise-2.3.0/tests/test_extra.py +670 -0
  646. wandb/vendor/promise-2.3.0/tests/test_issues.py +132 -0
  647. wandb/vendor/promise-2.3.0/tests/test_promise_list.py +70 -0
  648. wandb/vendor/promise-2.3.0/tests/test_spec.py +584 -0
  649. wandb/vendor/promise-2.3.0/tests/test_thread_safety.py +115 -0
  650. wandb/vendor/promise-2.3.0/tests/utils.py +3 -0
  651. wandb/vendor/promise-2.3.0/wandb_promise/__init__.py +38 -0
  652. wandb/vendor/promise-2.3.0/wandb_promise/async_.py +135 -0
  653. wandb/vendor/promise-2.3.0/wandb_promise/compat.py +32 -0
  654. wandb/vendor/promise-2.3.0/wandb_promise/dataloader.py +326 -0
  655. wandb/vendor/promise-2.3.0/wandb_promise/iterate_promise.py +12 -0
  656. wandb/vendor/promise-2.3.0/wandb_promise/promise.py +848 -0
  657. wandb/vendor/promise-2.3.0/wandb_promise/promise_list.py +151 -0
  658. wandb/vendor/promise-2.3.0/wandb_promise/pyutils/__init__.py +0 -0
  659. wandb/vendor/promise-2.3.0/wandb_promise/pyutils/version.py +83 -0
  660. wandb/vendor/promise-2.3.0/wandb_promise/schedulers/__init__.py +0 -0
  661. wandb/vendor/promise-2.3.0/wandb_promise/schedulers/asyncio.py +22 -0
  662. wandb/vendor/promise-2.3.0/wandb_promise/schedulers/gevent.py +21 -0
  663. wandb/vendor/promise-2.3.0/wandb_promise/schedulers/immediate.py +27 -0
  664. wandb/vendor/promise-2.3.0/wandb_promise/schedulers/thread.py +18 -0
  665. wandb/vendor/promise-2.3.0/wandb_promise/utils.py +56 -0
  666. wandb/vendor/pygments/__init__.py +90 -0
  667. wandb/vendor/pygments/cmdline.py +568 -0
  668. wandb/vendor/pygments/console.py +74 -0
  669. wandb/vendor/pygments/filter.py +74 -0
  670. wandb/vendor/pygments/filters/__init__.py +350 -0
  671. wandb/vendor/pygments/formatter.py +95 -0
  672. wandb/vendor/pygments/formatters/__init__.py +153 -0
  673. wandb/vendor/pygments/formatters/_mapping.py +85 -0
  674. wandb/vendor/pygments/formatters/bbcode.py +109 -0
  675. wandb/vendor/pygments/formatters/html.py +851 -0
  676. wandb/vendor/pygments/formatters/img.py +600 -0
  677. wandb/vendor/pygments/formatters/irc.py +182 -0
  678. wandb/vendor/pygments/formatters/latex.py +482 -0
  679. wandb/vendor/pygments/formatters/other.py +160 -0
  680. wandb/vendor/pygments/formatters/rtf.py +147 -0
  681. wandb/vendor/pygments/formatters/svg.py +153 -0
  682. wandb/vendor/pygments/formatters/terminal.py +136 -0
  683. wandb/vendor/pygments/formatters/terminal256.py +309 -0
  684. wandb/vendor/pygments/lexer.py +871 -0
  685. wandb/vendor/pygments/lexers/__init__.py +329 -0
  686. wandb/vendor/pygments/lexers/_asy_builtins.py +1645 -0
  687. wandb/vendor/pygments/lexers/_cl_builtins.py +232 -0
  688. wandb/vendor/pygments/lexers/_cocoa_builtins.py +72 -0
  689. wandb/vendor/pygments/lexers/_csound_builtins.py +1346 -0
  690. wandb/vendor/pygments/lexers/_lasso_builtins.py +5327 -0
  691. wandb/vendor/pygments/lexers/_lua_builtins.py +295 -0
  692. wandb/vendor/pygments/lexers/_mapping.py +500 -0
  693. wandb/vendor/pygments/lexers/_mql_builtins.py +1172 -0
  694. wandb/vendor/pygments/lexers/_openedge_builtins.py +2547 -0
  695. wandb/vendor/pygments/lexers/_php_builtins.py +4756 -0
  696. wandb/vendor/pygments/lexers/_postgres_builtins.py +621 -0
  697. wandb/vendor/pygments/lexers/_scilab_builtins.py +3094 -0
  698. wandb/vendor/pygments/lexers/_sourcemod_builtins.py +1163 -0
  699. wandb/vendor/pygments/lexers/_stan_builtins.py +532 -0
  700. wandb/vendor/pygments/lexers/_stata_builtins.py +419 -0
  701. wandb/vendor/pygments/lexers/_tsql_builtins.py +1004 -0
  702. wandb/vendor/pygments/lexers/_vim_builtins.py +1939 -0
  703. wandb/vendor/pygments/lexers/actionscript.py +240 -0
  704. wandb/vendor/pygments/lexers/agile.py +24 -0
  705. wandb/vendor/pygments/lexers/algebra.py +221 -0
  706. wandb/vendor/pygments/lexers/ambient.py +76 -0
  707. wandb/vendor/pygments/lexers/ampl.py +87 -0
  708. wandb/vendor/pygments/lexers/apl.py +101 -0
  709. wandb/vendor/pygments/lexers/archetype.py +318 -0
  710. wandb/vendor/pygments/lexers/asm.py +641 -0
  711. wandb/vendor/pygments/lexers/automation.py +374 -0
  712. wandb/vendor/pygments/lexers/basic.py +500 -0
  713. wandb/vendor/pygments/lexers/bibtex.py +160 -0
  714. wandb/vendor/pygments/lexers/business.py +612 -0
  715. wandb/vendor/pygments/lexers/c_cpp.py +252 -0
  716. wandb/vendor/pygments/lexers/c_like.py +541 -0
  717. wandb/vendor/pygments/lexers/capnproto.py +78 -0
  718. wandb/vendor/pygments/lexers/chapel.py +102 -0
  719. wandb/vendor/pygments/lexers/clean.py +288 -0
  720. wandb/vendor/pygments/lexers/compiled.py +34 -0
  721. wandb/vendor/pygments/lexers/configs.py +833 -0
  722. wandb/vendor/pygments/lexers/console.py +114 -0
  723. wandb/vendor/pygments/lexers/crystal.py +393 -0
  724. wandb/vendor/pygments/lexers/csound.py +366 -0
  725. wandb/vendor/pygments/lexers/css.py +689 -0
  726. wandb/vendor/pygments/lexers/d.py +251 -0
  727. wandb/vendor/pygments/lexers/dalvik.py +125 -0
  728. wandb/vendor/pygments/lexers/data.py +555 -0
  729. wandb/vendor/pygments/lexers/diff.py +165 -0
  730. wandb/vendor/pygments/lexers/dotnet.py +691 -0
  731. wandb/vendor/pygments/lexers/dsls.py +878 -0
  732. wandb/vendor/pygments/lexers/dylan.py +289 -0
  733. wandb/vendor/pygments/lexers/ecl.py +125 -0
  734. wandb/vendor/pygments/lexers/eiffel.py +65 -0
  735. wandb/vendor/pygments/lexers/elm.py +121 -0
  736. wandb/vendor/pygments/lexers/erlang.py +533 -0
  737. wandb/vendor/pygments/lexers/esoteric.py +277 -0
  738. wandb/vendor/pygments/lexers/ezhil.py +69 -0
  739. wandb/vendor/pygments/lexers/factor.py +344 -0
  740. wandb/vendor/pygments/lexers/fantom.py +250 -0
  741. wandb/vendor/pygments/lexers/felix.py +273 -0
  742. wandb/vendor/pygments/lexers/forth.py +177 -0
  743. wandb/vendor/pygments/lexers/fortran.py +205 -0
  744. wandb/vendor/pygments/lexers/foxpro.py +428 -0
  745. wandb/vendor/pygments/lexers/functional.py +21 -0
  746. wandb/vendor/pygments/lexers/go.py +101 -0
  747. wandb/vendor/pygments/lexers/grammar_notation.py +213 -0
  748. wandb/vendor/pygments/lexers/graph.py +80 -0
  749. wandb/vendor/pygments/lexers/graphics.py +553 -0
  750. wandb/vendor/pygments/lexers/haskell.py +843 -0
  751. wandb/vendor/pygments/lexers/haxe.py +936 -0
  752. wandb/vendor/pygments/lexers/hdl.py +382 -0
  753. wandb/vendor/pygments/lexers/hexdump.py +103 -0
  754. wandb/vendor/pygments/lexers/html.py +602 -0
  755. wandb/vendor/pygments/lexers/idl.py +270 -0
  756. wandb/vendor/pygments/lexers/igor.py +288 -0
  757. wandb/vendor/pygments/lexers/inferno.py +96 -0
  758. wandb/vendor/pygments/lexers/installers.py +322 -0
  759. wandb/vendor/pygments/lexers/int_fiction.py +1343 -0
  760. wandb/vendor/pygments/lexers/iolang.py +63 -0
  761. wandb/vendor/pygments/lexers/j.py +146 -0
  762. wandb/vendor/pygments/lexers/javascript.py +1525 -0
  763. wandb/vendor/pygments/lexers/julia.py +333 -0
  764. wandb/vendor/pygments/lexers/jvm.py +1573 -0
  765. wandb/vendor/pygments/lexers/lisp.py +2621 -0
  766. wandb/vendor/pygments/lexers/make.py +202 -0
  767. wandb/vendor/pygments/lexers/markup.py +595 -0
  768. wandb/vendor/pygments/lexers/math.py +21 -0
  769. wandb/vendor/pygments/lexers/matlab.py +663 -0
  770. wandb/vendor/pygments/lexers/ml.py +769 -0
  771. wandb/vendor/pygments/lexers/modeling.py +358 -0
  772. wandb/vendor/pygments/lexers/modula2.py +1561 -0
  773. wandb/vendor/pygments/lexers/monte.py +204 -0
  774. wandb/vendor/pygments/lexers/ncl.py +894 -0
  775. wandb/vendor/pygments/lexers/nimrod.py +159 -0
  776. wandb/vendor/pygments/lexers/nit.py +64 -0
  777. wandb/vendor/pygments/lexers/nix.py +136 -0
  778. wandb/vendor/pygments/lexers/oberon.py +105 -0
  779. wandb/vendor/pygments/lexers/objective.py +504 -0
  780. wandb/vendor/pygments/lexers/ooc.py +85 -0
  781. wandb/vendor/pygments/lexers/other.py +41 -0
  782. wandb/vendor/pygments/lexers/parasail.py +79 -0
  783. wandb/vendor/pygments/lexers/parsers.py +835 -0
  784. wandb/vendor/pygments/lexers/pascal.py +644 -0
  785. wandb/vendor/pygments/lexers/pawn.py +199 -0
  786. wandb/vendor/pygments/lexers/perl.py +620 -0
  787. wandb/vendor/pygments/lexers/php.py +267 -0
  788. wandb/vendor/pygments/lexers/praat.py +294 -0
  789. wandb/vendor/pygments/lexers/prolog.py +306 -0
  790. wandb/vendor/pygments/lexers/python.py +939 -0
  791. wandb/vendor/pygments/lexers/qvt.py +152 -0
  792. wandb/vendor/pygments/lexers/r.py +453 -0
  793. wandb/vendor/pygments/lexers/rdf.py +270 -0
  794. wandb/vendor/pygments/lexers/rebol.py +431 -0
  795. wandb/vendor/pygments/lexers/resource.py +85 -0
  796. wandb/vendor/pygments/lexers/rnc.py +67 -0
  797. wandb/vendor/pygments/lexers/roboconf.py +82 -0
  798. wandb/vendor/pygments/lexers/robotframework.py +560 -0
  799. wandb/vendor/pygments/lexers/ruby.py +519 -0
  800. wandb/vendor/pygments/lexers/rust.py +220 -0
  801. wandb/vendor/pygments/lexers/sas.py +228 -0
  802. wandb/vendor/pygments/lexers/scripting.py +1222 -0
  803. wandb/vendor/pygments/lexers/shell.py +794 -0
  804. wandb/vendor/pygments/lexers/smalltalk.py +195 -0
  805. wandb/vendor/pygments/lexers/smv.py +79 -0
  806. wandb/vendor/pygments/lexers/snobol.py +83 -0
  807. wandb/vendor/pygments/lexers/special.py +103 -0
  808. wandb/vendor/pygments/lexers/sql.py +681 -0
  809. wandb/vendor/pygments/lexers/stata.py +108 -0
  810. wandb/vendor/pygments/lexers/supercollider.py +90 -0
  811. wandb/vendor/pygments/lexers/tcl.py +145 -0
  812. wandb/vendor/pygments/lexers/templates.py +2283 -0
  813. wandb/vendor/pygments/lexers/testing.py +207 -0
  814. wandb/vendor/pygments/lexers/text.py +25 -0
  815. wandb/vendor/pygments/lexers/textedit.py +169 -0
  816. wandb/vendor/pygments/lexers/textfmts.py +297 -0
  817. wandb/vendor/pygments/lexers/theorem.py +458 -0
  818. wandb/vendor/pygments/lexers/trafficscript.py +54 -0
  819. wandb/vendor/pygments/lexers/typoscript.py +226 -0
  820. wandb/vendor/pygments/lexers/urbi.py +133 -0
  821. wandb/vendor/pygments/lexers/varnish.py +190 -0
  822. wandb/vendor/pygments/lexers/verification.py +111 -0
  823. wandb/vendor/pygments/lexers/web.py +24 -0
  824. wandb/vendor/pygments/lexers/webmisc.py +988 -0
  825. wandb/vendor/pygments/lexers/whiley.py +116 -0
  826. wandb/vendor/pygments/lexers/x10.py +69 -0
  827. wandb/vendor/pygments/modeline.py +44 -0
  828. wandb/vendor/pygments/plugin.py +68 -0
  829. wandb/vendor/pygments/regexopt.py +92 -0
  830. wandb/vendor/pygments/scanner.py +105 -0
  831. wandb/vendor/pygments/sphinxext.py +158 -0
  832. wandb/vendor/pygments/style.py +155 -0
  833. wandb/vendor/pygments/styles/__init__.py +80 -0
  834. wandb/vendor/pygments/styles/abap.py +29 -0
  835. wandb/vendor/pygments/styles/algol.py +63 -0
  836. wandb/vendor/pygments/styles/algol_nu.py +63 -0
  837. wandb/vendor/pygments/styles/arduino.py +98 -0
  838. wandb/vendor/pygments/styles/autumn.py +65 -0
  839. wandb/vendor/pygments/styles/borland.py +51 -0
  840. wandb/vendor/pygments/styles/bw.py +49 -0
  841. wandb/vendor/pygments/styles/colorful.py +81 -0
  842. wandb/vendor/pygments/styles/default.py +73 -0
  843. wandb/vendor/pygments/styles/emacs.py +72 -0
  844. wandb/vendor/pygments/styles/friendly.py +72 -0
  845. wandb/vendor/pygments/styles/fruity.py +42 -0
  846. wandb/vendor/pygments/styles/igor.py +29 -0
  847. wandb/vendor/pygments/styles/lovelace.py +97 -0
  848. wandb/vendor/pygments/styles/manni.py +75 -0
  849. wandb/vendor/pygments/styles/monokai.py +106 -0
  850. wandb/vendor/pygments/styles/murphy.py +80 -0
  851. wandb/vendor/pygments/styles/native.py +65 -0
  852. wandb/vendor/pygments/styles/paraiso_dark.py +125 -0
  853. wandb/vendor/pygments/styles/paraiso_light.py +125 -0
  854. wandb/vendor/pygments/styles/pastie.py +75 -0
  855. wandb/vendor/pygments/styles/perldoc.py +69 -0
  856. wandb/vendor/pygments/styles/rainbow_dash.py +89 -0
  857. wandb/vendor/pygments/styles/rrt.py +33 -0
  858. wandb/vendor/pygments/styles/sas.py +44 -0
  859. wandb/vendor/pygments/styles/stata.py +40 -0
  860. wandb/vendor/pygments/styles/tango.py +141 -0
  861. wandb/vendor/pygments/styles/trac.py +63 -0
  862. wandb/vendor/pygments/styles/vim.py +63 -0
  863. wandb/vendor/pygments/styles/vs.py +38 -0
  864. wandb/vendor/pygments/styles/xcode.py +51 -0
  865. wandb/vendor/pygments/token.py +213 -0
  866. wandb/vendor/pygments/unistring.py +217 -0
  867. wandb/vendor/pygments/util.py +388 -0
  868. wandb/vendor/watchdog_0_9_0/wandb_watchdog/__init__.py +17 -0
  869. wandb/vendor/watchdog_0_9_0/wandb_watchdog/events.py +615 -0
  870. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/__init__.py +98 -0
  871. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/api.py +369 -0
  872. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents.py +172 -0
  873. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/fsevents2.py +239 -0
  874. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify.py +218 -0
  875. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_buffer.py +81 -0
  876. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/inotify_c.py +575 -0
  877. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/kqueue.py +730 -0
  878. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/polling.py +145 -0
  879. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/read_directory_changes.py +133 -0
  880. wandb/vendor/watchdog_0_9_0/wandb_watchdog/observers/winapi.py +348 -0
  881. wandb/vendor/watchdog_0_9_0/wandb_watchdog/patterns.py +265 -0
  882. wandb/vendor/watchdog_0_9_0/wandb_watchdog/tricks/__init__.py +174 -0
  883. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/__init__.py +151 -0
  884. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/bricks.py +249 -0
  885. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/compat.py +29 -0
  886. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/decorators.py +198 -0
  887. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/delayed_queue.py +88 -0
  888. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/dirsnapshot.py +293 -0
  889. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/echo.py +157 -0
  890. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/event_backport.py +41 -0
  891. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/importlib2.py +40 -0
  892. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/platform.py +57 -0
  893. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/unicode_paths.py +64 -0
  894. wandb/vendor/watchdog_0_9_0/wandb_watchdog/utils/win32stat.py +123 -0
  895. wandb/vendor/watchdog_0_9_0/wandb_watchdog/version.py +28 -0
  896. wandb/vendor/watchdog_0_9_0/wandb_watchdog/watchmedo.py +577 -0
  897. wandb/wandb_agent.py +580 -0
  898. wandb/wandb_controller.py +719 -0
  899. wandb/wandb_run.py +8 -0
  900. wandb-0.21.2.dist-info/METADATA +223 -0
  901. wandb-0.21.2.dist-info/RECORD +904 -0
  902. wandb-0.21.2.dist-info/WHEEL +4 -0
  903. wandb-0.21.2.dist-info/entry_points.txt +3 -0
  904. wandb-0.21.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1692 @@
1
+ """sender."""
2
+
3
+ import contextlib
4
+ import glob
5
+ import gzip
6
+ import json
7
+ import logging
8
+ import os
9
+ import queue
10
+ import threading
11
+ import time
12
+ import traceback
13
+ from collections import defaultdict
14
+ from datetime import datetime
15
+ from queue import Queue
16
+ from typing import (
17
+ TYPE_CHECKING,
18
+ Any,
19
+ Dict,
20
+ Generator,
21
+ List,
22
+ Literal,
23
+ Optional,
24
+ Tuple,
25
+ Type,
26
+ Union,
27
+ )
28
+
29
+ import requests
30
+
31
+ import wandb
32
+ from wandb import util
33
+ from wandb.errors import CommError, UsageError
34
+ from wandb.errors.util import ProtobufErrorHandler
35
+ from wandb.filesync.dir_watcher import DirWatcher
36
+ from wandb.proto import wandb_internal_pb2
37
+ from wandb.sdk.artifacts.artifact_saver import ArtifactSaver
38
+ from wandb.sdk.interface import interface
39
+ from wandb.sdk.interface.interface_queue import InterfaceQueue
40
+ from wandb.sdk.internal import (
41
+ context,
42
+ datastore,
43
+ file_stream,
44
+ internal_api,
45
+ sender_config,
46
+ )
47
+ from wandb.sdk.internal.file_pusher import FilePusher
48
+ from wandb.sdk.internal.job_builder import JobBuilder
49
+ from wandb.sdk.internal.settings_static import SettingsStatic
50
+ from wandb.sdk.lib import (
51
+ config_util,
52
+ filenames,
53
+ filesystem,
54
+ proto_util,
55
+ redirect,
56
+ retry,
57
+ telemetry,
58
+ )
59
+ from wandb.sdk.lib.proto_util import message_to_dict
60
+
61
+ if TYPE_CHECKING:
62
+ from wandb.proto.wandb_internal_pb2 import (
63
+ ArtifactManifest,
64
+ ArtifactManifestEntry,
65
+ ArtifactRecord,
66
+ EnvironmentRecord,
67
+ HttpResponse,
68
+ LocalInfo,
69
+ Record,
70
+ Result,
71
+ RunExitResult,
72
+ RunRecord,
73
+ SummaryRecord,
74
+ )
75
+
76
+ StreamLiterals = Literal["stdout", "stderr"]
77
+
78
+
79
+ logger = logging.getLogger(__name__)
80
+
81
+
82
+ _OUTPUT_MIN_CALLBACK_INTERVAL = 2 # seconds
83
+
84
+
85
def _framework_priority() -> Generator[Tuple[str, str], None, None]:
    """Yield pairs of strings describing ML frameworks, in a fixed order.

    Each item is a 2-tuple of strings. Presumably the first element is the
    name under which an import is detected and the second is the framework
    label reported for it -- TODO confirm against the caller. Several labels
    are reachable from two different first elements; the older spelling is
    kept for backwards compatibility.
    """
    ordered_pairs = (
        ("lightgbm", "lightgbm"),
        ("catboost", "catboost"),
        ("xgboost", "xgboost"),
        # backwards compatibility
        ("transformers_huggingface", "huggingface"),
        ("transformers", "huggingface"),
        # backwards compatibility
        ("pytorch_ignite", "ignite"),
        ("ignite", "ignite"),
        ("pytorch_lightning", "lightning"),
        ("fastai", "fastai"),
        ("torch", "torch"),
        ("keras", "keras"),
        ("tensorflow", "tensorflow"),
        ("sklearn", "sklearn"),
    )
    for pair in ordered_pairs:
        yield pair
101
+
102
+
103
def _manifest_json_from_proto(manifest: "ArtifactManifest") -> Dict:
    """Convert an artifact manifest message into a plain dictionary.

    Only manifest version 1 is understood. The entries come from one of two
    places: when ``manifest.manifest_file_path`` is set, they are read from
    that gzip-compressed file (one JSON object per line, keyed by its
    ``"path"`` member); otherwise they are built from ``manifest.contents``.

    Args:
        manifest: The manifest message to convert.

    Returns:
        A dict with the keys ``"version"``, ``"storagePolicy"``,
        ``"storagePolicyConfig"`` and ``"contents"``.

    Raises:
        ValueError: If ``manifest.version`` is not 1.
    """
    if manifest.version != 1:
        raise ValueError(f"unknown artifact manifest version: {manifest.version}")

    entries: Dict = {}
    if manifest.manifest_file_path:
        # Each line of the file is a standalone JSON object; its "path"
        # member becomes the key and is removed from the stored value.
        with gzip.open(manifest.manifest_file_path, "rt") as manifest_file:
            for raw_line in manifest_file:
                record = json.loads(raw_line)
                entry_path = record.pop("path")
                entries[entry_path] = record
    else:
        for item in manifest.contents:
            entries[item.path] = _manifest_entry_from_proto(item)

    policy_config = {}
    for setting in manifest.storage_policy_config:
        policy_config[setting.key] = json.loads(setting.value_json)

    return {
        "version": manifest.version,
        "storagePolicy": manifest.storage_policy,
        "storagePolicyConfig": policy_config,
        "contents": entries,
    }
129
+
130
+
131
def _manifest_entry_from_proto(entry: "ArtifactManifestEntry") -> Dict:
    """Convert a single manifest entry message into a plain dictionary.

    Falsy values of ``birth_artifact_id``, ``ref`` and ``local_path`` are
    reported as ``None``; ``digest``, ``size`` and ``skip_cache`` are passed
    through unchanged. Every item of ``entry.extra`` has its ``value_json``
    decoded and stored under its ``key``.

    Args:
        entry: The manifest entry message to convert.

    Returns:
        A dict with the keys ``"digest"``, ``"birthArtifactID"``, ``"ref"``,
        ``"size"``, ``"local_path"``, ``"skip_cache"`` and ``"extra"``.
    """
    decoded_extra = {}
    for item in entry.extra:
        decoded_extra[item.key] = json.loads(item.value_json)

    return {
        "digest": entry.digest,
        "birthArtifactID": entry.birth_artifact_id or None,
        "ref": entry.ref or None,
        "size": entry.size,
        "local_path": entry.local_path or None,
        "skip_cache": entry.skip_cache,
        "extra": decoded_extra,
    }
142
+
143
+
144
class ResumeState:
    """Mutable record of the values tracked for a resumed run.

    A fresh instance has ``resumed`` set to ``False``, every counter set to
    zero and every optional field set to ``None``.
    """

    resumed: bool
    step: int
    history: int
    events: int
    output: int
    runtime: float
    wandb_runtime: Optional[int]
    summary: Optional[Dict[str, Any]]
    config: Optional[Dict[str, Any]]
    tags: Optional[List[str]]

    def __init__(self) -> None:
        """Initialise every field to its "nothing resumed yet" value."""
        # NOTE: __str__ renders attributes in assignment order, so the order
        # of these statements is part of the observable output.
        self.resumed = False
        self.step = 0
        self.history = 0
        self.events = 0
        self.output = 0
        self.runtime = 0
        # wandb_runtime is the canonical runtime (stored in summary._wandb.runtime)
        self.wandb_runtime = None
        self.summary = None
        self.config = None
        self.tags = None

    def __str__(self) -> str:
        """Return ``ResumeState(name=value,...)`` over all instance attributes."""
        rendered = ",".join(f"{name}={value}" for name, value in vars(self).items())
        return f"ResumeState({rendered})"
172
+
173
+
174
class _OutputRawStream:
    """Bundle of the per-stream objects used for raw console output.

    Holds a stop event, a queue, a terminal emulator and one writer plus one
    reader thread. Both threads are daemon threads whose targets are methods
    of the owning ``SendManager``; they are created here but only started by
    :meth:`start`.
    """

    _stopped: threading.Event
    _queue: queue.Queue
    _emulator: redirect.TerminalEmulator
    _writer_thr: threading.Thread
    _reader_thr: threading.Thread

    def __init__(self, stream: str, sm: "SendManager"):
        """Create the synchronisation objects and the two (unstarted) threads.

        Args:
            stream: Name of the stream; passed to both thread targets as the
                ``stream`` keyword argument and used in the thread names.
            sm: The manager providing ``_output_raw_writer_thread`` and
                ``_output_raw_reader_thread``.
        """
        self._stopped = threading.Event()
        self._queue = queue.Queue()
        self._emulator = redirect.TerminalEmulator()

        writer_name = f"OutRawWr-{stream}"
        self._writer_thr = threading.Thread(
            target=sm._output_raw_writer_thread,
            kwargs={"stream": stream},
            daemon=True,
            name=writer_name,
        )

        reader_name = f"OutRawRd-{stream}"
        self._reader_thr = threading.Thread(
            target=sm._output_raw_reader_thread,
            kwargs={"stream": stream},
            daemon=True,
            name=reader_name,
        )

    def start(self) -> None:
        """Start the writer thread, then the reader thread."""
        for worker in (self._writer_thr, self._reader_thr):
            worker.start()
201
+
202
+
203
+ class SendManager:
204
+ UPDATE_CONFIG_TIME: int = 30
205
+ UPDATE_STATUS_TIME: int = 5
206
+
207
+ _settings: SettingsStatic
208
+ _record_q: "Queue[Record]"
209
+ _result_q: "Queue[Result]"
210
+ _interface: InterfaceQueue
211
+ _api_settings: Dict[str, str]
212
+ _partial_output: Dict[str, str]
213
+ _context_keeper: context.ContextKeeper
214
+
215
+ _telemetry_obj: telemetry.TelemetryRecord
216
+ _environment_obj: "EnvironmentRecord"
217
+ _fs: Optional["file_stream.FileStreamApi"]
218
+ _run: Optional["RunRecord"]
219
+ _entity: Optional[str]
220
+ _project: Optional[str]
221
+ _dir_watcher: Optional["DirWatcher"]
222
+ _pusher: Optional["FilePusher"]
223
+ _record_exit: Optional["Record"]
224
+ _exit_result: Optional["RunExitResult"]
225
+ _resume_state: ResumeState
226
+ _rewind_response: Optional[Dict[str, Any]]
227
+ _cached_server_info: Dict[str, Any]
228
+ _cached_viewer: Dict[str, Any]
229
+ _server_messages: List[Dict[str, Any]]
230
+ _ds: Optional[datastore.DataStore]
231
+ _output_raw_streams: Dict["StreamLiterals", _OutputRawStream]
232
+ _output_raw_file: Optional[filesystem.CRDedupedFile]
233
+ _send_record_num: int
234
+ _send_end_offset: int
235
+ _debounce_config_time: float
236
+ _debounce_status_time: float
237
+
238
def __init__(
    self,
    settings: SettingsStatic,
    record_q: "Queue[Record]",
    result_q: "Queue[Result]",
    interface: InterfaceQueue,
    context_keeper: context.ContextKeeper,
) -> None:
    """Store the collaborators and reset all sender state.

    Args:
        settings: Static settings; also passed to the internal API client
            and to the job builder.
        record_q: Queue of incoming records.
        result_q: Queue on which results are placed.
        interface: Interface object kept on the instance.
        context_keeper: Keeper used to look up and release contexts.
    """
    # Collaborators handed in by the caller.
    self._settings = settings
    self._record_q = record_q
    self._result_q = result_q
    self._interface = interface
    self._context_keeper = context_keeper

    # Datastore and record bookkeeping.
    self._ds = None
    self._send_record_num = 0
    self._send_end_offset = 0

    # File-related helpers; none exist yet.
    self._fs = None
    self._pusher = None
    self._dir_watcher = None

    # State updated by login
    self._entity = None
    self._flags = None

    # State updated by wandb.init
    self._run = None
    self._project = None

    # keep track of config from key/val updates
    self._consolidated_config = sender_config.ConfigState()

    self._start_time: int = 0
    self._telemetry_obj = telemetry.TelemetryRecord()
    self._environment_obj = wandb_internal_pb2.EnvironmentRecord()
    self._config_metric_pbdict_list: List[Dict[int, Any]] = []
    # defaultdict without a factory: missing keys still raise KeyError.
    self._metadata_summary: Dict[str, Any] = defaultdict()
    self._cached_summary: Dict[str, Any] = {}
    self._config_metric_index_dict: Dict[str, int] = {}
    self._config_metric_dict: Dict[str, wandb_internal_pb2.MetricRecord] = {}
    self._consolidated_summary: Dict[str, Any] = {}

    self._cached_server_info = {}
    self._cached_viewer = {}
    self._server_messages = []

    # State updated by resuming
    self._resume_state = ResumeState()
    self._rewind_response = None

    # State added when run_exit is initiated and complete
    self._record_exit = None
    self._exit_result = None

    # The API client reports retries through self.retry_callback.
    self._api = internal_api.Api(
        default_settings=settings,
        retry_callback=self.retry_callback,
    )
    self._api_settings = {}

    # queue filled by retry_callback
    self._retry_q: Queue[HttpResponse] = queue.Queue()

    # do we need to debounce?
    self._config_needs_debounce: bool = False

    # TODO(jhr): do something better, why do we need to send full lines?
    self._partial_output = {}

    self._exit_code = 0

    # internal vars for handling raw console output
    self._output_raw_streams = {}
    self._output_raw_file = None

    # job builder
    self._job_builder = JobBuilder(settings)

    # Both debounce timers start from the same monotonic timestamp.
    started_at = time.monotonic()
    self._debounce_config_time = started_at
    self._debounce_status_time = started_at
319
+
320
@classmethod
def setup(
    cls,
    root_dir: str,
    resume: Union[None, bool, str],
) -> "SendManager":
    """Set up a standalone SendManager.

    Exclusively used in `sync.py`.

    Args:
        root_dir: Root directory for the settings; the files directory is
            its ``files`` sub-directory.
        resume: Value forwarded to the ``resume`` setting.

    Returns:
        A new ``SendManager`` with fresh record/result queues, a queue-backed
        interface and a new context keeper.
    """
    sync_settings = wandb.Settings(
        x_files_dir=os.path.join(root_dir, "files"),
        root_dir=root_dir,
        # _start_time=0,
        resume=resume,
        # ignore_globs=(),
        x_sync=True,
        disable_job_creation=False,
        x_file_stream_timeout_seconds=0,
    )
    static_settings = SettingsStatic(sync_settings.to_proto())

    records: Queue[Record] = queue.Queue()
    results: Queue[Result] = queue.Queue()

    # NOTE(review): the class is named explicitly here rather than via `cls`.
    return SendManager(
        settings=static_settings,
        record_q=records,
        result_q=results,
        interface=InterfaceQueue(record_q=records),
        context_keeper=context.ContextKeeper(),
    )
352
+
353
def __len__(self) -> int:
    """Return the current size reported by the record queue."""
    pending = self._record_q.qsize()
    return pending
355
+
356
def __enter__(self) -> "SendManager":
    """Enter the context manager; the manager itself is the bound value."""
    return self
358
+
359
def __exit__(
    self,
    exc_type: Optional[Type[BaseException]],
    exc_value: Optional[BaseException],
    exc_traceback: Optional[traceback.TracebackException],
) -> Literal[False]:
    """Send every remaining record, finish, and let exceptions propagate.

    While the manager is truthy, the next record is fetched with ``next``
    and passed to ``send``. Afterwards ``finish`` is called. The exception
    arguments are not inspected.

    Returns:
        Always ``False``, so an exception raised inside the ``with`` body is
        not suppressed.
    """
    while self:
        self.send(next(self))
    self.finish()
    return False
370
+
371
def retry_callback(self, status: int, response_text: str) -> None:
    """Record an HTTP response on the retry queue.

    Args:
        status: Value stored as the response's ``http_status_code``.
        response_text: Value stored as the response's ``http_response_text``.
    """
    self._retry_q.put(
        wandb_internal_pb2.HttpResponse(
            http_status_code=status,
            http_response_text=response_text,
        )
    )
376
+
377
def send(self, record: "Record") -> None:
    """Dispatch a record to the ``send_<record_type>`` handler.

    The record number and end offset are recorded first. The handler runs
    with the API's local context set to the context looked up for this
    record; the local context is cleared afterwards in every case. If the
    handler raises ``retry.RetryCancelledError``, that is logged and the
    record's context is released instead of the error propagating.

    Args:
        record: The record to send. It must have its ``record_type`` oneof
            set, and a matching ``send_*`` method must exist.
    """
    self._update_record_num(record.num)
    self._update_end_offset(record.control.end_offset)

    record_type = record.WhichOneof("record_type")
    assert record_type
    handler_name = f"send_{record_type}"
    handler = getattr(self, handler_name, None)

    # Don't log output to reduce log noise
    quiet_types = ("output", "request", "output_raw")
    if record_type not in quiet_types:
        logger.debug(f"send: {record_type}")
    assert handler, f"unknown send handler: {handler_name}"

    context_id = context.context_id_from_record(record)
    api_context = self._context_keeper.get(context_id)
    try:
        self._api.set_local_context(api_context)
        handler(record)
    except retry.RetryCancelledError:
        logger.debug(f"Record cancelled: {record_type}")
        self._context_keeper.release(context_id)
    finally:
        self._api.clear_local_context()
400
+
401
def send_preempting(self, _: "Record") -> None:
    """Enqueue a preempting notification on the file stream, if one exists."""
    file_stream_api = self._fs
    if not file_stream_api:
        return
    file_stream_api.enqueue_preempting()
404
+
405
def send_request_sender_mark(self, _: "Record") -> None:
    """Publish a status report unconditionally, bypassing the debounce timer."""
    self._maybe_report_status(always=True)
407
+
408
def send_request(self, record: "Record") -> None:
    """Route a request record to the matching ``send_request_<type>`` method.

    Args:
        record: Record whose ``request.request_type`` oneof selects the handler.
    """
    kind = record.request.WhichOneof("request_type")
    assert kind
    handler_str = "send_request_" + kind
    handler = getattr(self, handler_str, None)
    # network_status requests are not logged
    if kind != "network_status":
        logger.debug(f"send_request: {kind}")
    assert handler, f"unknown handle: {handler_str}"
    handler(record)
417
+
418
def _respond_result(self, result: "Result") -> None:
    """Release the context tied to ``result`` and put it on the result queue."""
    self._context_keeper.release(context.context_id_from_result(result))
    self._result_q.put(result)
422
+
423
+ def _flatten(self, dictionary: Dict) -> None:
424
+ if isinstance(dictionary, dict):
425
+ for k, v in list(dictionary.items()):
426
+ if isinstance(v, dict):
427
+ self._flatten(v)
428
+ dictionary.pop(k)
429
+ for k2, v2 in v.items():
430
+ dictionary[k + "." + k2] = v2
431
+
432
+ def _update_record_num(self, record_num: int) -> None:
433
+ if not record_num:
434
+ return
435
+ # Currently how we handle offline mode and syncing is not
436
+ # compatible with this assertion due to how the exit record
437
+ # is (mis)handled:
438
+ # - using "always_send" in offline mode to trigger defer
439
+ # state machine
440
+ # - skipping the exit record in `wandb sync` mode so that
441
+ # it is always executed as the last record
442
+ if not self._settings._offline and not self._settings.x_sync:
443
+ assert record_num == self._send_record_num + 1
444
+ self._send_record_num = record_num
445
+
446
+ def _update_end_offset(self, end_offset: int) -> None:
447
+ if not end_offset:
448
+ return
449
+ self._send_end_offset = end_offset
450
+
451
def send_request_sender_read(self, record: "Record") -> None:
    """Replay records from the datastore between two offsets.

    The datastore is opened lazily on first use from the ``sync_file``
    setting. Records are scanned starting at the request's ``start_offset``
    until the datastore offset reaches ``final_offset``, and each one is
    passed to ``send``.

    Args:
        record: Request record carrying ``request.sender_read``.
    """
    if self._ds is None:
        self._ds = datastore.DataStore()
        self._ds.open_for_scan(self._settings.sync_file)

    # TODO(cancel_paused): implement cancel_set logic
    # The idea is that there is an active request to cancel a
    # message that is being read from the transaction log below

    read_request = record.request.sender_read
    start_offset = read_request.start_offset
    final_offset = read_request.final_offset
    self._ds.seek(start_offset)

    current_end_offset = 0
    while current_end_offset < final_offset:
        raw = self._ds.scan_data()
        assert raw
        current_end_offset = self._ds.get_offset()

        replayed = wandb_internal_pb2.Record()
        replayed.ParseFromString(raw)
        self._update_end_offset(current_end_offset)
        self.send(replayed)

    # make sure we perform deferred operations
    self.debounce()

    # make sure that we always update the writer for every sent read request
    self._maybe_report_status(always=True)
480
+
481
def send_request_stop_status(self, record: "Record") -> None:
    """Respond with whether a stop has been requested for the run.

    ``run_should_stop`` defaults to ``False``. The API is only queried when
    entity, project, run and run id are all set; a failing query is logged as
    a warning and the default is kept.

    Args:
        record: Request record to build the result from.
    """
    result = proto_util._result_from_record(record)
    stop_status = result.response.stop_status_response
    stop_status.run_should_stop = False
    if self._entity and self._project and self._run and self._run.run_id:
        try:
            should_stop = self._api.check_stop_requested(
                self._project, self._entity, self._run.run_id
            )
            stop_status.run_should_stop = should_stop
        except Exception as e:
            logger.warning("Failed to check stop requested status: %s", e)
    self._respond_result(result)
493
+
494
+ def _maybe_update_config(self, always: bool = False) -> None:
495
+ time_now = time.monotonic()
496
+ if (
497
+ not always
498
+ and time_now < self._debounce_config_time + self.UPDATE_CONFIG_TIME
499
+ ):
500
+ return
501
+ if self._config_needs_debounce:
502
+ self._debounce_config()
503
+ self._debounce_config_time = time_now
504
+
505
def _maybe_report_status(self, always: bool = False) -> None:
    """Publish a status report unless one was published too recently.

    The report carries the last sent record number and end offset, stamped
    with the current wall-clock time.

    Args:
        always: When true, skip the ``UPDATE_STATUS_TIME`` interval check.
    """
    now = time.monotonic()
    next_allowed = self._debounce_status_time + self.UPDATE_STATUS_TIME
    if not always and now < next_allowed:
        return
    self._debounce_status_time = now

    report = wandb_internal_pb2.StatusReportRequest(
        record_num=self._send_record_num,
        sent_offset=self._send_end_offset,
    )
    wall_clock = time.time()
    report.sync_time.FromMicroseconds(int(wall_clock * 1e6))
    request = self._interface._make_request(status_report=report)
    self._interface._publish(request)
522
+
523
def debounce(self, final: bool = False) -> None:
    """Run the debounced status report and config update.

    Args:
        final: When true, both operations are forced regardless of their timers.
    """
    for maybe_run in (self._maybe_report_status, self._maybe_update_config):
        maybe_run(always=final)
526
+
527
+ def _debounce_config(self) -> None:
528
+ config_value_dict = self._config_backend_dict()
529
+ # TODO(jhr): check result of upsert_run?
530
+ if self._run:
531
+ self._api.upsert_run(
532
+ name=self._run.run_id,
533
+ config=config_value_dict,
534
+ **self._api_settings, # type: ignore
535
+ )
536
+ self._config_save(config_value_dict)
537
+ self._config_needs_debounce = False
538
+
539
def send_request_network_status(self, record: "Record") -> None:
    """Respond with every HTTP response currently waiting on the retry queue.

    The queue is drained without blocking; errors other than the queue being
    empty are logged as warnings and draining continues.

    Args:
        record: Request record to build the result from.
    """
    result = proto_util._result_from_record(record)
    network_status = result.response.network_status_response
    while True:
        try:
            network_status.network_responses.append(self._retry_q.get_nowait())
        except queue.Empty:
            break
        except Exception as e:
            logger.warning(f"Error emptying retry queue: {e}")
    self._respond_result(result)
550
+
551
def send_exit(self, record: "Record") -> None:
    """Handle the exit record and kick off the defer state machine.

    The record is kept so the final result can be sent back to its origin,
    the exit code is stored, and the runtime is written into the summary
    metadata before the first defer request is published.

    Args:
        record: Record carrying the ``exit`` message.
    """
    # track where the exit came from
    self._record_exit = record

    exit_record = record.exit
    self._exit_code = exit_record.exit_code
    logger.info("handling exit code: %s", exit_record.exit_code)
    logger.info("handling runtime: %s", exit_record.runtime)
    self._metadata_summary["runtime"] = exit_record.runtime
    self._update_summary()

    # We need to give the request queue a chance to empty between states
    # so use handle_request_defer as a state machine.
    logger.info("send defer")
    self._interface.publish_defer()
567
+
568
def send_final(self, record: "Record") -> None:
    """Accept a final record; the sender has nothing to do for it."""
    return None
570
+
571
+ def _flush_run(self) -> None:
572
+ pass
573
+
574
def send_request_status_report(self, record: "Record") -> None:
    """Accept a status report request without acting on it."""
    # todo? this is just a noop to please wandb sync
    return None
577
+
578
def send_request_defer(self, record: "Record") -> None:  # noqa: C901
    """Execute one step of the defer (shutdown) state machine.

    Each state performs its sender-side flush, if any, and then publishes a
    defer request for the next state. When the ``END`` state is reached an
    exit result is stored and, if the exit record asked for a mailbox
    response, sent back.

    Args:
        record: Request record carrying ``request.defer``.

    Raises:
        AssertionError: If the defer state is not a known state.
    """
    defer = record.request.defer
    state = defer.state
    logger.info(f"handle sender defer: {state}")

    def advance() -> None:
        # Publish a defer request for the state following the current one.
        next_state = defer.state + 1
        logger.info(f"send defer: {next_state}")
        self._interface.publish_defer(next_state)

    # NOTE: these states are handled in handler.py:handle_request_defer();
    # the sender only moves on to the next state.
    handler_side_states = (
        defer.FLUSH_STATS,
        defer.FLUSH_PARTIAL_HISTORY,
        defer.FLUSH_TB,
        defer.FLUSH_SUM,
    )

    done = False
    if state == defer.BEGIN or state in handler_side_states:
        advance()
    elif state == defer.FLUSH_RUN:
        self._flush_run()
        advance()
    elif state == defer.FLUSH_DEBOUNCER:
        self.debounce(final=True)
        advance()
    elif state == defer.FLUSH_OUTPUT:
        self._output_raw_finish()
        advance()
    elif state == defer.FLUSH_JOB:
        self._flush_job()
        advance()
    elif state == defer.FLUSH_DIR:
        if self._dir_watcher:
            self._dir_watcher.finish()
            self._dir_watcher = None
        advance()
    elif state == defer.FLUSH_FP:
        if self._pusher:
            # FilePusher generates some events for FileStreamApi, so we
            # need to wait for pusher to finish before going to the next
            # state to ensure that filestream gets all the events that we
            # want before telling it to finish up
            self._pusher.finish(advance)
        else:
            advance()
    elif state == defer.JOIN_FP:
        if self._pusher:
            self._pusher.join()
        advance()
    elif state == defer.FLUSH_FS:
        if self._fs:
            # TODO(jhr): now is a good time to output pending output lines
            self._fs.finish(self._exit_code)
            self._fs = None
        advance()
    elif state == defer.FLUSH_FINAL:
        self._interface.publish_final()
        self._interface.publish_footer()
        advance()
    elif state == defer.END:
        done = True
    else:
        raise AssertionError("unknown state")

    if not done:
        return

    exit_result = wandb_internal_pb2.RunExitResult()

    # mark exit done in case we are polling on exit
    self._exit_result = exit_result

    # Report response to mailbox
    exit_record = self._record_exit
    if exit_record and exit_record.control.mailbox_slot:
        result = proto_util._result_from_record(exit_record)
        result.exit_result.CopyFrom(exit_result)
        self._respond_result(result)
661
+
662
def send_request_poll_exit(self, record: "Record") -> None:
    """Respond to an exit poll with upload statistics and completion state.

    Nothing is sent when the record requests neither a response nor a mailbox
    slot. Pusher byte and file counts are included when a pusher exists, and
    the response is marked done once an exit result has been stored.

    Args:
        record: Request record to build the result from.
    """
    control = record.control
    if not (control.req_resp or control.mailbox_slot):
        return

    result = proto_util._result_from_record(record)

    if self._pusher:
        _alive, status = self._pusher.get_status()
        counts = self._pusher.file_counts_by_category()
        poll = result.response.poll_exit_response
        stats = poll.pusher_stats
        stats.uploaded_bytes = status.uploaded_bytes
        stats.total_bytes = status.total_bytes
        stats.deduped_bytes = status.deduped_bytes
        file_counts = poll.file_counts
        file_counts.wandb_count = counts.wandb
        file_counts.media_count = counts.media
        file_counts.artifact_count = counts.artifact
        file_counts.other_count = counts.other

    if self._exit_result:
        result.response.poll_exit_response.done = True
        result.response.poll_exit_response.exit_result.CopyFrom(self._exit_result)

    self._respond_result(result)
685
+
686
def _setup_resume(
    self, run: "RunRecord"
) -> Optional["wandb_internal_pb2.ErrorInfo"]:
    """Queries the backend for a run; fail if the settings are incompatible.

    When the run exists and resuming is allowed, the resume state (runtime,
    step, line counts, config, summary, tags) is populated from the backend's
    response. If the stored tails cannot be parsed and ``resume`` is not
    ``"must"``, the values parsed so far are used and the rest fall back to
    empty defaults.

    Args:
        run: Run record identifying the entity/project/run to look up.

    Returns:
        An ``ErrorInfo`` with a usage error when ``resume`` is ``"must"`` but
        the run is missing, uninitialized or unparsable, or when ``resume`` is
        ``"never"`` but the run exists; otherwise ``None``.
    """
    if not self._settings.resume:
        return None

    # TODO: This causes a race, we need to make the upsert atomically
    # only create or update depending on the resume config
    # we use the runs entity if set, otherwise fallback to users entity
    # todo: ensure entity is not None as self._entity is Optional[str]
    entity = run.entity or self._entity
    logger.info(
        "checking resume status for %s/%s/%s", entity, run.project, run.run_id
    )
    resume_status = self._api.run_resume_status(
        entity=entity,  # type: ignore
        project_name=run.project,
        name=run.run_id,
    )
    # No resume status = run does not exist; No t key in wandbConfig = run exists but hasn't been inited
    if not resume_status or '"t":' not in resume_status.get("wandbConfig", ""):
        if self._settings.resume == "must":
            error = wandb_internal_pb2.ErrorInfo()
            error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
            error.message = (
                "You provided an invalid value for the `resume` argument."
                f" The value 'must' is not a valid option for resuming a run ({run.run_id}) that has not been initialized."
                " Please check your inputs and try again with a valid run ID."
                " If you are trying to start a new run, please omit the `resume` argument or use `resume='allow'`."
            )
            return error
        return None

    #
    # handle cases where we have resume_status
    #
    if self._settings.resume == "never":
        error = wandb_internal_pb2.ErrorInfo()
        error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
        error.message = (
            "You provided an invalid value for the `resume` argument."
            f" The value 'never' is not a valid option for resuming a run ({run.run_id}) that already exists."
            " Please check your inputs and try again with a valid value for the `resume` argument."
        )
        return error

    # Every value read after the ``try`` gets its default up front, so a
    # parse failure part-way through cannot leave a name unbound.
    events_rt = 0
    history_rt = 0
    last_history = {}
    config = {}
    summary = {}
    tags = []
    try:
        # The tails are JSON lists of JSON-encoded rows; only the last row is
        # used. The decoded list is kept separate from the row dict so an
        # empty tail never ends up being treated as a row.
        history_tail = json.loads(resume_status["historyTail"])
        if history_tail:
            history_row = json.loads(history_tail[-1])
            history_rt = history_row.get("_runtime", 0)
            last_history = history_row
        events_tail = json.loads(resume_status["eventsTail"])
        if events_tail:
            events_row = json.loads(events_tail[-1])
            events_rt = events_row.get("_runtime", 0)
        config = json.loads(resume_status["config"] or "{}")
        summary = json.loads(resume_status["summaryMetrics"] or "{}")
        new_runtime = summary.get("_wandb", {}).get("runtime", None)
        if new_runtime is not None:
            self._resume_state.wandb_runtime = new_runtime
        tags = resume_status.get("tags") or []

    except (IndexError, ValueError):
        logger.exception("unable to load resume tails")
        if self._settings.resume == "must":
            error = wandb_internal_pb2.ErrorInfo()
            error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
            error.message = f"resume='must' but could not resume ({run.run_id}) "
            return error

    # TODO: Do we need to restore config / summary?
    # System metrics runtime is usually greater than history
    self._resume_state.runtime = max(events_rt, history_rt)
    last_step = last_history.get("_step", 0)
    history_line_count = resume_status["historyLineCount"]
    self._resume_state.step = last_step + 1 if history_line_count > 0 else last_step
    self._resume_state.history = history_line_count
    self._resume_state.events = resume_status["eventsLineCount"]
    self._resume_state.output = resume_status["logLineCount"]
    self._resume_state.config = config
    self._resume_state.summary = summary
    self._resume_state.tags = tags
    self._resume_state.resumed = True
    logger.info(f"configured resuming with: {self._resume_state}")
    return None
778
+
779
+ def _telemetry_get_framework(self) -> str:
780
+ """Get telemetry data for internal config structure."""
781
+ # detect framework by checking what is loaded
782
+ imports: telemetry.TelemetryImports
783
+ if self._telemetry_obj.HasField("imports_finish"):
784
+ imports = self._telemetry_obj.imports_finish
785
+ elif self._telemetry_obj.HasField("imports_init"):
786
+ imports = self._telemetry_obj.imports_init
787
+ else:
788
+ return ""
789
+ framework = next(
790
+ (n for f, n in _framework_priority() if getattr(imports, f, False)), ""
791
+ )
792
+ return framework
793
+
794
def _config_backend_dict(self) -> sender_config.BackendConfigDict:
    """Build the backend config dict from the consolidated config.

    Falls back to a fresh ``ConfigState`` when the consolidated config is
    falsy; telemetry, framework, start time, metrics and environment are
    passed along to ``to_backend_dict``.
    """
    config_state = self._consolidated_config or sender_config.ConfigState()
    backend_dict = config_state.to_backend_dict(
        telemetry_record=self._telemetry_obj,
        framework=self._telemetry_get_framework(),
        start_time_millis=self._start_time,
        metric_pbdicts=self._config_metric_pbdict_list,
        environment_record=self._environment_obj,
    )
    return backend_dict
803
+
804
def _config_save(
    self,
    config_value_dict: sender_config.BackendConfigDict,
) -> None:
    """Write the given config dict to ``config.yaml`` in the run's files dir.

    Args:
        config_value_dict: Backend-format config to save.
    """
    files_dir = self._settings.files_dir
    config_util.save_config_file_from_dict(
        os.path.join(files_dir, "config.yaml"), config_value_dict
    )
810
+
811
def _sync_spell(self) -> None:
    """Sync this run with spell.

    Publishes the ``SPELL_RUN_URL`` environment value as a config entry and
    PUTs the run's URL to the spell API. Request failures are ignored; this
    is a best-effort call with a two second timeout.
    """
    run = self._run
    if not run:
        return
    try:
        env = os.environ
        self._interface.publish_config(
            key=("_wandb", "spell_url"), val=env.get("SPELL_RUN_URL")
        )
        url = f"{self._api.app_url}/{run.entity}/{run.project}/runs/{run.run_id}"
        endpoint = env.get("SPELL_API_URL", "https://api.spell.run") + "/wandb_url"
        payload = {"access_token": env.get("WANDB_ACCESS_TOKEN"), "url": url}
        requests.put(endpoint, json=payload, timeout=2)
    except requests.RequestException:
        # TODO: do something if sync spell is not successful?
        pass
829
+
830
+ def _setup_fork(self, server_run: dict):
831
+ assert self._run
832
+ assert self._run.branch_point
833
+ first_step = int(self._run.branch_point.value) + 1
834
+ self._resume_state.step = first_step
835
+ self._resume_state.history = server_run.get("historyLineCount", 0)
836
+ self._run.forked = True
837
+ self._run.starting_step = first_step
838
+
839
+ def _load_rewind_state(self, run: "RunRecord"):
840
+ assert run.branch_point
841
+ self._rewind_response = self._api.rewind_run(
842
+ run_name=run.run_id,
843
+ entity=run.entity or None,
844
+ project=run.project or None,
845
+ metric_name=run.branch_point.metric,
846
+ metric_value=run.branch_point.value,
847
+ program_path=self._settings.program or None,
848
+ )
849
+ self._resume_state.history = self._rewind_response.get("historyLineCount", 0)
850
+ self._resume_state.config = json.loads(
851
+ self._rewind_response.get("config", "{}")
852
+ )
853
+
854
+ def _install_rewind_state(self):
855
+ assert self._run
856
+ assert self._run.branch_point
857
+ assert self._rewind_response
858
+
859
+ first_step = int(self._run.branch_point.value) + 1
860
+ self._resume_state.step = first_step
861
+
862
+ # We set the fork flag here because rewind uses the forking
863
+ # infrastructure under the hood. Setting `forked` here
864
+ # ensures that run._step is properly set in the user process.
865
+ self._run.forked = True
866
+ self._run.starting_step = first_step
867
+
868
def _handle_error(
    self,
    record: "Record",
    error: "wandb_internal_pb2.ErrorInfo",
    run: "RunRecord",
) -> None:
    """Report a run error to the requester, or log it when nobody is waiting.

    Args:
        record: Record being processed; its control flags decide whether a
            result is sent back.
        error: Error to attach to the run result.
        run: Run to attach to the run result.
    """
    wants_response = record.control.req_resp or record.control.mailbox_slot
    if not wants_response:
        logger.error("Got error in async mode: %s", error.message)
        return
    result = proto_util._result_from_record(record)
    result.run_result.run.CopyFrom(run)
    result.run_result.error.CopyFrom(error)
    self._respond_result(result)
881
+
882
def send_run(self, record: "Record", file_dir: Optional[str] = None) -> None:
    """Handle a run record: create or update the run on the backend.

    Telemetry and config from the record are merged and saved. On the first
    run record (``wandb.init``) resume or rewind state is loaded. The run is
    then upserted via ``_init_run``, fork state is applied if requested, the
    result is sent back when the record asks for one, and the run threads are
    started on the first run record.

    Any usage or communication error is reported through ``_handle_error``
    and stops processing of the record.

    Args:
        record: Record carrying the ``run`` message.
        file_dir: Optional directory passed on to ``_start_run_threads``.
    """
    run = record.run
    error = None
    is_wandb_init = self._run is None

    # save start time of a run
    self._start_time = int(run.start_time.ToMicroseconds() // 1e6)

    # update telemetry
    if run.telemetry:
        self._telemetry_obj.MergeFrom(run.telemetry)
    if self._settings.x_sync:
        self._telemetry_obj.feature.sync = True

    # build config dict
    config_value_dict: Optional[sender_config.BackendConfigDict] = None
    if run.config:
        self._consolidated_config.update_from_proto(run.config)
        config_value_dict = self._config_backend_dict()
        self._config_save(config_value_dict)

    do_rewind = run.branch_point.run == run.run_id
    do_fork = not do_rewind and run.branch_point.run != ""
    do_resume = bool(self._settings.resume)

    num_resume_options_set = sum([do_fork, do_rewind, do_resume])
    if num_resume_options_set > 1:
        error = wandb_internal_pb2.ErrorInfo()
        error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
        error.message = (
            "Multiple resume options specified. "
            "Please specify only one of `fork_from`, `resume`, or `resume_from`."
        )
        self._handle_error(record, error, run)
        # The usage error has been reported; stop here so the run is not
        # upserted anyway and the record is not answered a second time.
        return

    if is_wandb_init:
        # Ensure we have a project to query for status
        if run.project == "":
            run.project = util.auto_project_name(self._settings.program)
        # Only check resume status on `wandb.init`

        if do_resume:
            error = self._setup_resume(run)

        elif do_rewind:
            error = self._load_rewind_state(run)

    if error is not None:
        self._handle_error(record, error, run)
        return

    # Save the resumed config
    if self._resume_state.config is not None:
        self._consolidated_config.merge_resumed_config(
            config_util.dict_strip_value_dict(self._resume_state.config)
        )

        config_value_dict = self._config_backend_dict()
        self._config_save(config_value_dict)

    # handle empty config
    # TODO(jhr): consolidate the 4 ways config is built:
    #            (passed config, empty config, resume config, send_config)
    if not config_value_dict:
        config_value_dict = self._config_backend_dict()
        self._config_save(config_value_dict)

    try:
        server_run = self._init_run(run, config_value_dict)
    except (CommError, UsageError) as e:
        logger.error(e, exc_info=True)
        error = ProtobufErrorHandler.from_exception(e)
        self._handle_error(record, error, run)
        return

    assert self._run  # self._run is configured in _init_run()

    if do_fork:
        error = self._setup_fork(server_run)

    if error is not None:
        self._handle_error(record, error, run)
        return

    if record.control.req_resp or record.control.mailbox_slot:
        result = proto_util._result_from_record(record)
        # TODO: we could do self._interface.publish_defer(resp) to notify
        # the handler not to actually perform server updates for this uuid
        # because the user process will send a summary update when we resume
        result.run_result.run.CopyFrom(self._run)
        self._respond_result(result)

    # Only spin up our threads on the first run message
    if is_wandb_init:
        self._start_run_threads(file_dir)
    else:
        logger.info("updated run: %s", self._run.run_id)
979
+
980
+ def _update_resume_state(self, is_rewinding: bool, inserted: bool):
981
+ assert self._run
982
+ if self._resume_state.resumed:
983
+ self._run.resumed = True
984
+ if self._resume_state.wandb_runtime is not None:
985
+ self._run.runtime = self._resume_state.wandb_runtime
986
+ elif is_rewinding:
987
+ # because is_rewinding is mutually exclusive with self._resume_state.resumed,
988
+ # this block will always execute if is_rewinding is set
989
+ self._install_rewind_state()
990
+ else:
991
+ # If the user is not resuming, and we didn't insert on upsert_run then
992
+ # it is likely that we are overwriting the run which we might want to
993
+ # prevent in the future. This could be a false signal since an upsert_run
994
+ # message which gets retried in the network could also show up as not
995
+ # inserted.
996
+ if not inserted:
997
+ # no need to flush this, it will get updated eventually
998
+ self._telemetry_obj.feature.maybe_run_overwrite = True
999
+
1000
def _init_run(
    self,
    run: "RunRecord",
    config_dict: Optional[sender_config.BackendConfigDict],
) -> dict:
    """Upsert (or rewind) the run on the backend and adopt the server's view.

    ``self._run`` is set to ``run`` and then updated with the resume state and
    with values returned by the server (storage id, display name, project,
    entity, sweep id). The API settings are updated to the server's project
    and entity names.

    Args:
        run: Run record to create or update.
        config_dict: Backend-format config to send with the upsert.

    Returns:
        The server's description of the run.

    Raises:
        ValueError: If the run is both resumed and being rewound.
    """
    # We subtract the previous runs runtime when resuming
    start_time = (
        run.start_time.ToMicroseconds() / 1e6
    ) - self._resume_state.runtime
    # TODO: we don't check inserted currently, ultimately we should make
    # the upsert know the resume state and fail transactionally

    if self._resume_state and self._resume_state.tags and not run.tags:
        run.tags.extend(self._resume_state.tags)

    is_rewinding = bool(self._settings.resume_from)
    if not is_rewinding:
        server_run, inserted, server_messages = self._api.upsert_run(
            name=run.run_id,
            entity=run.entity or None,
            project=run.project or None,
            group=run.run_group or None,
            job_type=run.job_type or None,
            display_name=run.display_name or None,
            notes=run.notes or None,
            tags=run.tags[:] or None,
            config=config_dict or None,
            sweep_name=run.sweep_id or None,
            host=run.host or None,
            program_path=self._settings.program or None,
            repo=run.git.remote_url or None,
            commit=run.git.commit or None,
        )
    else:
        # A rewind already returned the server's run; no upsert is needed.
        assert self._rewind_response
        server_run = self._rewind_response
        server_messages = None
        inserted = True

    # TODO: we don't want to create jobs in sweeps, since the
    # executable doesn't appear to be consistent
    if run.sweep_id:
        self._job_builder.disable = True

    self._server_messages = server_messages or []
    self._run = run

    if self._resume_state.resumed and is_rewinding:
        # this should not ever be possible to hit, since we check for
        # resumption above and raise an error if resumption is specified
        # twice.
        raise ValueError(
            "Cannot attempt to rewind and resume a run - only one of "
            "`resume` or `resume_from` can be specified."
        )

    self._update_resume_state(is_rewinding, inserted)
    self._run.starting_step = self._resume_state.step
    self._run.start_time.FromMicroseconds(int(start_time * 1e6))
    self._run.config.CopyFrom(self._interface._make_config(config_dict))
    if self._resume_state.summary is not None:
        self._run.summary.CopyFrom(
            self._interface._make_summary_from_dict(self._resume_state.summary)
        )

    storage_id = server_run.get("id")
    if storage_id:
        self._run.storage_id = storage_id

    server_run_name = server_run.get("name")
    if server_run_name:
        self._api.set_current_run_id(server_run_name)

    display_name = server_run.get("displayName")
    if display_name:
        self._run.display_name = display_name

    project = server_run.get("project")
    # TODO: remove self._api.set_settings, and make self._project a property?
    if project:
        project_name = project.get("name")
        if project_name:
            self._run.project = project_name
            self._project = project_name
            self._api_settings["project"] = project_name
            self._api.set_setting("project", project_name)
        entity = project.get("entity")
        if entity:
            entity_name = entity.get("name")
            if entity_name:
                self._run.entity = entity_name
                self._entity = entity_name
                self._api_settings["entity"] = entity_name
                self._api.set_setting("entity", entity_name)

    sweep_id = server_run.get("sweepName")
    if sweep_id:
        self._run.sweep_id = sweep_id

    if os.getenv("SPELL_RUN_URL"):
        self._sync_spell()
    return server_run
1096
+
1097
def _start_run_threads(self, file_dir: Optional[str] = None) -> None:
    """Create and start the file stream, file pusher and directory watcher.

    File policies are registered with chunk offsets taken from the resume
    state, and the Sentry scope is configured with the merged settings and run
    fields before the file stream is started.

    Args:
        file_dir: Optional directory passed to the directory watcher.
    """
    run = self._run
    assert run  # self._run is configured by caller
    self._fs = file_stream.FileStreamApi(
        self._api,
        run.run_id,
        run.start_time.ToMicroseconds() / 1e6,
        timeout=self._settings.x_file_stream_timeout_seconds or 0,
        settings=self._api_settings,
    )
    # Ensure the streaming policies have the proper offsets
    resume_state = self._resume_state
    self._fs.set_file_policy("wandb-summary.json", file_stream.SummaryFilePolicy())
    self._fs.set_file_policy(
        "wandb-history.jsonl",
        file_stream.JsonlFilePolicy(start_chunk_id=resume_state.history),
    )
    self._fs.set_file_policy(
        "wandb-events.jsonl",
        file_stream.JsonlFilePolicy(start_chunk_id=resume_state.events),
    )
    self._fs.set_file_policy(
        "output.log",
        file_stream.CRDedupeFilePolicy(start_chunk_id=resume_state.output),
    )

    # hack to merge run_settings and self._settings object together
    # so that fields like entity or project are available to be attached to Sentry events.
    sentry_tags = dict(self._settings)
    sentry_tags.update(message_to_dict(run))
    wandb._sentry.configure_scope(tags=sentry_tags, process_context="internal")

    self._fs.start()
    self._pusher = FilePusher(self._api, self._fs, settings=self._settings)
    self._dir_watcher = DirWatcher(self._settings, self._pusher, file_dir)
    logger.info(
        "run started: %s with start time %s",
        run.run_id,
        run.start_time.ToMicroseconds() / 1e6,
    )
1136
+
1137
+ def _save_history(self, history_dict: Dict[str, Any]) -> None:
1138
+ if self._fs:
1139
+ self._fs.push(filenames.HISTORY_FNAME, json.dumps(history_dict))
1140
+
1141
def send_history(self, record: "Record") -> None:
    """Convert a history record's items to a dict and save it.

    Args:
        record: Record carrying the ``history`` message.
    """
    row = proto_util.dict_from_proto_list(record.history.item)
    self._save_history(row)
1145
+
1146
def _update_summary_record(self, summary: "SummaryRecord") -> None:
    """Cache the summary's update items as a dict and refresh the summary.

    Args:
        summary: Summary record whose ``update`` items replace the cache.
    """
    self._cached_summary = proto_util.dict_from_proto_list(summary.update)
    self._update_summary()
1150
+
1151
def send_summary(self, record: "Record") -> None:
    """Apply the summary carried directly on the record."""
    summary = record.summary
    self._update_summary_record(summary)
1153
+
1154
def send_request_summary_record(self, record: "Record") -> None:
    """Apply the summary carried inside a summary-record request."""
    summary = record.request.summary_record.summary
    self._update_summary_record(summary)
1156
+
1157
def _update_summary(self) -> None:
    """Merge the cached summary into the consolidated one and persist it.

    Any ``_wandb`` entry in the cached summary is replaced by the sender's
    metadata summary (when non-empty). The merged summary is pushed to the
    file stream if one exists, written to the summary file in the files
    directory, and that file is registered for saving.
    """
    fresh = self._cached_summary.copy()
    fresh.pop("_wandb", None)
    if self._metadata_summary:
        fresh["_wandb"] = self._metadata_summary
    # merge with consolidated summary
    self._consolidated_summary.update(fresh)
    json_summary = json.dumps(self._consolidated_summary)
    if self._fs:
        self._fs.push(filenames.SUMMARY_FNAME, json_summary)
    # TODO(jhr): we should only write this at the end of the script
    summary_path = os.path.join(self._settings.files_dir, filenames.SUMMARY_FNAME)
    with open(summary_path, "w") as summary_file:
        summary_file.write(json_summary)
    self._save_file(interface.GlobStr(filenames.SUMMARY_FNAME))
1172
+
1173
def send_stats(self, record: "Record") -> None:
    """Push a system stats record to the events file stream.

    Only ``SYSTEM`` stats are handled, and only when a file stream and a run
    exist. Items whose JSON cannot be decoded are logged and skipped. The row
    is flattened under the ``system.`` prefix and stamped with the record's
    timestamp and the runtime relative to the run's start time.

    Args:
        record: Record carrying the ``stats`` message.
    """
    stats = record.stats
    if stats.stats_type != wandb_internal_pb2.StatsRecord.StatsType.SYSTEM:
        return
    if not self._fs:
        return
    if not self._run:
        return
    now_us = stats.timestamp.ToMicroseconds()
    start_us = self._run.start_time.ToMicroseconds()
    system_stats = {}
    for item in stats.item:
        try:
            system_stats[item.key] = json.loads(item.value_json)
        except json.JSONDecodeError:
            logger.exception("error decoding stats json: %s", item.value_json)
    row: Dict[str, Any] = {"system": system_stats}
    self._flatten(row)
    row["_wandb"] = True
    row["_timestamp"] = now_us / 1e6
    row["_runtime"] = (now_us - start_us) / 1e6
    self._fs.push(filenames.EVENTS_FNAME, json.dumps(row))
    # TODO(jhr): check fs.push results?
1196
+
1197
def _output_raw_finish(self) -> None:
    """Stop every raw output stream, flush it, and close the raw output file.

    For each stream the stop event is set, the writer thread is joined (first
    with a five second timeout, then without one if it is still alive), the
    reader thread is joined, and the remaining output is flushed.
    """
    for stream, output_raw in self._output_raw_streams.items():
        output_raw._stopped.set()

        # shut down threads
        writer = output_raw._writer_thr
        writer.join(timeout=5)
        if writer.is_alive():
            logger.info("processing output...")
            writer.join()
        output_raw._reader_thr.join()

        # flush output buffers and files
        self._output_raw_flush(stream)
    self._output_raw_streams = {}
    raw_file = self._output_raw_file
    if raw_file:
        raw_file.close()
        self._output_raw_file = None
1214
+
1215
def _output_raw_writer_thread(self, stream: "StreamLiterals") -> None:
    """Feed queued raw output for ``stream`` into its terminal emulator.

    Loops until the stream is stopped and its queue is empty. If the stream
    has been stopped and more than 100000 characters are pending, the pending
    lines are flushed directly without going through the emulator and the
    thread exits.

    Args:
        stream: Key of the stream in ``self._output_raw_streams``.
    """
    while True:
        output_raw = self._output_raw_streams[stream]
        pending = output_raw._queue
        if pending.empty():
            if output_raw._stopped.is_set():
                return
            time.sleep(0.5)
            continue
        chunks = []
        while not pending.empty():
            chunks.append(pending.get())
        if output_raw._stopped.is_set() and sum(map(len, chunks)) > 100000:
            logger.warning("Terminal output too large. Logging without processing.")
            self._output_raw_flush(stream)
            for chunk in chunks:
                self._output_raw_flush(stream, chunk)
            # TODO: lets mark that this happened in telemetry
            return
        try:
            output_raw._emulator.write("".join(chunks))
        except Exception as e:
            logger.warning(f"problem writing to output_raw emulator: {e}")
1237
+
1238
def _output_raw_reader_thread(self, stream: "StreamLiterals") -> None:
    """Reader loop: periodically flush the emulator output of ``stream``.

    Runs until the stream has been stopped and its queue is empty, sleeping
    ``_OUTPUT_MIN_CALLBACK_INTERVAL`` between flushes.
    """
    raw_stream = self._output_raw_streams[stream]
    while True:
        if raw_stream._stopped.is_set() and raw_stream._queue.empty():
            return
        self._output_raw_flush(stream)
        time.sleep(_OUTPUT_MIN_CALLBACK_INTERVAL)
1243
+
1244
def _output_raw_flush(
    self, stream: "StreamLiterals", data: Optional[str] = None
) -> None:
    """Forward output text for ``stream`` and mirror it to the raw file.

    Args:
        stream: Name of the output stream.
        data: Text to forward. When ``None``, the text is read from the
            stream's emulator instead; a failing read is logged and
            nothing is forwarded.
    """
    text = data
    if text is None:
        raw_stream = self._output_raw_streams[stream]
        try:
            text = raw_stream._emulator.read()
        except Exception as e:
            logger.warning(f"problem reading from output_raw emulator: {e}")

    if not text:
        return

    self._send_output_line(stream, text)
    raw_file = self._output_raw_file
    if raw_file:
        raw_file.write(text.encode("utf-8"))
1257
+
1258
def send_request_python_packages(self, record: "Record") -> None:
    """Write the packages listed in the request to the requirements file.

    Each package becomes a ``name==version`` line; lines are sorted and
    written to ``REQUIREMENTS_FNAME`` inside the settings' files directory.
    """
    import os

    from wandb.sdk.lib.filenames import REQUIREMENTS_FNAME

    pins = [
        f"{pkg.name}=={pkg.version}"
        for pkg in record.request.python_packages.package
    ]
    pins.sort()

    requirements_path = os.path.join(self._settings.files_dir, REQUIREMENTS_FNAME)
    with open(requirements_path, "w") as f:
        f.write("\n".join(pins))
1268
+
1269
def send_output(self, record: "Record") -> None:
    """Forward one console output line to the combined output writer.

    Does nothing while the file stream is unset. The line is attributed to
    ``"stderr"`` when the record's output type is ``STDERR`` and to
    ``"stdout"`` otherwise.
    """
    if not self._fs:
        return
    output = record.output
    is_stderr = (
        output.output_type == wandb_internal_pb2.OutputRecord.OutputType.STDERR
    )
    stream: StreamLiterals = "stderr" if is_stderr else "stdout"
    self._send_output_line(stream, output.line)
1278
+
1279
def send_output_raw(self, record: "Record") -> None:
    """Queue one raw console output chunk on its stream.

    Does nothing while the file stream is unset. The first chunk seen for a
    stream creates and starts its ``_OutputRawStream``; the console output
    file, shared by both streams, is opened at that point if it is not open
    already.
    """
    if not self._fs:
        return

    raw = record.output_raw
    is_stderr = (
        raw.output_type == wandb_internal_pb2.OutputRawRecord.OutputType.STDERR
    )
    stream: StreamLiterals = "stderr" if is_stderr else "stdout"
    text = raw.line

    raw_stream = self._output_raw_streams.get(stream)
    if not raw_stream:
        raw_stream = _OutputRawStream(stream=stream, sm=self)
        self._output_raw_streams[stream] = raw_stream

        # open the console output file shared between both streams
        if not self._output_raw_file:
            log_path = os.path.join(
                self._settings.files_dir, filenames.OUTPUT_FNAME
            )
            try:
                deduped_file = filesystem.CRDedupedFile(open(log_path, "wb"))
            except OSError as e:
                # Failure to open is logged; the stream is still started.
                logger.warning(f"could not open output_raw_file: {e}")
            else:
                if deduped_file:
                    self._output_raw_file = deduped_file
        raw_stream.start()

    raw_stream._queue.put(text)
1310
+
1311
def _send_output_line(self, stream: "StreamLiterals", line: str) -> None:
    """Combined writer for raw and non raw output lines.

    This is combined because they are both post emulator.

    Text that does not end with a newline is buffered per stream in
    ``self._partial_output``; a chunk that starts with a carriage return
    discards what was buffered before it. Text that ends with a newline is
    prefixed with ``"ERROR "`` (stderr only), a UTC ISO timestamp and the
    buffered partial text, pushed to the file stream if one is set, and the
    buffer for the stream is cleared.

    Args:
        stream: Name of the output stream the text belongs to.
        line: Output text, possibly a partial line.
    """
    # Local import: only ``datetime`` itself is known to be in scope here.
    from datetime import timezone

    prepend = "ERROR " if stream == "stderr" else ""

    if not line.endswith("\n"):
        self._partial_output.setdefault(stream, "")
        if line.startswith("\r"):
            # TODO: maybe we shouldn't just drop this, what if there was some \ns in the partial
            # that should probably be the check instead of not line.endswith(\n")
            # logger.info(f"Dropping data {self._partial_output[stream]}")
            self._partial_output[stream] = ""
        self._partial_output[stream] += line
        # TODO(jhr): how do we make sure this gets flushed?
        # we might need this for other stuff like telemetry
        return

    # TODO(jhr): use time from timestamp proto
    # TODO(jhr): do we need to make sure we write full lines?
    # seems to be some issues with line breaks
    cur_time = time.time()
    # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12. Convert
    # with an explicit UTC zone and drop the tzinfo again so the rendered
    # text keeps the same naive ISO format (no "+00:00" suffix).
    utc_now = datetime.fromtimestamp(cur_time, tz=timezone.utc).replace(tzinfo=None)
    timestamp = utc_now.isoformat() + " "
    prev_str = self._partial_output.get(stream, "")
    line = f"{prepend}{timestamp}{prev_str}{line}"
    if self._fs:
        self._fs.push(filenames.OUTPUT_FNAME, line)
    self._partial_output[stream] = ""
1340
+
1341
def _update_config(self) -> None:
    """Flag the config as needing a debounce by setting ``_config_needs_debounce``."""
    self._config_needs_debounce = True
1343
+
1344
def send_config(self, record: "Record") -> None:
    """Apply ``record.config`` to the consolidated config, then call ``_update_config``."""
    incoming_config = record.config
    self._consolidated_config.update_from_proto(incoming_config)
    self._update_config()
1347
+
1348
def send_metric(self, record: "Record") -> None:
    """Record a metric definition in the config metric tables.

    Metrics carrying a ``glob_name`` are rejected with a warning. Otherwise
    the incoming metric is copied over or merged into the stored metric of
    the same name (depending on ``_control.overwrite``), its encoded form
    replaces or extends ``_config_metric_pbdict_list``, and the config is
    debounced.
    """
    incoming = record.metric
    if incoming.glob_name:
        logger.warning("Seen metric with glob (shouldn't happen)")
        return

    # merge or overwrite
    stored = self._config_metric_dict.get(
        incoming.name, wandb_internal_pb2.MetricRecord()
    )
    apply_incoming = (
        stored.CopyFrom if incoming._control.overwrite else stored.MergeFrom
    )
    apply_incoming(incoming)
    self._config_metric_dict[incoming.name] = stored
    effective = stored

    # convert step_metric to index
    if effective.step_metric:
        step_idx = self._config_metric_index_dict.get(effective.step_metric)
        if step_idx is not None:
            # Work on a copy so the stored metric keeps its step_metric name.
            holder = wandb_internal_pb2.Record()
            holder.metric.CopyFrom(effective)
            effective = holder.metric

            effective.ClearField("step_metric")
            # The stored index is one-based.
            effective.step_metric_index = step_idx + 1

    encoded: Dict[int, Any] = proto_util.proto_encode_to_dict(effective)
    slot = self._config_metric_index_dict.get(effective.name)
    if slot is None:
        # First time this metric is seen: append and remember its position.
        self._config_metric_index_dict[effective.name] = len(
            self._config_metric_pbdict_list
        )
        self._config_metric_pbdict_list.append(encoded)
    else:
        self._config_metric_pbdict_list[slot] = encoded
    self._debounce_config()
1386
+
1387
def _update_telemetry_record(self, telemetry: telemetry.TelemetryRecord) -> None:
    """Merge ``telemetry`` into the accumulated telemetry object and debounce the config."""
    accumulated = self._telemetry_obj
    accumulated.MergeFrom(telemetry)
    self._debounce_config()
1390
+
1391
def send_telemetry(self, record: "Record") -> None:
    """Hand ``record.telemetry`` to ``_update_telemetry_record``."""
    payload = record.telemetry
    self._update_telemetry_record(payload)
1393
+
1394
def send_request_telemetry_record(self, record: "Record") -> None:
    """Hand the telemetry carried by a telemetry-record request to ``_update_telemetry_record``."""
    request = record.request.telemetry_record
    self._update_telemetry_record(request.telemetry)
1396
+
1397
def _save_file(
    self, fname: interface.GlobStr, policy: "interface.PolicyName" = "end"
) -> None:
    """Log the request and, if a directory watcher exists, update its policy.

    Args:
        fname: Glob string identifying the file(s).
        policy: Policy name passed to the watcher; defaults to ``"end"``.
    """
    logger.info("saving file %s with policy %s", fname, policy)
    watcher = self._dir_watcher
    if not watcher:
        return
    watcher.update_policy(fname, policy)
1403
+
1404
def send_files(self, record: "Record") -> None:
    """Call ``_save_file`` for every file entry listed in ``record.files``.

    Each path is glob-escaped before use, and the entry's policy enum is
    converted with ``interface.file_enum_to_policy``.
    """
    for entry in record.files.files:
        # TODO(jhr): fix paths with directories
        escaped_path = interface.GlobStr(glob.escape(entry.path))
        policy_name = interface.file_enum_to_policy(entry.policy)
        self._save_file(escaped_path, policy_name)
1412
+
1413
def send_header(self, record: "Record") -> None:
    """Accept a header record; nothing is done with it here."""
    return None
1415
+
1416
def send_footer(self, record: "Record") -> None:
    """Accept a footer record; nothing is done with it here."""
    return None
1418
+
1419
def send_tbrecord(self, record: "Record") -> None:
    """Accept a tbrecord; nothing is done with it here."""
    # tbrecord watching threads are handled by handler.py
    return None
1422
+
1423
def _update_environment_record(self, environment: "EnvironmentRecord") -> None:
    """Merge ``environment`` into the accumulated environment object and debounce the config."""
    accumulated = self._environment_obj
    accumulated.MergeFrom(environment)
    self._debounce_config()
1426
+
1427
def send_environment(self, record: "Record") -> None:
    """Inject environment info into config and upload as a JSON file.

    The accumulated environment object is serialized to JSON, written to
    ``METADATA_FNAME`` in the settings' files directory, and that file is
    registered for saving with the ``"now"`` policy.
    """
    self._update_environment_record(record.environment)

    environment_dict = proto_util.message_to_dict(self._environment_obj)
    environment_json = json.dumps(environment_dict)

    metadata_path = os.path.join(
        self._settings.files_dir, filenames.METADATA_FNAME
    )
    with open(metadata_path, "w") as f:
        f.write(environment_json)

    self._save_file(interface.GlobStr(filenames.METADATA_FNAME), policy="now")
1439
+
1440
def send_request_link_artifact(self, record: "Record") -> None:
    """Link an artifact to a portfolio and respond with the outcome.

    The link is attempted only when an artifact id (client or server), a
    portfolio name, an entity and a project are all present. A failure is
    stored in the response's ``error_message`` and logged. A result is
    responded in every case.

    Raises:
        ValueError: If the record's control has neither ``req_resp`` nor
            ``mailbox_slot`` set.
    """
    control = record.control
    if not control.req_resp and not control.mailbox_slot:
        raise ValueError(
            f"Expected either `req_resp` or `mailbox_slot`, got: {record.control!r}"
        )

    result = proto_util._result_from_record(record)
    link = record.request.link_artifact
    client_id, server_id = link.client_id, link.server_id
    portfolio_name = link.portfolio_name
    entity, project = link.portfolio_entity, link.portfolio_project
    aliases = link.portfolio_aliases
    organization = link.portfolio_organization
    logger.debug(
        f"link_artifact params - client_id={client_id}, server_id={server_id}, "
        f"portfolio_name={portfolio_name}, entity={entity}, project={project}, "
        f"organization={organization}"
    )

    link_response = result.response.link_artifact_response
    if (client_id or server_id) and portfolio_name and entity and project:
        try:
            api_response = self._api.link_artifact(
                client_id,
                server_id,
                portfolio_name,
                entity,
                project,
                aliases,
                organization,
            )
            link_response.version_index = api_response["versionIndex"]
        except Exception as e:
            # The organization, when given, replaces the entity in the message.
            org_or_entity = organization or entity
            link_response.error_message = (
                f"error linking artifact to "
                f'"{org_or_entity}/{project}/{portfolio_name}"; error: {e}'
            )
            logger.warning("Failed to link artifact to portfolio: %s", e)
    self._respond_result(result)
1481
+
1482
def send_use_artifact(self, record: "Record") -> None:
    """Pretend to send a used artifact.

    This function doesn't actually send anything, it is just used internally.
    A used artifact with a partial job name hands its id to the job builder
    as the partial source; a used artifact of type ``"job"`` without one
    disables the job builder.
    """
    used = record.use_artifact
    partial_job_name = used.partial.job_name

    if partial_job_name:
        # job is partial, let job builder rebuild job, set job source dict
        self._job_builder.set_partial_source_id(used.id)
    elif used.type == "job":
        self._job_builder.disable = True
1494
+
1495
def send_request_log_artifact(self, record: "Record") -> None:
    """Log the artifact carried by a log-artifact request and respond.

    On success the response carries the saved artifact's id. Any failure,
    including an empty result from ``_send_artifact``, is reported through
    the response's ``error_message`` instead of being raised. A result is
    responded in every case.
    """
    result = proto_util._result_from_record(record)
    artifact = record.request.log_artifact.artifact
    history_step = record.request.log_artifact.history_step

    try:
        res = self._send_artifact(artifact, history_step)
        if not res:
            # Raised explicitly rather than asserted: ``assert`` is stripped
            # under ``python -O``, which would turn this case into an
            # unrelated "'NoneType' object is not subscriptable" message.
            raise RuntimeError("Unable to send artifact")
        result.response.log_artifact_response.artifact_id = res["id"]
        logger.info(f"logged artifact {artifact.name} - {res}")
    except Exception as e:
        result.response.log_artifact_response.error_message = (
            f'error logging artifact "{artifact.type}/{artifact.name}": {e}'
        )

    self._respond_result(result)
1511
+
1512
def send_artifact(self, record: "Record") -> None:
    """Send ``record.artifact``; failures are logged, never raised."""
    to_send = record.artifact
    try:
        outcome = self._send_artifact(to_send)
        logger.info(f"sent artifact {to_send.name} - {outcome}")
    except Exception:
        logger.exception(
            f'send_artifact: failed for artifact "{to_send.type}/{to_send.name}"'
        )
1520
+ )
1521
+
1522
def _send_artifact(
    self, artifact: "ArtifactRecord", history_step: Optional[int] = None
) -> Optional[Dict]:
    """Save an artifact through an ``ArtifactSaver`` and return the result.

    Args:
        artifact: Artifact record to save.
        history_step: History step forwarded to the saver.

    Returns:
        Whatever ``ArtifactSaver.save`` returns, or ``None`` when the
        artifact has a distributed id and the server's max CLI version is
        unknown or older than 0.10.16.
    """
    from packaging.version import parse

    assert self._pusher
    saver = ArtifactSaver(
        api=self._api,
        digest=artifact.digest,
        manifest_json=_manifest_json_from_proto(artifact.manifest),
        file_pusher=self._pusher,
        is_user_created=artifact.user_created,
    )

    if artifact.distributed_id:
        server_max = self._max_cli_version()
        if server_max is None or parse(server_max) < parse("0.10.16"):
            logger.warning(
                "This W&B Server doesn't support distributed artifacts, "
                "have your administrator install wandb/local >= 0.9.37"
            )
            return None

    if artifact.metadata:
        metadata = json.loads(artifact.metadata)
    else:
        metadata = None

    # Empty optional fields are passed to the saver as None.
    saved = saver.save(
        entity=artifact.entity,
        project=artifact.project,
        type=artifact.type,
        name=artifact.name,
        client_id=artifact.client_id,
        sequence_client_id=artifact.sequence_client_id,
        metadata=metadata,
        ttl_duration_seconds=artifact.ttl_duration_seconds or None,
        description=artifact.description or None,
        aliases=artifact.aliases,
        tags=artifact.tags,
        use_after_commit=artifact.use_after_commit,
        distributed_id=artifact.distributed_id,
        finalize=artifact.finalize,
        incremental=artifact.incremental_beta1,
        history_step=history_step,
        base_id=artifact.base_id or None,
    )

    self._job_builder._handle_server_artifact(saved, artifact)

    # Remove the manifest file once saved; a missing file is not an error.
    if artifact.manifest.manifest_file_path:
        try:
            os.remove(artifact.manifest.manifest_file_path)
        except FileNotFoundError:
            pass
    return saved
1572
+
1573
def send_alert(self, record: "Record") -> None:
    """Send a run alert if the server supports alerts.

    When the server's max CLI version is unknown or older than 0.10.9 only
    a warning is logged. A failing notification is logged, never raised.
    """
    from packaging.version import parse

    alert = record.alert
    server_max = self._max_cli_version()
    if server_max is None or parse(server_max) < parse("0.10.9"):
        logger.warning(
            "This W&B server doesn't support alerts, "
            "have your administrator install wandb/local >= 0.9.31"
        )
        return

    try:
        self._api.notify_scriptable_run_alert(
            title=alert.title,
            text=alert.text,
            level=alert.level,
            wait_duration=alert.wait_duration,
        )
    except Exception:
        logger.exception(f"send_alert: failed for alert {alert.title!r}")
1593
+
1594
def finish(self) -> None:
    """Shut down the sender.

    In order: finish raw output, finish the directory watcher, finish and
    join the file pusher, finish the file stream with the exit code, then
    end the Sentry session. Each finished component is reset to ``None``.
    """
    logger.info("shutting down sender")
    # if self._tb_watcher:
    #     self._tb_watcher.finish()
    self._output_raw_finish()

    watcher = self._dir_watcher
    if watcher:
        watcher.finish()
        self._dir_watcher = None

    pusher = self._pusher
    if pusher:
        pusher.finish()
        pusher.join()
        self._pusher = None

    file_stream = self._fs
    if file_stream:
        file_stream.finish(self._exit_code)
        self._fs = None

    wandb._sentry.end_session()
1610
+
1611
def _max_cli_version(self) -> Optional[str]:
    """Return the server's ``max_cli_version`` string, if it reports one.

    Returns:
        The ``max_cli_version`` entry of the server info's
        ``cliVersionInfo`` when it is a string, otherwise ``None``.
    """
    server_info = self.get_server_info() or {}
    # ``.get(key, {})`` only covers a missing key; a key that is present
    # with a null value would make the chained ``.get`` raise
    # AttributeError, so fall back to an empty mapping for that case too.
    cli_version_info = server_info.get("cliVersionInfo") or {}
    max_cli_version = cli_version_info.get("max_cli_version", None)
    if not isinstance(max_cli_version, str):
        return None
    return max_cli_version
1619
+
1620
def get_viewer_server_info(self) -> None:
    """Fetch and cache viewer and server info unless both are already cached."""
    if not (self._cached_server_info and self._cached_viewer):
        viewer, server_info = self._api.viewer_server_info()
        self._cached_viewer = viewer
        self._cached_server_info = server_info
1624
+
1625
def get_viewer_info(self) -> Dict[str, Any]:
    """Return the cached viewer info, fetching it first when the cache is empty."""
    cache_is_empty = not self._cached_viewer
    if cache_is_empty:
        self.get_viewer_server_info()
    return self._cached_viewer
1629
+
1630
def get_server_info(self) -> Dict[str, Any]:
    """Return the cached server info, fetching it first when the cache is empty."""
    cache_is_empty = not self._cached_server_info
    if cache_is_empty:
        self.get_viewer_server_info()
    return self._cached_server_info
1634
+
1635
def get_local_info(self) -> "LocalInfo":
    """Queries the server to get the local version information.

    First, we perform an introspection, if it returns empty we deduce that the
    docker image is out-of-date. Otherwise, we use the returned values to deduce the
    state of the local server.

    Returns:
        A ``LocalInfo`` message. In offline mode, or when the server reports
        a null ``latestLocalVersionInfo``, it is marked as not out of date
        and its version is left unset.
    """
    info = wandb_internal_pb2.LocalInfo()
    if self._settings._offline:
        info.out_of_date = False
        return info

    # Assuming the query is successful if the result is empty it indicates that
    # the backend is out of date since it doesn't have the desired field
    version_info = self.get_server_info().get("latestLocalVersionInfo", {})
    if version_info is None:
        info.out_of_date = False
        return info

    info.out_of_date = version_info.get("outOfDate", True)
    info.version = version_info.get("latestVersionString", "latest")
    return info
1661
+
1662
def _flush_job(self) -> None:
    """Build the job artifact and publish it through the interface.

    Skipped when the job builder is disabled or the settings are offline.
    The builder receives the non-internal config and a copy of the cached
    summary without its ``"_wandb"`` key. Nothing is published when the
    builder returns no artifact or no run is set.
    """
    builder = self._job_builder
    if builder.disable or self._settings._offline:
        return

    builder.set_config(self._consolidated_config.non_internal_config())
    # Copy first so the cached summary keeps its "_wandb" entry.
    summary = self._cached_summary.copy()
    summary.pop("_wandb", None)
    builder.set_summary(summary)

    built = builder.build(api=self._api)
    if built is None or self._run is None:
        return

    proto = self._interface._make_artifact(built)
    run = self._run
    proto.run_id = run.run_id
    proto.project = run.project
    proto.entity = run.entity
    # TODO: this should be removed when the latest tag is handled
    # by the backend (WB-12116)
    proto.aliases.append("latest")
    # add docker image tag
    for extra_alias in builder._aliases:
        proto.aliases.append(extra_alias)

    proto.user_created = True
    proto.use_after_commit = True
    proto.finalize = True

    self._interface._publish_artifact(proto)
1688
+
1689
def __next__(self) -> "Record":
    """Block until the record queue yields a record, then return it."""
    record = self._record_q.get(block=True)
    return record


# Alias so the method is also reachable under the name ``next``.
next = __next__