workbench 0.8.198__py3-none-any.whl → 0.8.203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. workbench/algorithms/dataframe/proximity.py +11 -4
  2. workbench/api/__init__.py +2 -1
  3. workbench/api/df_store.py +17 -108
  4. workbench/api/feature_set.py +48 -11
  5. workbench/api/model.py +1 -1
  6. workbench/api/parameter_store.py +3 -52
  7. workbench/core/artifacts/__init__.py +11 -2
  8. workbench/core/artifacts/artifact.py +5 -5
  9. workbench/core/artifacts/df_store_core.py +114 -0
  10. workbench/core/artifacts/endpoint_core.py +261 -78
  11. workbench/core/artifacts/feature_set_core.py +69 -1
  12. workbench/core/artifacts/model_core.py +48 -14
  13. workbench/core/artifacts/parameter_store_core.py +98 -0
  14. workbench/core/transforms/features_to_model/features_to_model.py +50 -33
  15. workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
  16. workbench/core/views/view.py +2 -2
  17. workbench/model_scripts/chemprop/chemprop.template +933 -0
  18. workbench/model_scripts/chemprop/generated_model_script.py +933 -0
  19. workbench/model_scripts/chemprop/requirements.txt +11 -0
  20. workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
  21. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
  22. workbench/model_scripts/custom_models/proximity/proximity.py +11 -4
  23. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +11 -5
  24. workbench/model_scripts/custom_models/uq_models/meta_uq.template +11 -5
  25. workbench/model_scripts/custom_models/uq_models/ngboost.template +11 -5
  26. workbench/model_scripts/custom_models/uq_models/proximity.py +11 -4
  27. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +11 -5
  28. workbench/model_scripts/pytorch_model/generated_model_script.py +365 -173
  29. workbench/model_scripts/pytorch_model/pytorch.template +362 -170
  30. workbench/model_scripts/scikit_learn/generated_model_script.py +302 -0
  31. workbench/model_scripts/script_generation.py +10 -7
  32. workbench/model_scripts/uq_models/generated_model_script.py +43 -27
  33. workbench/model_scripts/uq_models/mapie.template +40 -24
  34. workbench/model_scripts/xgb_model/generated_model_script.py +36 -7
  35. workbench/model_scripts/xgb_model/xgb_model.template +36 -7
  36. workbench/repl/workbench_shell.py +14 -5
  37. workbench/resources/open_source_api.key +1 -1
  38. workbench/scripts/endpoint_test.py +162 -0
  39. workbench/scripts/{lambda_launcher.py → lambda_test.py} +10 -0
  40. workbench/utils/chemprop_utils.py +761 -0
  41. workbench/utils/pytorch_utils.py +527 -0
  42. workbench/utils/xgboost_model_utils.py +10 -5
  43. workbench/web_interface/components/model_plot.py +7 -1
  44. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/METADATA +3 -3
  45. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/RECORD +49 -43
  46. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/entry_points.txt +2 -1
  47. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  48. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
  49. workbench/model_scripts/__pycache__/script_generation.cpython-312.pyc +0 -0
  50. workbench/model_scripts/__pycache__/script_generation.cpython-313.pyc +0 -0
  51. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/WHEEL +0 -0
  52. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/licenses/LICENSE +0 -0
  53. {workbench-0.8.198.dist-info → workbench-0.8.203.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ workbench/algorithms/dataframe/data_source_eda.py,sha256=WgVL6tzBCw1tznQr8RQ6daQ
6
6
  workbench/algorithms/dataframe/feature_space_proximity.py,sha256=6RxzvbpLdDkHMm1D49Nv59SFcyYUj8bisd6_5EpBEGI,3515
7
7
  workbench/algorithms/dataframe/fingerprint_proximity.py,sha256=nGxfmYQ3bfMtvs90s4p7gaY9DN4gijdDU7R6B2lRHgo,5825
8
8
  workbench/algorithms/dataframe/projection_2d.py,sha256=zK4hc0OQrySmfcfFg8y0GxEL34uDNqvZL4OgttB9vRs,7834
9
- workbench/algorithms/dataframe/proximity.py,sha256=MYVkQfn-pqXCm25dwiXaBDQngtBaN8lM8yeILJAstjY,15468
9
+ workbench/algorithms/dataframe/proximity.py,sha256=dPTYD1N-JTIqg6iL7ak_JSouaCdfmBPjG08IRRvTLXU,15836
10
10
  workbench/algorithms/dataframe/storage/aggregation.py,sha256=VuTb7A6Vh6IS5djZeItvOLnnEOlf7tzMQ8OaYIuftvU,2852
11
11
  workbench/algorithms/dataframe/storage/feature_resolution.py,sha256=w_iLf8EFTg7Jc5laH-bsq8MEtZVqcg05W-GihCqR-r4,9450
12
12
  workbench/algorithms/dataframe/storage/feature_spider.py,sha256=uIZ4JHIKuhpy08wBFReSrohb5DGxx8vGroHUbjPm1jE,14353
@@ -27,17 +27,17 @@ workbench/algorithms/sql/descriptive_stats.py,sha256=VxSR5zQi8NmAWrJvOCO3wrmgVHY
27
27
  workbench/algorithms/sql/outliers.py,sha256=2hoilOk0gaz9pwrnGEBY2y7M-UqFED3KO_mFm_0_3ac,10645
28
28
  workbench/algorithms/sql/sample_rows.py,sha256=SRYoGb24QP_iPvOoW9bGZ95yZuseYDtyoNhilfoLu34,2688
29
29
  workbench/algorithms/sql/value_counts.py,sha256=F-rZoLTTKv1cHYl2_tDlvWDjczy76uLTr3EMHa-WrEk,3340
30
- workbench/api/__init__.py,sha256=kvrP70ypDOMdPGj_Eeftdh8J0lu_1qQVne6GXMkD4_E,1027
30
+ workbench/api/__init__.py,sha256=KDKzFb4SL8AArtd9ucTkFYdCxbsBMbK1ZMkj0G2rACY,1065
31
31
  workbench/api/compound.py,sha256=kf5EaM5qjWwsZutcxqj9IC_MPnDV1uVHDMns9OA_GOo,2545
32
32
  workbench/api/data_source.py,sha256=Ngz36YZWxFfpJbmURhM1LQPYjh5kdpZNGo6_fCRePbA,8321
33
- workbench/api/df_store.py,sha256=Wybb3zO-jPpAi2Ns8Ks1-lagvXAaBlRpBZHhnnl3Lms,6131
33
+ workbench/api/df_store.py,sha256=1qSYM3Xb4MwMMTMaF3CX0hOCEzhIbnra5Deivg4cryk,3014
34
34
  workbench/api/endpoint.py,sha256=spLse2UoAsZdu_ZxmAvMJX_aX-zutAsQ5_SPm9Xt-nA,3839
35
- workbench/api/feature_set.py,sha256=Yxei3tvWR4gSLcdJnNndux07dNeKNu1HKgsChJtHxEM,6633
35
+ workbench/api/feature_set.py,sha256=K0Sl59yAf_qZr8EH4rPjDotezCwP5Q7aG38FGaF4zi0,8062
36
36
  workbench/api/graph_store.py,sha256=LremJyPrQFgsHb7hxsctuCsoxx3p7TKtaY5qALHe6pc,4372
37
37
  workbench/api/meta.py,sha256=1_9989cPvf3hd3tA-83hLijOGNnhwXAF8aZF45adeDQ,8596
38
- workbench/api/model.py,sha256=apDRhbnbQvSu-krQJm0zQWvbKNs7DUcslMwLKq4edHk,4442
38
+ workbench/api/model.py,sha256=fncUc8MJwXyteKeXOlAy5IMjE48sH_VmDBi3P2MPGG4,4458
39
39
  workbench/api/monitor.py,sha256=Cez89Uac7Tzt47FxkjoX-YDGccEhvBcxw3sZFtw4ud8,4506
40
- workbench/api/parameter_store.py,sha256=7BObkuATuP6C5AG_46kCWsmuCwuh1vgMJDBSN0gTkwM,4294
40
+ workbench/api/parameter_store.py,sha256=_3MmPxKiVy7_OIgCSRlUv9xbk8nuiOWiCtZgT-AxN1k,2574
41
41
  workbench/api/pipeline.py,sha256=MSYGrDSXrRB_oQELtAlOwBfxSBTw3REAkHy5XBHau0Y,6261
42
42
  workbench/cached/__init__.py,sha256=wvTyIFvusv2HjU3yop6OSr3js5_-SZuR8nPmlCuZQJ4,525
43
43
  workbench/cached/cached_data_source.py,sha256=A0o4H9g1aEms8HkOHWnb46vJ5fx6ebs1aCYaQcf8gPI,2649
@@ -47,24 +47,24 @@ workbench/cached/cached_meta.py,sha256=DTlnb6jblviVmSg9w0F6LRVIuQ_lWBNqGh8vqKP5B
47
47
  workbench/cached/cached_model.py,sha256=iMc_fySUE5qau3feduVXMNb24JY0sBjt1g6WeLLciXc,4348
48
48
  workbench/cached/cached_pipeline.py,sha256=QOVnEKu5RbIdlNpJUi-0Ebh0_-C68RigSPwKh4dvZTM,1948
49
49
  workbench/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- workbench/core/artifacts/__init__.py,sha256=ps7rA_rbWnDbvWbg4kvu--IKMY8WmbPRyv4Si0xub1Q,965
51
- workbench/core/artifacts/artifact.py,sha256=WFGC1F61d7uFSRB7UTWYOF8O_wk8F9rn__THJL2veLM,17752
50
+ workbench/core/artifacts/__init__.py,sha256=ukcgbYlI9m99bzwaBNO01K1h0-cQkzsbh_jT_GyQ-LY,1034
51
+ workbench/core/artifacts/artifact.py,sha256=scWUbX2Sk1rxT8VEm_Z7YTxbOzkDASNyqqXB56xLZ2w,17721
52
52
  workbench/core/artifacts/athena_source.py,sha256=RNmCe7s6uH4gVHpcdJcL84aSbF5Q1ahJBLLGwHYRXEU,26081
53
53
  workbench/core/artifacts/cached_artifact_mixin.py,sha256=ngqFLZ4cQx_TFouXZgXZQsv_7W6XCvxVGXXSfzzaft8,3775
54
54
  workbench/core/artifacts/data_capture_core.py,sha256=q8f79rRTYiZ7T4IQRWXl8ZvPpcvZyNxYERwvo8o0OQc,14858
55
55
  workbench/core/artifacts/data_source_abstract.py,sha256=5IRCzFVK-17cd4NXPMRfx99vQAmQ0WHE5jcm5RfsVTg,10619
56
56
  workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rGgz-6oo3ge4i_YXVC-M,2380
57
- workbench/core/artifacts/endpoint_core.py,sha256=3e7GOWviFvVKU32bKzHGaDAk-wWxuglnZ0nPEXB0LXE,52200
58
- workbench/core/artifacts/feature_set_core.py,sha256=-4_FR4lLGHoeOkPRsAliEubVqZgai2RJJr1qJMo9Wao,36645
59
- workbench/core/artifacts/model_core.py,sha256=9UBuIm0xjFZlIlJ5YX945y5tGaAYz0YzlT5LWk_oG98,51156
57
+ workbench/core/artifacts/df_store_core.py,sha256=AueNr_JvuLLu_ByE7cb3u-isH9u0Q7cMP-UCgCX-Ctg,3536
58
+ workbench/core/artifacts/endpoint_core.py,sha256=oWWJSXSod5JzI7b4JvoxKWm46lv0FNZZf_FIZR4ZP9Q,60832
59
+ workbench/core/artifacts/feature_set_core.py,sha256=wZy-02WXWmSBet5t8mWXFRdv9O4MtW3hWqJuVv7Kok0,39330
60
+ workbench/core/artifacts/model_core.py,sha256=QIgV5MJr8aDY63in83thdNc5-bzkWLn5f5vvsS4aNYo,52348
60
61
  workbench/core/artifacts/monitor_core.py,sha256=M307yz7tEzOEHgv-LmtVy9jKjSbM98fHW3ckmNYrwlU,27897
62
+ workbench/core/artifacts/parameter_store_core.py,sha256=sHvjJMuybM4qdcKhH-Sx6Ur6Yn5ozA3QHwtidsnhyG8,2867
61
63
  workbench/core/cloud_platform/cloud_meta.py,sha256=-g4-LTC3D0PXb3VfaXdLR1ERijKuHdffeMK_zhD-koQ,8809
62
64
  workbench/core/cloud_platform/aws/README.md,sha256=QT5IQXoUHbIA0qQ2wO6_2P2lYjYQFVYuezc22mWY4i8,97
63
65
  workbench/core/cloud_platform/aws/aws_account_clamp.py,sha256=V5iVsoGvSRilARtTdExnt27QptzAcJaW0s3nm2B8-ow,8286
64
- workbench/core/cloud_platform/aws/aws_df_store.py,sha256=utRIlTCPwFneHHZ8_Z3Hw3rOJSeryiFA4wBtucxULRQ,15055
65
66
  workbench/core/cloud_platform/aws/aws_graph_store.py,sha256=ytYxQTplUmeWbsPmxyZbf6mO9qyTl60ewlJG8MyfyEY,9414
66
67
  workbench/core/cloud_platform/aws/aws_meta.py,sha256=eY9Pn6pl2yAyseACFb2nitR-0vLwG4i8CSEXe8Iaswc,34778
67
- workbench/core/cloud_platform/aws/aws_parameter_store.py,sha256=9ekuMOQFHFMIEV68UbHhS_fLB9iqG5Hvu4EV6iamEpk,10400
68
68
  workbench/core/cloud_platform/aws/aws_secrets_manager.py,sha256=TUnddp1gX-OwxJ_oO5ONh7OI4Z2HC_6euGkJ-himCCk,8615
69
69
  workbench/core/cloud_platform/aws/aws_session.py,sha256=2Gc_k4Q87BBeQDgXgVR-w-qmsF6ncZR8wvTeNnixM6k,6926
70
70
  workbench/core/cloud_platform/aws/cache_dataframe.py,sha256=VnObkVqcjg7v4fegrIkXR1j-K2AHTBpSAoriUXDe12A,2314
@@ -102,14 +102,14 @@ workbench/core/transforms/features_to_features/__init__.py,sha256=47DEQpj8HBSa-_
102
102
  workbench/core/transforms/features_to_features/heavy/emr/Readme.md,sha256=YtQgCEQeKe0CQXQkhzMTYq9xOtCsCYb5P5LW2BmRKWQ,68
103
103
  workbench/core/transforms/features_to_features/heavy/glue/Readme.md,sha256=TuyCatWfoDr99zUwvOcxf-TqMkQzaMqXlj5nmFcRzfo,48
104
104
  workbench/core/transforms/features_to_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
- workbench/core/transforms/features_to_model/features_to_model.py,sha256=cExOKz6lrpSMQk2TgUDhiF9jxzAiOtrKSmasKlWVCO4,20110
105
+ workbench/core/transforms/features_to_model/features_to_model.py,sha256=JdKKz3eKrKhicA1WxTfmb1IqQNCdHJE0CKDs66bLHYU,21071
106
106
  workbench/core/transforms/model_to_endpoint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
107
  workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=TIYXvuK0s383PwJ4iS6fCRhuif6oIxsoWb4CpMGJjY4,6358
108
108
  workbench/core/transforms/pandas_transforms/__init__.py,sha256=xL4MT8-fZ1SFqDbTLc8XyxjupHtB1YR6Ej0AC2nwd7I,894
109
109
  workbench/core/transforms/pandas_transforms/data_to_pandas.py,sha256=sJHPeuNF8Q8aQqgRnkdWkyvur5cbggdUVIwR-xF3Dlo,3621
110
110
  workbench/core/transforms/pandas_transforms/features_to_pandas.py,sha256=af6xdPt2V4zhh-SzQa_UYxdmNMzMLXbrbsznV5QoIJg,3441
111
111
  workbench/core/transforms/pandas_transforms/pandas_to_data.py,sha256=cqo6hQmzUGUFACvNuVLZQdgrlXrQIu4NjqK-ujPmoIc,9181
112
- workbench/core/transforms/pandas_transforms/pandas_to_features.py,sha256=mj00L40PXhw-JHG2SZe53yJAzicgn4xuM2VbmOY-wsM,21480
112
+ workbench/core/transforms/pandas_transforms/pandas_to_features.py,sha256=AqXS4ZND7lg94enclRP9wGBrYm4AmhL3c--q0o-6_JM,21972
113
113
  workbench/core/transforms/pandas_transforms/pandas_to_features_chunked.py,sha256=0R8mQlWfbIlTVmYUmrtu2gsw0AE815k6kqPgpd0bmyQ,4422
114
114
  workbench/core/views/__init__.py,sha256=UZJMAJBCMVM3uSYmnFg8c2LWtdu9-479WNAdVMIohAc,962
115
115
  workbench/core/views/column_subset_view.py,sha256=vGDKTTGrPIY-IFOeWvudJrhKiq0OjWDp5rTuuj-X40U,4261
@@ -119,54 +119,58 @@ workbench/core/views/display_view.py,sha256=9K4O77ZnKOh93aMRhxcQJQ1lqScLhuJnU_tH
119
119
  workbench/core/views/inference_view.py,sha256=9s70M0dFdGq0tWvzMZfgUK7EPKtuvcQhux0uyRZuuLM,3293
120
120
  workbench/core/views/pandas_to_view.py,sha256=20uCsnG2iMh-U1VxqVUUtnrWAY98SeuHjmfJK_wcq1I,6422
121
121
  workbench/core/views/training_view.py,sha256=7HwhbQhDBhT3Zo_gssS-b4eueJ0h9nqqT8YGFSuaEcU,9016
122
- workbench/core/views/view.py,sha256=8pZSVDhOFMnAh49ccvnvjQs0dWpiA5IeHaYrztRcqkM,13532
122
+ workbench/core/views/view.py,sha256=DvmEA1xdvL980GET_cnbmHzqSy6IhlNaZcoQnVTtYis,13534
123
123
  workbench/core/views/view_utils.py,sha256=CwOlpqXpumCr6REi-ey7Qjz5_tpg-s4oWHmlOVu8POQ,12270
124
124
  workbench/core/views/storage/mdq_view.py,sha256=qf_ep1KwaXOIfO930laEwNIiCYP7VNOqjE3VdHfopRE,5195
125
- workbench/model_scripts/script_generation.py,sha256=dLxVRrvrrI_HQatJRAXta6UEbFFbkgITNvDJllQZyCM,7905
126
- workbench/model_scripts/__pycache__/script_generation.cpython-312.pyc,sha256=p3q4RDNMkLKpt4UMrdY94rZcZGgvNcTstS2r5ZIvrSg,7570
127
- workbench/model_scripts/__pycache__/script_generation.cpython-313.pyc,sha256=eodln_BQ1mvfLxE6n1_6WFtMBTKqepwWmXubBk7pL1s,7559
125
+ workbench/model_scripts/script_generation.py,sha256=_AhzM2qzjBuI7pIaXBRZ1YOOs2lwsKQGVM_ovL6T1bo,8135
126
+ workbench/model_scripts/chemprop/chemprop.template,sha256=NR1jMb-IPxBAaQ-KiPR09ylL_gTIC35lZwBpBQPtzig,38109
127
+ workbench/model_scripts/chemprop/generated_model_script.py,sha256=Cxfbu7mNf_HLBCzlsOOXR1u1Y-eHMma63YWM9l8ku44,38206
128
+ workbench/model_scripts/chemprop/requirements.txt,sha256=PIuUdPAeDUH3I2M_5nIrCnCfs3FL1l9V5kzHqgCcu7s,281
128
129
  workbench/model_scripts/custom_models/chem_info/Readme.md,sha256=mH1lxJ4Pb7F5nBnVXaiuxpi8zS_yjUw_LBJepVKXhlA,574
130
+ workbench/model_scripts/custom_models/chem_info/fingerprints.py,sha256=Qvs8jaUwguWUq3Q3j695MY0t0Wk3BvroW-oWBwalMUo,5255
129
131
  workbench/model_scripts/custom_models/chem_info/mol_descriptors.py,sha256=c8gkHZ-8s3HJaW9zN9pnYGK7YVW8Y0xFqQ1G_ysrF2Y,18789
130
132
  workbench/model_scripts/custom_models/chem_info/mol_standardize.py,sha256=qPLCdVMSXMOWN-01O1isg2zq7eQyFAI0SNatHkRq1uw,17524
131
133
  workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py,sha256=xljMjdfh4Idi4v1Afq1zZxvF1SDa7pDOLSAhvGBEj88,2891
132
- workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py,sha256=tMyMmeN1xajVWkqkV5mobYB8CYkzW9FRH8Vi3t81uo8,3231
134
+ workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py,sha256=LqVh_AHObo0uxHt_uNmeemScTLjM2j9C3I_QFJXdmUI,3232
133
135
  workbench/model_scripts/custom_models/chem_info/requirements.txt,sha256=7HBUzvNiM8lOir-UfQabXYlUp3gxdGJ42u18EuSMGjc,39
134
136
  workbench/model_scripts/custom_models/meta_endpoints/example.py,sha256=hzOAuLhIGB8vei-555ruNxpsE1GhuByHGjGB0zw8GSs,1726
135
137
  workbench/model_scripts/custom_models/network_security/Readme.md,sha256=Z2gtiu0hLHvEJ1x-_oFq3qJZcsK81sceBAGAGltpqQ8,222
136
138
  workbench/model_scripts/custom_models/proximity/Readme.md,sha256=RlMFAJZgAT2mCgDk-UwR_R0Y_NbCqeI5-8DUsxsbpWQ,289
137
139
  workbench/model_scripts/custom_models/proximity/feature_space_proximity.template,sha256=eOllmqB20BWtTiV53dgpIqXKtgSbPFDW_zf8PvM3oF0,4813
138
- workbench/model_scripts/custom_models/proximity/proximity.py,sha256=MYVkQfn-pqXCm25dwiXaBDQngtBaN8lM8yeILJAstjY,15468
140
+ workbench/model_scripts/custom_models/proximity/proximity.py,sha256=dPTYD1N-JTIqg6iL7ak_JSouaCdfmBPjG08IRRvTLXU,15836
139
141
  workbench/model_scripts/custom_models/proximity/requirements.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
140
142
  workbench/model_scripts/custom_models/uq_models/Readme.md,sha256=UVpL-lvtTrLqwBeQFinLhd_uNrEw4JUlggIdUSDrd-w,188
141
143
  workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template,sha256=ca3CaAk6HVuNv1HnPgABTzRY3oDrRxomjgD4V1ZDwoc,6448
142
- workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template,sha256=xlKLHeLQkScONnrlbAGIsrCm2wwsvcfv4Vdrw4nlc_8,13457
144
+ workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template,sha256=449Enh4-7RrMrxt1oS_SHJHGV8yYcFlWHsLrCVTFQGI,13778
143
145
  workbench/model_scripts/custom_models/uq_models/gaussian_process.template,sha256=3nMlCi8nEbc4N-MQTzjfIcljfDQkUmWeLBfmd18m5fg,6632
144
- workbench/model_scripts/custom_models/uq_models/meta_uq.template,sha256=XTfhODRaHlI1jZGo9pSe-TqNsk2_nuSw0xMO2fKzDv8,14011
145
- workbench/model_scripts/custom_models/uq_models/ngboost.template,sha256=v1rviYTJGJnQRGgAyveXhOQlS-WFCTlc2vdnWq6HIXk,8241
146
- workbench/model_scripts/custom_models/uq_models/proximity.py,sha256=MYVkQfn-pqXCm25dwiXaBDQngtBaN8lM8yeILJAstjY,15468
146
+ workbench/model_scripts/custom_models/uq_models/meta_uq.template,sha256=RIC90o9iI37ylOOJBUVDVF2FmYs9kJl8AifL-AYIwAI,14282
147
+ workbench/model_scripts/custom_models/uq_models/ngboost.template,sha256=_ukYcsL4pnWvFV1oA89_wfVpxWbvoEx6MGwKxc38kSI,8512
148
+ workbench/model_scripts/custom_models/uq_models/proximity.py,sha256=dPTYD1N-JTIqg6iL7ak_JSouaCdfmBPjG08IRRvTLXU,15836
147
149
  workbench/model_scripts/custom_models/uq_models/requirements.txt,sha256=fw7T7t_YJAXK3T6Ysbesxh_Agx_tv0oYx72cEBTqRDY,98
148
150
  workbench/model_scripts/custom_script_example/custom_model_script.py,sha256=T8aydawgRVAdSlDimoWpXxG2YuWWQkbcjBVjAeSG2_0,6408
149
151
  workbench/model_scripts/custom_script_example/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
150
- workbench/model_scripts/ensemble_xgb/ensemble_xgb.template,sha256=pWmuo-EVz0owvkRI-h9mUTYt1-ouyD-_yyQu6SQbYZ4,10350
152
+ workbench/model_scripts/ensemble_xgb/ensemble_xgb.template,sha256=lMEx0IkawcpTI52gSjCp1Wr0g2vWd4kIGuIqjXhA2GA,10671
151
153
  workbench/model_scripts/ensemble_xgb/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
152
- workbench/model_scripts/pytorch_model/generated_model_script.py,sha256=PG9MbcJrWcOxS9kl4tAZqfCjDKxvZpG4LGbHyPBRsmo,26231
153
- workbench/model_scripts/pytorch_model/pytorch.template,sha256=_gRp6DH294FLxF21UpSTq7s9RFfrLjViKvjXQ4yDfBQ,21999
154
+ workbench/model_scripts/pytorch_model/generated_model_script.py,sha256=nst6kRN8T_LmmDANAaFYSC9GdGQtrDYdVBs4mU1RJ-U,32883
155
+ workbench/model_scripts/pytorch_model/pytorch.template,sha256=PFmGO_jP8S6RKvAzAXiuogkVXYTb5MKajJk_57qQDcc,30718
154
156
  workbench/model_scripts/pytorch_model/requirements.txt,sha256=ICS5nW0wix44EJO2tJszJSaUrSvhSfdedn6FcRInGx4,181
157
+ workbench/model_scripts/scikit_learn/generated_model_script.py,sha256=xhQIglpAgPRCH9iwI3wI0N0V6p9AgqW0mVOMuSXzUCk,17187
155
158
  workbench/model_scripts/scikit_learn/requirements.txt,sha256=aVvwiJ3LgBUhM_PyFlb2gHXu_kpGPho3ANBzlOkfcvs,107
156
159
  workbench/model_scripts/scikit_learn/scikit_learn.template,sha256=QQvqx-eX9ZTbYmyupq6R6vIQwosmsmY_MRBPaHyfjdk,12586
157
- workbench/model_scripts/uq_models/generated_model_script.py,sha256=JiPo_lVUSZ66rF6152nZtZ8h6_bzT9rPkKsmRsf6nCU,25707
158
- workbench/model_scripts/uq_models/mapie.template,sha256=lq_kG9aRE_7_Or_jVfM4M5zkn9A1fEatneKI7_2zLQs,22784
160
+ workbench/model_scripts/uq_models/generated_model_script.py,sha256=caAXcK03XQQcPo2rvFJtZqnwQpLAz7v0CQWBWDO2Dts,27866
161
+ workbench/model_scripts/uq_models/mapie.template,sha256=on3I40D7zyNfvfqBf5k8VXCFtmepcxKmqVWCH5Q9S84,23432
159
162
  workbench/model_scripts/uq_models/requirements.txt,sha256=fw7T7t_YJAXK3T6Ysbesxh_Agx_tv0oYx72cEBTqRDY,98
160
- workbench/model_scripts/xgb_model/generated_model_script.py,sha256=jp4OCWdH_j5dc2ZxUMdlxSumZohbV02w9HWrCRt-8kc,18083
163
+ workbench/model_scripts/xgb_model/generated_model_script.py,sha256=qUGg5R-boaswzXtgKp_J7JPxFzMdRNv51QeF-lMWL-4,19334
161
164
  workbench/model_scripts/xgb_model/requirements.txt,sha256=jWlGc7HH7vqyukTm38LN4EyDi8jDUPEay4n45z-30uc,104
162
- workbench/model_scripts/xgb_model/xgb_model.template,sha256=0uXknIEqgUaIFUfu2gfkxa3WHUr8HBBqBepGUTDvrhQ,17917
165
+ workbench/model_scripts/xgb_model/xgb_model.template,sha256=gOXHsymCZjde6L2LvrlTtMRprJ-mXczpE4ZB8mhZZ0s,19168
163
166
  workbench/repl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
164
- workbench/repl/workbench_shell.py,sha256=cbFaKaRyAJd79i8m6-3OIVK0f-W_wARwJbjxhqC11_s,22135
165
- workbench/resources/open_source_api.key,sha256=3S0OTblsmC0msUPdE_dbBmI83xJNmYscuwLJ57JmuOc,433
167
+ workbench/repl/workbench_shell.py,sha256=__FOnBqe3I6Luzb-N9mAecOUfcPEkctzxBfJSKTqDDA,22504
168
+ workbench/resources/open_source_api.key,sha256=vi9099CjkNnZ1IXB6AQWcG83iFYn2db0iTfTlpGVA1o,432
166
169
  workbench/resources/signature_verify_pub.pem,sha256=V3-u-3_z2PH-805ybkKvzDOBwAbvHxcKn0jLBImEtzM,272
167
170
  workbench/scripts/check_double_bond_stereo.py,sha256=p5hnL54Weq77ES0HCELq9JeoM-PyUGkvVSeWYF2dKyo,7776
171
+ workbench/scripts/endpoint_test.py,sha256=G4GdQMa7KlKX7WiUSFX_OHAzDdCyf8ZbVYbZBkAPiSo,5339
168
172
  workbench/scripts/glue_launcher.py,sha256=bIKQvfGxpAhzbeNvTnHfRW_5kQhY-169_868ZnCejJk,10692
169
- workbench/scripts/lambda_launcher.py,sha256=8-zzsyuzxI8qX66DGpJ1fCJVU0QCfTGdaPqBsJXPr-M,1877
173
+ workbench/scripts/lambda_test.py,sha256=SLAPIXeGQn82neQ6-Hif3VS3LWLwT0-dGw8yWw2aXRQ,2077
170
174
  workbench/scripts/ml_pipeline_batch.py,sha256=1T5JnLlUJR7bwAGBLHmLPOuj1xFRqVIQX8PsuDhHy8o,4907
171
175
  workbench/scripts/ml_pipeline_sqs.py,sha256=5c8qX-SoV4htOUcSXk4OzD7BQskCnaA7cLMiF4Et24c,6666
172
176
  workbench/scripts/monitor_cloud_watch.py,sha256=s7MY4bsHts0nup9G0lWESCvgJZ9Mw1Eo-c8aKRgLjMw,9235
@@ -198,6 +202,7 @@ workbench/utils/athena_utils.py,sha256=DDyLhJujzh1PfejtGU7ZzOf5hLPOgoXmi4Lrn-_AJ
198
202
  workbench/utils/aws_utils.py,sha256=x8c_WxtdSKmBqNg8P_Z6K2m4AsSMEiD_kh2nVaUZ28c,22077
199
203
  workbench/utils/bulk_utils.py,sha256=s1lYN2Uk536MNGetekLYL_VL0N34hUjk1FX9BAz3Qu0,1182
200
204
  workbench/utils/cache.py,sha256=0R5RXYEz_XHARK3anmQC4VRMawMks_cJ8S4vwC2roAE,5524
205
+ workbench/utils/chemprop_utils.py,sha256=0eszF9K2DYB5bOxbWSomr9SuX3QANdF7ROmWa0tikzY,28805
201
206
  workbench/utils/cloudwatch_handler.py,sha256=t0L280Qa1nMq95dwnf8lB5g8FHrQAyGY5S4JwP3yIa8,5165
202
207
  workbench/utils/cloudwatch_utils.py,sha256=wXSqKcJlSnHyC0D6d4RsH8wwmx_0CsffcetUgXlZ_78,4828
203
208
  workbench/utils/color_utils.py,sha256=TmDGLK44t975lkfjt_1O-ee02QxrKfke7vPuXb-V-Uo,11779
@@ -226,6 +231,7 @@ workbench/utils/pipeline_utils.py,sha256=yzR5tgAzz6zNqvxzZR6YqsbS7r3QDKzBXozaM_A
226
231
  workbench/utils/plot_utils.py,sha256=yFveic-4aY7lKT-CPhYdbIkBr-mZqjbhaRmCySWG_kE,6537
227
232
  workbench/utils/plugin_manager.py,sha256=JWfyFHQih_J_MMtAT1cgjGVnNVPk9bM917LkfH8Z-_A,13873
228
233
  workbench/utils/prox_utils.py,sha256=V0YSxI6lboZl8Bed1GUobFqfMhfpehn2FtgqHpkuhDQ,6170
234
+ workbench/utils/pytorch_utils.py,sha256=ig91xlAaWaCp06N4Ml2yoteDQGMJkAfysktbFEImNII,20260
229
235
  workbench/utils/redis_cache.py,sha256=39LFSWmOlNNcah02D3sBnmibc-DPeKC3SNq71K4HaB4,12893
230
236
  workbench/utils/repl_utils.py,sha256=rWOMv2HiEIp8ZL6Ps6DlwiJlGr-pOhv9OZQhm3aR-1A,4668
231
237
  workbench/utils/s3_utils.py,sha256=Xme_o_cftC_jWnw6R9YKS6-6C11zaCBAoQDlY3dZb5o,7337
@@ -240,7 +246,7 @@ workbench/utils/workbench_cache.py,sha256=IQchxB81iR4eVggHBxUJdXxUCRkqWz1jKe5gxN
240
246
  workbench/utils/workbench_event_bridge.py,sha256=z1GmXOB-Qs7VOgC6Hjnp2DI9nSEWepaSXejACxTIR7o,4150
241
247
  workbench/utils/workbench_logging.py,sha256=WCuMWhQwibrvcGAyj96h2wowh6dH7zNlDJ7sWUzdCeI,10263
242
248
  workbench/utils/workbench_sqs.py,sha256=RwM80z7YWwdtMaCKh7KWF8v38f7eBRU7kyC7ZhTRuI0,2072
243
- workbench/utils/xgboost_model_utils.py,sha256=IF2d4dwwGMnRhvXheq82PZgAWRviac0DdnHDzTwa9_I,24955
249
+ workbench/utils/xgboost_model_utils.py,sha256=Zs3nTqZRDm2rbziuFVg5XzYyjf6TwBUltqmb0PmP4H8,25046
244
250
  workbench/utils/chem_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
245
251
  workbench/utils/chem_utils/fingerprints.py,sha256=Qvs8jaUwguWUq3Q3j695MY0t0Wk3BvroW-oWBwalMUo,5255
246
252
  workbench/utils/chem_utils/misc.py,sha256=Nevf8_opu-uIPrv_1_0ubuFVVo2_fGUkMoLAHB3XAeo,7372
@@ -256,7 +262,7 @@ workbench/web_interface/components/component_interface.py,sha256=QCPWqiZLkVsAEzQ
256
262
  workbench/web_interface/components/correlation_matrix.py,sha256=Lv4vRta5-TdxBsu0G8Ea7hyyR3XyPes-k5AfL6qZWEc,6376
257
263
  workbench/web_interface/components/data_details_markdown.py,sha256=axDs6eXniglBmvFwIKjpJ5oyT-3D4FO9IcfA_cl-EJ8,9706
258
264
  workbench/web_interface/components/endpoint_metric_plots.py,sha256=H0cXuj9UQrrh_2JvRHtq7O8pMXFXKs7o9XpzySENylw,3441
259
- workbench/web_interface/components/model_plot.py,sha256=Rojx_ZED4P9gvgeEsUm6xnwMNPoeOyn0evw45BWTITc,2536
265
+ workbench/web_interface/components/model_plot.py,sha256=9KSILXvq1L_DUZszj5ozWwi43jEtJlpWdqSs3mXBPeQ,2774
260
266
  workbench/web_interface/components/plugin_interface.py,sha256=jGRq4igUTVXUT4sDqqsKKI2yjilV0ORNBQq6CjEWE84,9563
261
267
  workbench/web_interface/components/plugin_unit_test.py,sha256=Lx3HhIMHzrwDUYs2bADSFYzQq3sFHS9RyA415hyUOdc,7747
262
268
  workbench/web_interface/components/regression_plot.py,sha256=k18Bd0fcH7ig6kL5GqC_dINci3_YLle_fSEM32zXtzY,3342
@@ -285,9 +291,9 @@ workbench/web_interface/page_views/main_page.py,sha256=X4-KyGTKLAdxR-Zk2niuLJB2Y
285
291
  workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
286
292
  workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
287
293
  workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
288
- workbench-0.8.198.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
289
- workbench-0.8.198.dist-info/METADATA,sha256=jzKIsckClbcN7Xcf0CUk3EWYXqR_hsVE7r4lPGaCj3c,10495
290
- workbench-0.8.198.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
291
- workbench-0.8.198.dist-info/entry_points.txt,sha256=o7ohD4D2oygnHp7i9-C0LfcHDuPW5Tv0JXGAg97DpGk,413
292
- workbench-0.8.198.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
293
- workbench-0.8.198.dist-info/RECORD,,
294
+ workbench-0.8.203.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
295
+ workbench-0.8.203.dist-info/METADATA,sha256=qC58O-dE5_EMFpEJWDa9fyPSNwRt-n6K7krsrsJP13I,10500
296
+ workbench-0.8.203.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
297
+ workbench-0.8.203.dist-info/entry_points.txt,sha256=j02NCuno2Y_BuE4jEvw-IL73WZ9lkTpLwom29uKcLCw,458
298
+ workbench-0.8.203.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
299
+ workbench-0.8.203.dist-info/RECORD,,
@@ -1,7 +1,8 @@
1
1
  [console_scripts]
2
2
  cloud_watch = workbench.scripts.monitor_cloud_watch:main
3
+ endpoint_test = workbench.scripts.endpoint_test:main
3
4
  glue_launcher = workbench.scripts.glue_launcher:main
4
- lambda_launcher = workbench.scripts.lambda_launcher:main
5
+ lambda_test = workbench.scripts.lambda_test:main
5
6
  ml_pipeline_batch = workbench.scripts.ml_pipeline_batch:main
6
7
  ml_pipeline_sqs = workbench.scripts.ml_pipeline_sqs:main
7
8
  workbench = workbench.repl.workbench_shell:launch_shell
@@ -1,404 +0,0 @@
1
- """AWSDFStore: Fast/efficient storage of DataFrames using AWS S3/Parquet/Snappy"""
2
-
3
- from datetime import datetime
4
- from typing import Union
5
- import logging
6
- import awswrangler as wr
7
- import pandas as pd
8
- import re
9
- from urllib.parse import urlparse
10
-
11
- # Workbench Imports
12
- from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
13
- from workbench.utils.config_manager import ConfigManager
14
- from workbench.utils.aws_utils import not_found_returns_none
15
-
16
-
17
- class AWSDFStore:
18
- """AWSDFStore: Fast/efficient storage of DataFrames using AWS S3/Parquet/Snappy
19
-
20
- Common Usage:
21
- ```python
22
- df_store = AWSDFStore()
23
-
24
- # List Data
25
- df_store.list()
26
-
27
- # Add DataFrame
28
- df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
29
- df_store.upsert("/test/my_data", df)
30
-
31
- # Retrieve DataFrame
32
- df = df_store.get("/test/my_data")
33
- print(df)
34
-
35
- # Delete Data
36
- df_store.delete("/test/my_data")
37
- ```
38
- """
39
-
40
- def __init__(self, path_prefix: Union[str, None] = None):
41
- """AWSDFStore Init Method
42
-
43
- Args:
44
- path_prefix (Union[str, None], optional): Path prefix for storage locations (Defaults to None)
45
- """
46
- self.log = logging.getLogger("workbench")
47
- self._base_prefix = "df_store/"
48
- self.path_prefix = self._base_prefix + path_prefix if path_prefix else self._base_prefix
49
- self.path_prefix = re.sub(r"/+", "/", self.path_prefix) # Collapse slashes
50
-
51
- # Get the Workbench Bucket
52
- config = ConfigManager()
53
- self.workbench_bucket = config.get_config("WORKBENCH_BUCKET")
54
-
55
- # Get the S3 Client
56
- self.boto3_session = AWSAccountClamp().boto3_session
57
- self.s3_client = self.boto3_session.client("s3")
58
-
59
- def list(self, include_cache: bool = False) -> list:
60
- """List all objects in the data_store prefix
61
-
62
- Args:
63
- include_cache (bool, optional): Include cache objects in the list (Defaults to False)
64
-
65
- Returns:
66
- list: A list of all the objects in the data_store prefix.
67
- """
68
- df = self.summary(include_cache=include_cache)
69
- return df["location"].tolist()
70
-
71
- def last_modified(self, location: str) -> Union[datetime, None]:
72
- """Return the last modified date of a graph.
73
-
74
- Args:
75
- location (str): Logical location of the graph.
76
-
77
- Returns:
78
- Union[datetime, None]: Last modified datetime or None if not found.
79
- """
80
- s3_uri = self._generate_s3_uri(location)
81
- bucket, key = self._parse_s3_uri(s3_uri)
82
-
83
- try:
84
- response = self.s3_client.head_object(Bucket=bucket, Key=key)
85
- return response["LastModified"]
86
- except self.s3_client.exceptions.ClientError:
87
- return None
88
-
89
- def summary(self, include_cache: bool = False) -> pd.DataFrame:
90
- """Return a nicely formatted summary of object locations, sizes (in MB), and modified dates.
91
-
92
- Args:
93
- include_cache (bool, optional): Include cache objects in the summary (Defaults to False)
94
- """
95
- df = self.details(include_cache=include_cache)
96
-
97
- # Create a formatted DataFrame
98
- formatted_df = pd.DataFrame(
99
- {
100
- "location": df["location"],
101
- "size (MB)": (df["size"] / (1024 * 1024)).round(2), # Convert size to MB
102
- "modified": pd.to_datetime(df["modified"]).dt.strftime("%Y-%m-%d %H:%M:%S"), # Format date
103
- }
104
- )
105
- return formatted_df
106
-
107
- def details(self, include_cache: bool = False) -> pd.DataFrame:
108
- """Return detailed metadata for all objects, optionally excluding the specified prefix.
109
-
110
- Args:
111
- include_cache (bool, optional): Include cache objects in the details (Defaults to False)
112
- """
113
- try:
114
- response = self.s3_client.list_objects_v2(Bucket=self.workbench_bucket, Prefix=self.path_prefix)
115
- if "Contents" not in response:
116
- return pd.DataFrame(columns=["location", "s3_file", "size", "modified"])
117
-
118
- # Collect details for each object
119
- data = []
120
- for obj in response["Contents"]:
121
- full_key = obj["Key"]
122
-
123
- # Reverse logic: Strip the bucket/prefix in the front and .parquet in the end
124
- location = full_key.replace(f"{self.path_prefix}", "/").split(".parquet")[0]
125
- s3_file = f"s3://{self.workbench_bucket}/{full_key}"
126
- size = obj["Size"]
127
- modified = obj["LastModified"]
128
- data.append([location, s3_file, size, modified])
129
-
130
- # Create the DataFrame
131
- df = pd.DataFrame(data, columns=["location", "s3_file", "size", "modified"])
132
-
133
- # Apply the exclude_prefix filter if set
134
- cache_prefix = "/workbench/dataframe_cache/"
135
- if not include_cache:
136
- df = df[~df["location"].str.startswith(cache_prefix)]
137
-
138
- return df
139
-
140
- except Exception as e:
141
- self.log.error(f"Failed to get object details: {e}")
142
- return pd.DataFrame(columns=["location", "s3_file", "size", "created", "modified"])
143
-
144
- def check(self, location: str) -> bool:
145
- """Check if a DataFrame exists at the specified location
146
-
147
- Args:
148
- location (str): The location of the data to check.
149
-
150
- Returns:
151
- bool: True if the data exists, False otherwise.
152
- """
153
- # Generate the specific S3 prefix for the target location
154
- s3_prefix = f"{self.path_prefix}/{location}.parquet/"
155
- s3_prefix = re.sub(r"/+", "/", s3_prefix) # Collapse slashes
156
-
157
- # Use list_objects_v2 to check if any objects exist under this specific prefix
158
- response = self.s3_client.list_objects_v2(Bucket=self.workbench_bucket, Prefix=s3_prefix, MaxKeys=1)
159
- return "Contents" in response
160
-
161
- @not_found_returns_none
162
- def get(self, location: str) -> Union[pd.DataFrame, None]:
163
- """Retrieve a DataFrame from AWS S3.
164
-
165
- Args:
166
- location (str): The location of the data to retrieve.
167
-
168
- Returns:
169
- pd.DataFrame: The retrieved DataFrame or None if not found.
170
- """
171
- s3_uri = self._generate_s3_uri(location)
172
- return wr.s3.read_parquet(s3_uri)
173
-
174
- def upsert(self, location: str, data: Union[pd.DataFrame, pd.Series]):
175
- """Insert or update a DataFrame or Series in the AWS S3.
176
-
177
- Args:
178
- location (str): The location of the data.
179
- data (Union[pd.DataFrame, pd.Series]): The data to be stored.
180
- """
181
- # Check if the data is a Pandas Series, convert it to a DataFrame
182
- if isinstance(data, pd.Series):
183
- data = data.to_frame()
184
-
185
- # Ensure data is a DataFrame
186
- if not isinstance(data, pd.DataFrame):
187
- raise ValueError("Only Pandas DataFrame or Series objects are supported.")
188
-
189
- # Convert object columns to string type to avoid PyArrow type inference issues.
190
- data = self.type_convert_before_parquet(data)
191
-
192
- # Update/Insert the DataFrame to S3
193
- s3_uri = self._generate_s3_uri(location)
194
- try:
195
- wr.s3.to_parquet(df=data, path=s3_uri, dataset=True, mode="overwrite", index=True)
196
- self.log.info(f"Dataframe cached {s3_uri}...")
197
- except Exception as e:
198
- self.log.error(f"Failed to cache dataframe '{s3_uri}': {e}")
199
- raise
200
-
201
- @staticmethod
202
- def type_convert_before_parquet(df: pd.DataFrame) -> pd.DataFrame:
203
- # Convert object columns to string type to avoid PyArrow type inference issues.
204
- df = df.copy()
205
- object_cols = df.select_dtypes(include=["object"]).columns
206
- df[object_cols] = df[object_cols].astype("str")
207
- return df
208
-
209
- def delete(self, location: str):
210
- """Delete a DataFrame from the AWS S3.
211
-
212
- Args:
213
- location (str): The location of the data to delete.
214
- """
215
- s3_uri = self._generate_s3_uri(location)
216
-
217
- # Check if the folder (prefix) exists in S3
218
- if not wr.s3.list_objects(s3_uri):
219
- self.log.info(f"Data '{location}' does not exist in S3...")
220
- return
221
-
222
- # Delete the data from S3
223
- try:
224
- wr.s3.delete_objects(s3_uri)
225
- self.log.info(f"Data '{location}' deleted successfully from S3.")
226
- except Exception as e:
227
- self.log.error(f"Failed to delete data '{location}': {e}")
228
-
229
- def delete_recursive(self, location: str):
230
- """Recursively delete all data under the specified location in AWS S3.
231
-
232
- Args:
233
- location (str): The location prefix of the data to delete.
234
- """
235
- # Construct the full prefix for S3
236
- s3_prefix = re.sub(r"/+", "/", f"{self.path_prefix}/{location}") # Collapse slashes
237
- s3_prefix = s3_prefix.rstrip("/") + "/" # Ensure the prefix ends with a slash
238
-
239
- # List all objects under the given prefix
240
- try:
241
- response = self.s3_client.list_objects_v2(Bucket=self.workbench_bucket, Prefix=s3_prefix)
242
- if "Contents" not in response:
243
- self.log.info(f"No data found under '{s3_prefix}' to delete.")
244
- return
245
-
246
- # Gather all keys to delete
247
- keys = [{"Key": obj["Key"]} for obj in response["Contents"]]
248
- response = self.s3_client.delete_objects(Bucket=self.workbench_bucket, Delete={"Objects": keys})
249
- for response in response.get("Deleted", []):
250
- self.log.info(f"Deleted: {response['Key']}")
251
-
252
- except Exception as e:
253
- self.log.error(f"Failed to delete data recursively at '{location}': {e}")
254
-
255
- def list_subfiles(self, prefix: str) -> list:
256
- """Return a list of file locations with the given prefix.
257
-
258
- Args:
259
- prefix (str, optional): Only include files with the given prefix
260
-
261
- Returns:
262
- list: List of file locations (paths)
263
- """
264
- try:
265
- full_prefix = f"{self.path_prefix}{prefix.lstrip('/')}"
266
- response = self.s3_client.list_objects_v2(Bucket=self.workbench_bucket, Prefix=full_prefix)
267
- if "Contents" not in response:
268
- return []
269
-
270
- locations = []
271
- for obj in response["Contents"]:
272
- full_key = obj["Key"]
273
- location = full_key.replace(f"{self.path_prefix}", "/").split(".parquet")[0]
274
- locations.append(location)
275
- return locations
276
-
277
- except Exception as e:
278
- self.log.error(f"Failed to list subfiles: {e}")
279
- return []
280
-
281
- def _generate_s3_uri(self, location: str) -> str:
282
- """Generate the S3 URI for the given location."""
283
- s3_path = f"{self.workbench_bucket}/{self.path_prefix}/{location}.parquet"
284
- return f"s3://{re.sub(r'/+', '/', s3_path)}"
285
-
286
- def _parse_s3_uri(self, s3_uri: str) -> tuple:
287
- """Parse an S3 URI into bucket and key."""
288
- parsed = urlparse(s3_uri)
289
- if parsed.scheme != "s3":
290
- raise ValueError(f"Invalid S3 URI: {s3_uri}")
291
- return parsed.netloc, parsed.path.lstrip("/")
292
-
293
- def __repr__(self):
294
- """Return a string representation of the AWSDFStore object."""
295
- # Use the summary() method and format it to align columns for printing
296
- summary_df = self.summary()
297
-
298
- # Sanity check: If there are no objects, return a message
299
- if summary_df.empty:
300
- return "AWSDFStore: No data objects found in the store."
301
-
302
- # Dynamically compute the max length of the 'location' column and add 5 spaces for padding
303
- max_location_len = summary_df["location"].str.len().max() + 2
304
- summary_df["location"] = summary_df["location"].str.ljust(max_location_len)
305
-
306
- # Format the size column to include (MB) and ensure 3 spaces between size and date
307
- summary_df["size (MB)"] = summary_df["size (MB)"].apply(lambda x: f"{x:.2f} MB")
308
-
309
- # Enclose the modified date in parentheses and ensure 3 spaces between size and date
310
- summary_df["modified"] = summary_df["modified"].apply(lambda x: f" ({x})")
311
-
312
- # Convert the DataFrame to a string, remove headers, and return
313
- return summary_df.to_string(index=False, header=False)
314
-
315
-
316
if __name__ == "__main__":
    """Exercise the AWSDFStore Class"""
    import time

    # Stand up a store with the default settings
    store = AWSDFStore()

    # Show the raw per-object details
    print("Detailed Data...")
    print(store.details())

    # Show the plain listing
    print("List Data...")
    print(store.list())

    # Store a small DataFrame
    sample_df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
    store.upsert("/testing/test_data", sample_df)

    # When was it last written?
    print("Last Modified Date:")
    print(store.last_modified("/testing/test_data"))

    # Read it back
    print(f"Getting data 'test_data':\n{store.get('/testing/test_data')}")

    # A Series goes through the same path
    sample_series = pd.Series([1, 2, 3, 4], name="Series")
    store.upsert("/testing/test_series", sample_series)
    print(f"Getting data 'test_series':\n{store.get('/testing/test_series')}")

    # Formatted summary
    print("Summary Data...")
    print(store.summary())

    # String representation of the store
    print("AWSDFStore Object:")
    print(store)

    # Both test objects should exist at this point
    test_locations = ("/testing/test_data", "/testing/test_series")
    print("Check if data exists...")
    for test_location in test_locations:
        print(store.check(test_location))

    # Time a single existence check
    start_time = time.time()
    print(store.check("/testing/test_data"))
    print("--- Check %s seconds ---" % (time.time() - start_time))

    # Listing by prefix
    print("List Subfiles:")
    print(store.list_subfiles("/testing"))

    # Remove the test objects
    for test_location in test_locations:
        store.delete(test_location)

    # The existence checks are repeated after the delete
    print("Check if data exists...")
    for test_location in test_locations:
        print(store.check(test_location))

    # Write a batch of frames, then remove them all with one recursive delete
    for i in range(10):
        store.upsert(f"/testing/data_{i}", pd.DataFrame({"A": [1, 2], "B": [3, 4]}))
    print("Before Recursive Delete:")
    print(store.summary())
    store.delete_recursive("/testing")
    print("After Recursive Delete:")
    print(store.summary())

    # Ask for something that was never stored
    print("Getting non-existent data...")
    print(store.get("/testing/no_where"))

    # Same round trip against a store with a custom path_prefix
    store = AWSDFStore(path_prefix="/super/test")
    print(store.path_prefix)
    store.upsert("test_data", sample_df)
    print(store.get("test_data"))
    print(store.summary())
    store.delete("test_data")
    print(store.summary())

    # Column names containing spaces
    sample_df = pd.DataFrame({"My A": [1, 2], "My B": [3, 4]})
    store.upsert("/testing/test_data", sample_df)
    sample_df = store.get("/testing/test_data")
    print(sample_df)