workbench 0.8.174__py3-none-any.whl → 0.8.227__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of workbench might be problematic. Click here for more details.

Files changed (145)
  1. workbench/__init__.py +1 -0
  2. workbench/algorithms/dataframe/__init__.py +1 -2
  3. workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
  4. workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
  5. workbench/algorithms/dataframe/fingerprint_proximity.py +422 -86
  6. workbench/algorithms/dataframe/projection_2d.py +44 -21
  7. workbench/algorithms/dataframe/proximity.py +259 -305
  8. workbench/algorithms/graph/light/proximity_graph.py +12 -11
  9. workbench/algorithms/models/cleanlab_model.py +382 -0
  10. workbench/algorithms/models/noise_model.py +388 -0
  11. workbench/algorithms/sql/column_stats.py +0 -1
  12. workbench/algorithms/sql/correlations.py +0 -1
  13. workbench/algorithms/sql/descriptive_stats.py +0 -1
  14. workbench/algorithms/sql/outliers.py +3 -3
  15. workbench/api/__init__.py +5 -1
  16. workbench/api/df_store.py +17 -108
  17. workbench/api/endpoint.py +14 -12
  18. workbench/api/feature_set.py +117 -11
  19. workbench/api/meta.py +0 -1
  20. workbench/api/meta_model.py +289 -0
  21. workbench/api/model.py +52 -21
  22. workbench/api/parameter_store.py +3 -52
  23. workbench/cached/cached_meta.py +0 -1
  24. workbench/cached/cached_model.py +49 -11
  25. workbench/core/artifacts/__init__.py +11 -2
  26. workbench/core/artifacts/artifact.py +7 -7
  27. workbench/core/artifacts/data_capture_core.py +8 -1
  28. workbench/core/artifacts/df_store_core.py +114 -0
  29. workbench/core/artifacts/endpoint_core.py +323 -205
  30. workbench/core/artifacts/feature_set_core.py +249 -45
  31. workbench/core/artifacts/model_core.py +133 -101
  32. workbench/core/artifacts/parameter_store_core.py +98 -0
  33. workbench/core/cloud_platform/aws/aws_account_clamp.py +48 -2
  34. workbench/core/cloud_platform/cloud_meta.py +0 -1
  35. workbench/core/pipelines/pipeline_executor.py +1 -1
  36. workbench/core/transforms/features_to_model/features_to_model.py +60 -44
  37. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +43 -10
  38. workbench/core/transforms/pandas_transforms/pandas_to_features.py +38 -2
  39. workbench/core/views/training_view.py +113 -42
  40. workbench/core/views/view.py +53 -3
  41. workbench/core/views/view_utils.py +4 -4
  42. workbench/model_script_utils/model_script_utils.py +339 -0
  43. workbench/model_script_utils/pytorch_utils.py +405 -0
  44. workbench/model_script_utils/uq_harness.py +277 -0
  45. workbench/model_scripts/chemprop/chemprop.template +774 -0
  46. workbench/model_scripts/chemprop/generated_model_script.py +774 -0
  47. workbench/model_scripts/chemprop/model_script_utils.py +339 -0
  48. workbench/model_scripts/chemprop/requirements.txt +3 -0
  49. workbench/model_scripts/custom_models/chem_info/fingerprints.py +175 -0
  50. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +18 -7
  51. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +80 -58
  52. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +0 -1
  53. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -2
  54. workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
  55. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +8 -10
  56. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  57. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
  58. workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
  59. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  60. workbench/model_scripts/custom_models/uq_models/ngboost.template +15 -16
  61. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
  62. workbench/model_scripts/meta_model/generated_model_script.py +209 -0
  63. workbench/model_scripts/meta_model/meta_model.template +209 -0
  64. workbench/model_scripts/pytorch_model/generated_model_script.py +443 -499
  65. workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
  66. workbench/model_scripts/pytorch_model/pytorch.template +440 -496
  67. workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
  68. workbench/model_scripts/pytorch_model/requirements.txt +1 -1
  69. workbench/model_scripts/pytorch_model/uq_harness.py +277 -0
  70. workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
  71. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  72. workbench/model_scripts/script_generation.py +15 -12
  73. workbench/model_scripts/uq_models/generated_model_script.py +248 -0
  74. workbench/model_scripts/xgb_model/generated_model_script.py +371 -403
  75. workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
  76. workbench/model_scripts/xgb_model/uq_harness.py +277 -0
  77. workbench/model_scripts/xgb_model/xgb_model.template +367 -399
  78. workbench/repl/workbench_shell.py +18 -14
  79. workbench/resources/open_source_api.key +1 -1
  80. workbench/scripts/endpoint_test.py +162 -0
  81. workbench/scripts/lambda_test.py +73 -0
  82. workbench/scripts/meta_model_sim.py +35 -0
  83. workbench/scripts/ml_pipeline_sqs.py +122 -6
  84. workbench/scripts/training_test.py +85 -0
  85. workbench/themes/dark/custom.css +59 -0
  86. workbench/themes/dark/plotly.json +5 -5
  87. workbench/themes/light/custom.css +153 -40
  88. workbench/themes/light/plotly.json +9 -9
  89. workbench/themes/midnight_blue/custom.css +59 -0
  90. workbench/utils/aws_utils.py +0 -1
  91. workbench/utils/chem_utils/fingerprints.py +87 -46
  92. workbench/utils/chem_utils/mol_descriptors.py +18 -7
  93. workbench/utils/chem_utils/mol_standardize.py +80 -58
  94. workbench/utils/chem_utils/projections.py +16 -6
  95. workbench/utils/chem_utils/vis.py +25 -27
  96. workbench/utils/chemprop_utils.py +141 -0
  97. workbench/utils/config_manager.py +2 -6
  98. workbench/utils/endpoint_utils.py +5 -7
  99. workbench/utils/license_manager.py +2 -6
  100. workbench/utils/markdown_utils.py +57 -0
  101. workbench/utils/meta_model_simulator.py +499 -0
  102. workbench/utils/metrics_utils.py +256 -0
  103. workbench/utils/model_utils.py +274 -87
  104. workbench/utils/pipeline_utils.py +0 -1
  105. workbench/utils/plot_utils.py +159 -34
  106. workbench/utils/pytorch_utils.py +87 -0
  107. workbench/utils/shap_utils.py +11 -57
  108. workbench/utils/theme_manager.py +95 -30
  109. workbench/utils/xgboost_local_crossfold.py +267 -0
  110. workbench/utils/xgboost_model_utils.py +127 -220
  111. workbench/web_interface/components/experiments/outlier_plot.py +0 -1
  112. workbench/web_interface/components/model_plot.py +16 -2
  113. workbench/web_interface/components/plugin_unit_test.py +5 -3
  114. workbench/web_interface/components/plugins/ag_table.py +2 -4
  115. workbench/web_interface/components/plugins/confusion_matrix.py +3 -6
  116. workbench/web_interface/components/plugins/model_details.py +48 -80
  117. workbench/web_interface/components/plugins/scatter_plot.py +192 -92
  118. workbench/web_interface/components/settings_menu.py +184 -0
  119. workbench/web_interface/page_views/main_page.py +0 -1
  120. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/METADATA +31 -17
  121. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/RECORD +125 -111
  122. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/entry_points.txt +4 -0
  123. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/licenses/LICENSE +1 -1
  124. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  125. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
  126. workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
  127. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  128. workbench/model_scripts/custom_models/proximity/proximity.py +0 -384
  129. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  130. workbench/model_scripts/custom_models/uq_models/mapie.template +0 -502
  131. workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -386
  132. workbench/model_scripts/custom_models/uq_models/proximity.py +0 -384
  133. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  134. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  135. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  136. workbench/themes/quartz/base_css.url +0 -1
  137. workbench/themes/quartz/custom.css +0 -117
  138. workbench/themes/quartz/plotly.json +0 -642
  139. workbench/themes/quartz_dark/base_css.url +0 -1
  140. workbench/themes/quartz_dark/custom.css +0 -131
  141. workbench/themes/quartz_dark/plotly.json +0 -642
  142. workbench/utils/fast_inference.py +0 -167
  143. workbench/utils/resource_utils.py +0 -39
  144. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/WHEEL +0 -0
  145. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/top_level.txt +0 -0
@@ -483,11 +483,11 @@
483
483
  [1.0, "rgb(200, 100, 100)"]
484
484
  ],
485
485
  "sequential": [
486
- [0.0, "rgb(100, 100, 200)"],
487
- [0.4, "rgb(100, 200, 100)"],
488
- [0.65, "rgb(180, 180, 50)"],
489
- [0.85, "rgb(200, 100, 100)"],
490
- [1.0, "rgb(200, 100, 100)"]
486
+ [0.0, "rgba(80, 100, 255, 1.0)"],
487
+ [0.25, "rgba(70, 145, 220, 1.0)"],
488
+ [0.5, "rgba(70, 220, 100, 1.0)"],
489
+ [0.75, "rgba(255, 181, 80, 1.0)"],
490
+ [1.0, "rgba(232, 50, 131, 1.0)"]
491
491
  ],
492
492
  "sequentialminus": [
493
493
  [0.0, "rgb(255, 100, 100)"],
@@ -1,17 +1,66 @@
1
+ /* Light Theme Color Palette - Blue-Purple Blend */
2
+ :root {
3
+ /* Primary colors - blue to purple blend, low saturation */
4
+ --wb-lightest: rgb(200, 205, 215); /* Very light blue-lavender - backgrounds */
5
+ --wb-light: rgb(195, 205, 225); /* Light periwinkle - alternate rows */
6
+ --wb-medium: rgb(175, 185, 215); /* Medium blue-purple - headers */
7
+ --wb-accent: rgb(120, 130, 180); /* Blue-purple accent - borders */
8
+ --wb-dark: rgb(80, 90, 140); /* Dark blue-purple - strong accents */
9
+
10
+ /* Text colors */
11
+ --wb-text-primary: rgb(45, 50, 65); /* Primary text - dark blue-purple */
12
+ --wb-text-secondary: rgb(70, 75, 95); /* Secondary text */
13
+ --wb-text-muted: rgb(95, 100, 120); /* Muted text */
14
+
15
+ /* Semantic colors */
16
+ --wb-success: rgb(40, 110, 50);
17
+ --wb-info: rgb(60, 80, 160);
18
+ --wb-warning: rgb(170, 90, 10);
19
+ --wb-danger: rgb(180, 50, 80);
20
+ --wb-pink: rgb(200, 100, 200);
21
+
22
+ /* Shadows - purple-tinted */
23
+ --wb-shadow-light: rgba(80, 90, 140, 0.15);
24
+ --wb-shadow-medium: rgba(80, 90, 140, 0.25);
25
+
26
+ /* Dropdown/form colors */
27
+ --wb-dropdown-bg: rgb(185, 185, 195);
28
+ --wb-dropdown-focus: rgb(195, 195, 205);
29
+ --wb-dropdown-hover: rgb(195, 195, 205);
30
+
31
+ /* Container colors (with alpha) */
32
+ --wb-container-bg: rgba(175, 185, 215, 0.5);
33
+ --wb-container-bg-light: rgba(175, 185, 215, 0.25);
34
+ --wb-table-cell-bg: rgba(205, 215, 235, 0.5);
35
+
36
+ /* Body gradient colors */
37
+ --wb-body-gradient-start: rgb(180, 195, 215);
38
+ --wb-body-gradient-mid: rgb(170, 175, 205);
39
+ --wb-body-gradient-end: rgb(185, 175, 205);
40
+
41
+ /* Table header gradient colors */
42
+ --wb-header-red-start: rgb(200, 120, 120);
43
+ --wb-header-red-end: rgb(160, 90, 90);
44
+ --wb-header-yellow-start: rgb(180, 175, 100);
45
+ --wb-header-yellow-end: rgb(140, 135, 70);
46
+ --wb-header-green-start: rgb(110, 170, 110);
47
+ --wb-header-green-end: rgb(80, 130, 80);
48
+ --wb-header-purple-start: rgb(160, 120, 180);
49
+ --wb-header-purple-end: rgb(120, 85, 140);
50
+ }
51
+
1
52
  h1, h2, h3, h4 {
2
- color: rgb(80, 80, 80); /* We want the headers dark gray */
53
+ color: var(--wb-text-primary);
3
54
  }
4
55
 
5
56
  body {
6
- color: rgb(80, 80, 80); /* Dark gray text */
7
- /* background: linear-gradient(90deg, rgba(240,240,240,1) 0%, rgba(240,240,240,1) 50%, rgba(150,150,195,1) 100%);*/
8
- /* background: linear-gradient(90deg, rgba(200,200,220,1) 0%, rgba(150,150,195,1) 100%); */
9
- background: linear-gradient(90deg, rgba(200,200,220,1) 0%, rgba(170,170,210,1) 100%);
57
+ color: var(--wb-text-primary);
58
+ background: linear-gradient(135deg, var(--wb-body-gradient-start) 0%, var(--wb-body-gradient-mid) 50%, var(--wb-body-gradient-end) 100%);
10
59
  }
11
60
 
12
61
  /* Custom CSS to style bold text */
13
62
  b, strong {
14
- color: rgb(40, 40, 40);
63
+ color: var(--wb-text-primary);
15
64
  }
16
65
 
17
66
  /* Reduce spacing around lists */
@@ -27,22 +76,28 @@ ul, ol {
27
76
 
28
77
  /* Adjust header background color */
29
78
  .ag-header {
30
- --ag-header-background-color: rgba(150, 150, 195);
79
+ --ag-header-background-color: var(--wb-medium);
31
80
  }
32
81
 
33
- /* Adjust cell background */
82
+ /* Adjust cell background and text color */
34
83
  .ag-cell {
35
- background-color: rgb(240, 240, 240);
84
+ background-color: var(--wb-lightest);
85
+ color: var(--wb-text-primary);
36
86
  }
37
87
 
38
88
  /* Alternate row colors */
39
89
  .ag-row:nth-child(even) .ag-cell {
40
- background-color: rgb(230, 230, 230);
90
+ background-color: var(--wb-light);
91
+ }
92
+
93
+ /* AgGrid header text color */
94
+ .ag-header-cell-text {
95
+ color: var(--wb-text-primary);
41
96
  }
42
97
 
43
98
  /* Selection color for the entire row */
44
99
  .ag-row.ag-row-selected .ag-cell {
45
- background-color: rgba(170, 170, 205, 1.0);
100
+ background-color: var(--wb-medium);
46
101
  }
47
102
 
48
103
  /* There's a one pixel border around the grid that we want to remove */
@@ -53,76 +108,109 @@ ul, ol {
53
108
 
54
109
  /* Box shadow and rounded corners for all AgGrid themes */
55
110
  [class*="ag-theme-"] {
56
- box-shadow: 2px 2px 6px 5px rgba(0, 0, 0, 0.2); /* Drop shadow */
57
- border-radius: 12px; /* Rounded corners */
58
- border: 0.5px solid rgba(0, 0, 205, 0.5);
111
+ box-shadow: 2px 2px 6px 5px var(--wb-shadow-light);
112
+ border-radius: 12px;
113
+ border: 0.5px solid var(--wb-accent);
59
114
  margin: 0;
60
115
  padding: 0;
61
116
  }
62
117
 
63
118
  /* Apply styling to Workbench containers */
64
119
  .workbench-container {
65
- background-color: rgba(240, 240, 240, 0.5);
66
- box-shadow: 2px 2px 6px 5px rgba(0, 0, 0, 0.2);
67
- border-radius: 12px; /* Rounded corners */
120
+ background-color: var(--wb-container-bg);
121
+ box-shadow: 2px 2px 6px 5px var(--wb-shadow-light);
122
+ border-radius: 12px;
68
123
  }
69
124
 
70
125
  /* Apply styling to Workbench Offsets */
71
126
  .workbench-offset {
72
- background-color: rgba(240, 240, 240, 0.25);
73
- box-shadow: 1px 1px 3px 2px rgba(0, 0, 0, 0.2);
74
- border-radius: 12px; /* Rounded corners */
127
+ background-color: var(--wb-container-bg-light);
128
+ box-shadow: 1px 1px 3px 2px var(--wb-shadow-light);
129
+ border-radius: 12px;
75
130
  }
76
131
 
77
132
  /* Apply styling to Workbench Highlight */
78
133
  .workbench-highlight {
79
- background-color: rgba(240, 240, 240, 0.5);
80
- box-shadow: 1px 1px 3px 2px rgba(0, 0, 0, 0.2);
81
- border-radius: 12px; /* Rounded corners */
134
+ background-color: var(--wb-container-bg);
135
+ box-shadow: 1px 1px 3px 2px var(--wb-shadow-light);
136
+ border-radius: 12px;
82
137
  }
83
138
 
84
139
  /* Apply styling to custom tooltips */
85
140
  .custom-tooltip {
86
- background-color: rgba(240, 240, 240, 1.0);
87
- box-shadow: 2px 2px 6px 5px rgba(0, 0, 0, 0.25);
88
- border-radius: 25px; /* Rounded corners */
89
- overflow: hidden; /* Ensure contents fit inside the rounded corners */
90
- border: 2px solid rgba(128, 128, 128, 1); /* 1-pixel grey */
141
+ background-color: var(--wb-lightest);
142
+ box-shadow: 2px 2px 6px 5px var(--wb-shadow-medium);
143
+ border-radius: 25px;
144
+ overflow: hidden;
145
+ border: 2px solid var(--wb-accent);
91
146
  }
92
147
 
93
148
  /* Some of the HTML/Markdown will use color hints like 'green-text' or 'blue-text' */
94
149
  .green-text {
95
- color: rgb(40, 100, 40);
150
+ color: var(--wb-success);
96
151
  }
97
152
 
98
153
  .blue-text {
99
- color: rgb(70, 70, 160);
154
+ color: var(--wb-info);
100
155
  }
101
156
 
102
157
  .pink-text {
103
- color: rgb(200, 100, 200);
158
+ color: var(--wb-pink);
104
159
  }
105
160
 
106
161
  .red-text {
107
- color: rgb(200, 60, 100);
162
+ color: var(--wb-danger);
108
163
  }
109
164
 
110
165
  .orange-text {
111
- color: rgb(160, 80, 0);
166
+ color: var(--wb-warning);
112
167
  }
113
168
 
114
169
  .alert {
115
- color: rgb(200, 60, 100);
170
+ color: var(--wb-danger);
116
171
  }
117
172
 
118
173
  .warning {
119
- color: rgb(160, 80, 0);
174
+ color: var(--wb-warning);
120
175
  }
121
176
 
122
177
  .good {
123
- color: rgb(40, 100, 40);
178
+ color: var(--wb-success);
179
+ }
180
+
181
+ /* Dropdown styling (dcc.Dropdown) - override Bootstrap's variables */
182
+ .dash-dropdown {
183
+ --bs-body-bg: var(--wb-dropdown-bg);
184
+ --bs-body-color: var(--wb-text-primary);
185
+ --bs-border-color: var(--wb-accent);
186
+ }
187
+
188
+ /* Bootstrap form controls (dbc components) */
189
+ .form-select, .form-control {
190
+ background-color: var(--wb-dropdown-bg) !important;
191
+ border: 1px solid var(--wb-accent) !important;
192
+ color: var(--wb-text-primary) !important;
124
193
  }
125
194
 
195
+ .form-select:focus, .form-control:focus {
196
+ background-color: var(--wb-dropdown-focus) !important;
197
+ border-color: var(--wb-dark) !important;
198
+ box-shadow: 0 0 0 0.2rem var(--wb-shadow-light) !important;
199
+ }
200
+
201
+ .dropdown-menu {
202
+ background-color: var(--wb-dropdown-bg) !important;
203
+ border: 1px solid var(--wb-accent) !important;
204
+ }
205
+
206
+ .dropdown-item {
207
+ color: var(--wb-text-primary) !important;
208
+ }
209
+
210
+ .dropdown-item:hover, .dropdown-item:focus {
211
+ background-color: var(--wb-dropdown-hover) !important;
212
+ color: var(--wb-text-primary) !important;
213
+ }
126
214
 
127
215
  /* Table styling */
128
216
  table {
@@ -131,15 +219,40 @@ table {
131
219
 
132
220
  th {
133
221
  padding: 10px;
134
- border: 1px solid rgb(120, 120, 120);
135
- background-color: rgba(150, 150, 195);
222
+ border: 1px solid var(--wb-accent);
223
+ background-color: var(--wb-medium);
136
224
  font-weight: bold;
137
225
  text-align: center !important;
138
226
  }
139
227
 
140
228
  td {
141
229
  padding: 5px;
142
- border: 0.5px solid rgb(120, 120, 120);
143
- background-color: rgba(240, 240, 240, 0.5);
230
+ border: 0.5px solid var(--wb-accent);
231
+ background-color: var(--wb-table-cell-bg);
144
232
  text-align: center !important;
145
233
  }
234
+
235
+ /* AG Grid table header colors - gradient theme */
236
+ /* Data Sources tables - red gradient */
237
+ #main_data_sources .ag-header,
238
+ #data_sources_table .ag-header {
239
+ background: linear-gradient(180deg, var(--wb-header-red-start) 0%, var(--wb-header-red-end) 100%) !important;
240
+ }
241
+
242
+ /* Feature Sets tables - yellow/olive gradient */
243
+ #main_feature_sets .ag-header,
244
+ #feature_sets_table .ag-header {
245
+ background: linear-gradient(180deg, var(--wb-header-yellow-start) 0%, var(--wb-header-yellow-end) 100%) !important;
246
+ }
247
+
248
+ /* Models tables - green gradient */
249
+ #main_models .ag-header,
250
+ #models_table .ag-header {
251
+ background: linear-gradient(180deg, var(--wb-header-green-start) 0%, var(--wb-header-green-end) 100%) !important;
252
+ }
253
+
254
+ /* Endpoints tables - purple gradient */
255
+ #main_endpoints .ag-header,
256
+ #endpoints_table .ag-header {
257
+ background: linear-gradient(180deg, var(--wb-header-purple-start) 0%, var(--wb-header-purple-end) 100%) !important;
258
+ }
@@ -133,7 +133,7 @@
133
133
  "ticks": ""
134
134
  },
135
135
  "colorscale": [
136
- [0.0, "rgb(100, 100, 200)"],
136
+ [0.0, "rgba(100, 100, 200, 0.8)"],
137
137
  [0.4, "rgb(100, 200, 100)"],
138
138
  [0.65, "rgb(170, 170, 50)"],
139
139
  [0.85, "rgb(200, 100, 100)"],
@@ -476,14 +476,14 @@
476
476
  },
477
477
  "colorscale": {
478
478
  "diverging": [
479
- [0.0, "rgb(100, 100, 255)"],
479
+ [0.0, "rgba(100, 100, 255, 0.8)"],
480
480
  [0.4, "rgb(100, 240, 100)"],
481
481
  [0.65, "rgb(200, 200, 50)"],
482
482
  [0.85, "rgb(255, 100, 100)"],
483
483
  [1.0, "rgb(255, 100, 100)"]
484
484
  ],
485
485
  "sequential": [
486
- [0.0, "rgb(100, 100, 200)"],
486
+ [0.0, "rgba(100, 100, 200, 0.8)"],
487
487
  [0.4, "rgb(100, 200, 100)"],
488
488
  [0.65, "rgb(170, 170, 50)"],
489
489
  [0.85, "rgb(200, 100, 100)"],
@@ -561,8 +561,8 @@
561
561
  "mapbox": {
562
562
  "style": "light"
563
563
  },
564
- "paper_bgcolor": "rgba(0, 0, 0, 0.0)",
565
- "plot_bgcolor": "rgba(0, 0, 0, 0.0)",
564
+ "paper_bgcolor": "rgba(255, 255, 255, 0.0)",
565
+ "plot_bgcolor": "rgba(255, 255, 255, 0.0)",
566
566
  "polar": {
567
567
  "angularaxis": {
568
568
  "gridcolor": "#EBF0F8",
@@ -633,25 +633,25 @@
633
633
  },
634
634
  "xaxis": {
635
635
  "automargin": true,
636
- "gridcolor": "#ffffff",
636
+ "gridcolor": "rgba(120, 130, 160, 0.3)",
637
637
  "linecolor": "#EBF0F8",
638
638
  "ticks": "",
639
639
  "title": {
640
640
  "standoff": 15
641
641
  },
642
- "zerolinecolor": "#edeeee",
642
+ "zerolinecolor": "rgba(120, 130, 160, 0.4)",
643
643
  "zerolinewidth": 2,
644
644
  "gridwidth": 0.5
645
645
  },
646
646
  "yaxis": {
647
647
  "automargin": true,
648
- "gridcolor": "#ffffff",
648
+ "gridcolor": "rgba(120, 130, 160, 0.3)",
649
649
  "linecolor": "#EBF0F8",
650
650
  "ticks": "",
651
651
  "title": {
652
652
  "standoff": 15
653
653
  },
654
- "zerolinecolor": "#edeeee",
654
+ "zerolinecolor": "rgba(120, 130, 160, 0.4)",
655
655
  "zerolinewidth": 2,
656
656
  "gridwidth": 0.5
657
657
  },
@@ -133,6 +133,40 @@ a:hover {
133
133
  color: rgb(100, 255, 100);
134
134
  }
135
135
 
136
+ /* Dropdown styling (dcc.Dropdown) - override Bootstrap's --bs-body-bg variable */
137
+ .dash-dropdown {
138
+ --bs-body-bg: rgb(55, 60, 90);
139
+ --bs-border-color: rgb(80, 85, 115);
140
+ }
141
+
142
+
143
+ /* Bootstrap form controls (dbc components) */
144
+ .form-select, .form-control {
145
+ background-color: rgb(55, 60, 90) !important;
146
+ border: 1px solid rgb(80, 85, 115) !important;
147
+ color: rgb(210, 210, 210) !important;
148
+ }
149
+
150
+ .form-select:focus, .form-control:focus {
151
+ background-color: rgb(60, 65, 95) !important;
152
+ border-color: rgb(100, 105, 140) !important;
153
+ box-shadow: 0 0 0 0.2rem rgba(100, 105, 140, 0.25) !important;
154
+ }
155
+
156
+ .dropdown-menu {
157
+ background-color: rgb(55, 60, 90) !important;
158
+ border: 1px solid rgb(80, 85, 115) !important;
159
+ }
160
+
161
+ .dropdown-item {
162
+ color: rgb(210, 210, 210) !important;
163
+ }
164
+
165
+ .dropdown-item:hover, .dropdown-item:focus {
166
+ background-color: rgb(70, 75, 110) !important;
167
+ color: rgb(230, 230, 230) !important;
168
+ }
169
+
136
170
  /* Table styling */
137
171
  table {
138
172
  width: 100%;
@@ -151,4 +185,29 @@ td {
151
185
  padding: 5px;
152
186
  border: 0.5px solid #444;
153
187
  text-align: center !important;
188
+ }
189
+
190
+ /* AG Grid table header colors - gradient theme */
191
+ /* Data Sources tables - red gradient */
192
+ #main_data_sources .ag-header,
193
+ #data_sources_table .ag-header {
194
+ background: linear-gradient(180deg, rgb(130, 55, 55) 0%, rgb(80, 35, 35) 100%) !important;
195
+ }
196
+
197
+ /* Feature Sets tables - yellow/olive gradient */
198
+ #main_feature_sets .ag-header,
199
+ #feature_sets_table .ag-header {
200
+ background: linear-gradient(180deg, rgb(110, 105, 50) 0%, rgb(70, 65, 30) 100%) !important;
201
+ }
202
+
203
+ /* Models tables - green gradient */
204
+ #main_models .ag-header,
205
+ #models_table .ag-header {
206
+ background: linear-gradient(180deg, rgb(50, 100, 50) 0%, rgb(30, 60, 30) 100%) !important;
207
+ }
208
+
209
+ /* Endpoints tables - purple gradient */
210
+ #main_endpoints .ag-header,
211
+ #endpoints_table .ag-header {
212
+ background: linear-gradient(180deg, rgb(90, 55, 110) 0%, rgb(55, 30, 70) 100%) !important;
154
213
  }
@@ -17,7 +17,6 @@ from botocore.exceptions import ClientError
17
17
  from sagemaker.session import Session as SageSession
18
18
  from collections.abc import Mapping, Iterable
19
19
 
20
-
21
20
  # Workbench Imports
22
21
  from workbench.utils.config_manager import ConfigManager
23
22
  from workbench.utils.deprecated_utils import deprecated
@@ -1,31 +1,48 @@
1
- """Molecular fingerprint computation utilities"""
1
+ """Molecular fingerprint computation utilities for ADMET modeling.
2
+
3
+ This module provides Morgan count fingerprints, the standard for ADMET prediction.
4
+ Count fingerprints outperform binary fingerprints for molecular property prediction.
5
+
6
+ References:
7
+ - Count vs Binary: https://pubs.acs.org/doi/10.1021/acs.est.3c02198
8
+ - ECFP/Morgan: https://pubs.acs.org/doi/10.1021/ci100050t
9
+ """
2
10
 
3
11
  import logging
4
- import pandas as pd
5
12
 
6
- # Molecular Descriptor Imports
7
- from rdkit import Chem
8
- from rdkit.Chem import rdFingerprintGenerator
13
+ import numpy as np
14
+ import pandas as pd
15
+ from rdkit import Chem, RDLogger
16
+ from rdkit.Chem import AllChem
9
17
  from rdkit.Chem.MolStandardize import rdMolStandardize
10
18
 
19
+ # Suppress RDKit warnings (e.g., "not removing hydrogen atom without neighbors")
20
+ # Keep errors enabled so we see actual problems
21
+ RDLogger.DisableLog("rdApp.warning")
22
+
11
23
  # Set up the logger
12
24
  log = logging.getLogger("workbench")
13
25
 
14
26
 
15
- def compute_morgan_fingerprints(df: pd.DataFrame, radius=2, n_bits=2048, counts=True) -> pd.DataFrame:
16
- """Compute and add Morgan fingerprints to the DataFrame.
27
+ def compute_morgan_fingerprints(df: pd.DataFrame, radius: int = 2, n_bits: int = 2048) -> pd.DataFrame:
28
+ """Compute Morgan count fingerprints for ADMET modeling.
29
+
30
+ Generates true count fingerprints where each bit position contains the
31
+ number of times that substructure appears in the molecule (clamped to 0-255).
32
+ This is the recommended approach for ADMET prediction per 2025 research.
17
33
 
18
34
  Args:
19
- df (pd.DataFrame): Input DataFrame containing SMILES strings.
20
- radius (int): Radius for the Morgan fingerprint.
21
- n_bits (int): Number of bits for the fingerprint.
22
- counts (bool): Count simulation for the fingerprint.
35
+ df: Input DataFrame containing SMILES strings.
36
+ radius: Radius for the Morgan fingerprint (default 2 = ECFP4 equivalent).
37
+ n_bits: Number of bits for the fingerprint (default 2048).
23
38
 
24
39
  Returns:
25
- pd.DataFrame: The input DataFrame with the Morgan fingerprints added as bit strings.
40
+ pd.DataFrame: Input DataFrame with 'fingerprint' column added.
41
+ Values are comma-separated uint8 counts.
26
42
 
27
43
  Note:
28
- See: https://greglandrum.github.io/rdkit-blog/posts/2021-07-06-simulating-counts.html
44
+ Count fingerprints outperform binary for ADMET prediction.
45
+ See: https://pubs.acs.org/doi/10.1021/acs.est.3c02198
29
46
  """
30
47
  delete_mol_column = False
31
48
 
@@ -39,7 +56,7 @@ def compute_morgan_fingerprints(df: pd.DataFrame, radius=2, n_bits=2048, counts=
39
56
  log.warning("Detected serialized molecules in 'molecule' column. Removing...")
40
57
  del df["molecule"]
41
58
 
42
- # Convert SMILES to RDKit molecule objects (vectorized)
59
+ # Convert SMILES to RDKit molecule objects
43
60
  if "molecule" not in df.columns:
44
61
  log.info("Converting SMILES to RDKit Molecules...")
45
62
  delete_mol_column = True
@@ -47,23 +64,32 @@ def compute_morgan_fingerprints(df: pd.DataFrame, radius=2, n_bits=2048, counts=
47
64
  # Make sure our molecules are not None
48
65
  failed_smiles = df[df["molecule"].isnull()][smiles_column].tolist()
49
66
  if failed_smiles:
50
- log.error(f"Failed to convert the following SMILES to molecules: {failed_smiles}")
51
- df = df.dropna(subset=["molecule"])
67
+ log.warning(f"Failed to convert {len(failed_smiles)} SMILES to molecules ({failed_smiles})")
68
+ df = df.dropna(subset=["molecule"]).copy()
52
69
 
53
70
  # If we have fragments in our compounds, get the largest fragment before computing fingerprints
54
71
  largest_frags = df["molecule"].apply(
55
72
  lambda mol: rdMolStandardize.LargestFragmentChooser().choose(mol) if mol else None
56
73
  )
57
74
 
58
- # Create a Morgan fingerprint generator
59
- if counts:
60
- n_bits *= 4 # Multiply by 4 to simulate counts
61
- morgan_generator = rdFingerprintGenerator.GetMorganGenerator(radius=radius, fpSize=n_bits, countSimulation=counts)
75
+ def mol_to_count_string(mol):
76
+ """Convert molecule to comma-separated count fingerprint string."""
77
+ if mol is None:
78
+ return pd.NA
62
79
 
63
- # Compute Morgan fingerprints (vectorized)
64
- fingerprints = largest_frags.apply(
65
- lambda mol: (morgan_generator.GetFingerprint(mol).ToBitString() if mol else pd.NA)
66
- )
80
+ # Get hashed Morgan fingerprint with counts
81
+ fp = AllChem.GetHashedMorganFingerprint(mol, radius, nBits=n_bits)
82
+
83
+ # Initialize array and populate with counts (clamped to uint8 range)
84
+ counts = np.zeros(n_bits, dtype=np.uint8)
85
+ for idx, count in fp.GetNonzeroElements().items():
86
+ counts[idx] = min(count, 255)
87
+
88
+ # Return as comma-separated string
89
+ return ",".join(map(str, counts))
90
+
91
+ # Compute Morgan count fingerprints
92
+ fingerprints = largest_frags.apply(mol_to_count_string)
67
93
 
68
94
  # Add the fingerprints to the DataFrame
69
95
  df["fingerprint"] = fingerprints
@@ -71,59 +97,62 @@ def compute_morgan_fingerprints(df: pd.DataFrame, radius=2, n_bits=2048, counts=
71
97
  # Drop the intermediate 'molecule' column if it was added
72
98
  if delete_mol_column:
73
99
  del df["molecule"]
100
+
74
101
  return df
75
102
 
76
103
 
77
104
  if __name__ == "__main__":
78
- print("Running molecular fingerprint tests...")
79
- print("Note: This requires molecular_screening module to be available")
105
+ print("Running Morgan count fingerprint tests...")
80
106
 
81
107
  # Test molecules
82
108
  test_molecules = {
83
109
  "aspirin": "CC(=O)OC1=CC=CC=C1C(=O)O",
84
110
  "caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
85
111
  "glucose": "C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O", # With stereochemistry
86
- "sodium_acetate": "CC(=O)[O-].[Na+]", # Salt
112
+ "sodium_acetate": "CC(=O)[O-].[Na+]", # Salt (largest fragment used)
87
113
  "benzene": "c1ccccc1",
88
114
  "butene_e": "C/C=C/C", # E-butene
89
115
  "butene_z": "C/C=C\\C", # Z-butene
90
116
  }
91
117
 
92
- # Test 1: Morgan Fingerprints
93
- print("\n1. Testing Morgan fingerprint generation...")
118
+ # Test 1: Morgan Count Fingerprints (default parameters)
119
+ print("\n1. Testing Morgan fingerprint generation (radius=2, n_bits=2048)...")
94
120
 
95
121
  test_df = pd.DataFrame({"SMILES": list(test_molecules.values()), "name": list(test_molecules.keys())})
96
-
97
- fp_df = compute_morgan_fingerprints(test_df.copy(), radius=2, n_bits=512, counts=False)
122
+ fp_df = compute_morgan_fingerprints(test_df.copy())
98
123
 
99
124
  print(" Fingerprint generation results:")
100
125
  for _, row in fp_df.iterrows():
101
126
  fp = row.get("fingerprint", "N/A")
102
- fp_len = len(fp) if fp != "N/A" else 0
103
- print(f" {row['name']:15} {fp_len} bits")
127
+ if pd.notna(fp):
128
+ counts = [int(x) for x in fp.split(",")]
129
+ non_zero = sum(1 for c in counts if c > 0)
130
+ max_count = max(counts)
131
+ print(f" {row['name']:15} → {len(counts)} features, {non_zero} non-zero, max={max_count}")
132
+ else:
133
+ print(f" {row['name']:15} → N/A")
104
134
 
105
- # Test 2: Different fingerprint parameters
106
- print("\n2. Testing different fingerprint parameters...")
135
+ # Test 2: Different parameters
136
+ print("\n2. Testing with different parameters (radius=3, n_bits=1024)...")
107
137
 
108
- # Test with counts enabled
109
- fp_counts_df = compute_morgan_fingerprints(test_df.copy(), radius=3, n_bits=256, counts=True)
138
+ fp_df_custom = compute_morgan_fingerprints(test_df.copy(), radius=3, n_bits=1024)
110
139
 
111
- print(" With count simulation (256 bits * 4):")
112
- for _, row in fp_counts_df.iterrows():
140
+ for _, row in fp_df_custom.iterrows():
113
141
  fp = row.get("fingerprint", "N/A")
114
- fp_len = len(fp) if fp != "N/A" else 0
115
- print(f" {row['name']:15} {fp_len} bits")
142
+ if pd.notna(fp):
143
+ counts = [int(x) for x in fp.split(",")]
144
+ non_zero = sum(1 for c in counts if c > 0)
145
+ print(f" {row['name']:15} → {len(counts)} features, {non_zero} non-zero")
146
+ else:
147
+ print(f" {row['name']:15} → N/A")
116
148
 
117
149
  # Test 3: Edge cases
118
150
  print("\n3. Testing edge cases...")
119
151
 
120
152
  # Invalid SMILES
121
153
  invalid_df = pd.DataFrame({"SMILES": ["INVALID", ""]})
122
- try:
123
- fp_invalid = compute_morgan_fingerprints(invalid_df.copy())
124
- print(f" ✓ Invalid SMILES handled: {len(fp_invalid)} valid molecules")
125
- except Exception as e:
126
- print(f" ✓ Invalid SMILES properly raised error: {type(e).__name__}")
154
+ fp_invalid = compute_morgan_fingerprints(invalid_df.copy())
155
+ print(f" ✓ Invalid SMILES handled: {len(fp_invalid)} rows returned")
127
156
 
128
157
  # Test with pre-existing molecule column
129
158
  mol_df = test_df.copy()
@@ -131,4 +160,16 @@ if __name__ == "__main__":
131
160
  fp_with_mol = compute_morgan_fingerprints(mol_df)
132
161
  print(f" ✓ Pre-existing molecule column handled: {len(fp_with_mol)} fingerprints generated")
133
162
 
163
+ # Test 4: Verify count values are reasonable
164
+ print("\n4. Verifying count distribution...")
165
+ all_counts = []
166
+ for _, row in fp_df.iterrows():
167
+ fp = row.get("fingerprint", "N/A")
168
+ if pd.notna(fp):
169
+ counts = [int(x) for x in fp.split(",")]
170
+ all_counts.extend([c for c in counts if c > 0])
171
+
172
+ if all_counts:
173
+ print(f" Non-zero counts: min={min(all_counts)}, max={max(all_counts)}, mean={np.mean(all_counts):.2f}")
174
+
134
175
  print("\n✅ All fingerprint tests completed!")