workbench 0.8.174__py3-none-any.whl → 0.8.227__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of workbench might be problematic. Click here for more details.

Files changed (145)
  1. workbench/__init__.py +1 -0
  2. workbench/algorithms/dataframe/__init__.py +1 -2
  3. workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
  4. workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
  5. workbench/algorithms/dataframe/fingerprint_proximity.py +422 -86
  6. workbench/algorithms/dataframe/projection_2d.py +44 -21
  7. workbench/algorithms/dataframe/proximity.py +259 -305
  8. workbench/algorithms/graph/light/proximity_graph.py +12 -11
  9. workbench/algorithms/models/cleanlab_model.py +382 -0
  10. workbench/algorithms/models/noise_model.py +388 -0
  11. workbench/algorithms/sql/column_stats.py +0 -1
  12. workbench/algorithms/sql/correlations.py +0 -1
  13. workbench/algorithms/sql/descriptive_stats.py +0 -1
  14. workbench/algorithms/sql/outliers.py +3 -3
  15. workbench/api/__init__.py +5 -1
  16. workbench/api/df_store.py +17 -108
  17. workbench/api/endpoint.py +14 -12
  18. workbench/api/feature_set.py +117 -11
  19. workbench/api/meta.py +0 -1
  20. workbench/api/meta_model.py +289 -0
  21. workbench/api/model.py +52 -21
  22. workbench/api/parameter_store.py +3 -52
  23. workbench/cached/cached_meta.py +0 -1
  24. workbench/cached/cached_model.py +49 -11
  25. workbench/core/artifacts/__init__.py +11 -2
  26. workbench/core/artifacts/artifact.py +7 -7
  27. workbench/core/artifacts/data_capture_core.py +8 -1
  28. workbench/core/artifacts/df_store_core.py +114 -0
  29. workbench/core/artifacts/endpoint_core.py +323 -205
  30. workbench/core/artifacts/feature_set_core.py +249 -45
  31. workbench/core/artifacts/model_core.py +133 -101
  32. workbench/core/artifacts/parameter_store_core.py +98 -0
  33. workbench/core/cloud_platform/aws/aws_account_clamp.py +48 -2
  34. workbench/core/cloud_platform/cloud_meta.py +0 -1
  35. workbench/core/pipelines/pipeline_executor.py +1 -1
  36. workbench/core/transforms/features_to_model/features_to_model.py +60 -44
  37. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +43 -10
  38. workbench/core/transforms/pandas_transforms/pandas_to_features.py +38 -2
  39. workbench/core/views/training_view.py +113 -42
  40. workbench/core/views/view.py +53 -3
  41. workbench/core/views/view_utils.py +4 -4
  42. workbench/model_script_utils/model_script_utils.py +339 -0
  43. workbench/model_script_utils/pytorch_utils.py +405 -0
  44. workbench/model_script_utils/uq_harness.py +277 -0
  45. workbench/model_scripts/chemprop/chemprop.template +774 -0
  46. workbench/model_scripts/chemprop/generated_model_script.py +774 -0
  47. workbench/model_scripts/chemprop/model_script_utils.py +339 -0
  48. workbench/model_scripts/chemprop/requirements.txt +3 -0
  49. workbench/model_scripts/custom_models/chem_info/fingerprints.py +175 -0
  50. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +18 -7
  51. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +80 -58
  52. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +0 -1
  53. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -2
  54. workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
  55. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +8 -10
  56. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  57. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
  58. workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
  59. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  60. workbench/model_scripts/custom_models/uq_models/ngboost.template +15 -16
  61. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
  62. workbench/model_scripts/meta_model/generated_model_script.py +209 -0
  63. workbench/model_scripts/meta_model/meta_model.template +209 -0
  64. workbench/model_scripts/pytorch_model/generated_model_script.py +443 -499
  65. workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
  66. workbench/model_scripts/pytorch_model/pytorch.template +440 -496
  67. workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
  68. workbench/model_scripts/pytorch_model/requirements.txt +1 -1
  69. workbench/model_scripts/pytorch_model/uq_harness.py +277 -0
  70. workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
  71. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  72. workbench/model_scripts/script_generation.py +15 -12
  73. workbench/model_scripts/uq_models/generated_model_script.py +248 -0
  74. workbench/model_scripts/xgb_model/generated_model_script.py +371 -403
  75. workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
  76. workbench/model_scripts/xgb_model/uq_harness.py +277 -0
  77. workbench/model_scripts/xgb_model/xgb_model.template +367 -399
  78. workbench/repl/workbench_shell.py +18 -14
  79. workbench/resources/open_source_api.key +1 -1
  80. workbench/scripts/endpoint_test.py +162 -0
  81. workbench/scripts/lambda_test.py +73 -0
  82. workbench/scripts/meta_model_sim.py +35 -0
  83. workbench/scripts/ml_pipeline_sqs.py +122 -6
  84. workbench/scripts/training_test.py +85 -0
  85. workbench/themes/dark/custom.css +59 -0
  86. workbench/themes/dark/plotly.json +5 -5
  87. workbench/themes/light/custom.css +153 -40
  88. workbench/themes/light/plotly.json +9 -9
  89. workbench/themes/midnight_blue/custom.css +59 -0
  90. workbench/utils/aws_utils.py +0 -1
  91. workbench/utils/chem_utils/fingerprints.py +87 -46
  92. workbench/utils/chem_utils/mol_descriptors.py +18 -7
  93. workbench/utils/chem_utils/mol_standardize.py +80 -58
  94. workbench/utils/chem_utils/projections.py +16 -6
  95. workbench/utils/chem_utils/vis.py +25 -27
  96. workbench/utils/chemprop_utils.py +141 -0
  97. workbench/utils/config_manager.py +2 -6
  98. workbench/utils/endpoint_utils.py +5 -7
  99. workbench/utils/license_manager.py +2 -6
  100. workbench/utils/markdown_utils.py +57 -0
  101. workbench/utils/meta_model_simulator.py +499 -0
  102. workbench/utils/metrics_utils.py +256 -0
  103. workbench/utils/model_utils.py +274 -87
  104. workbench/utils/pipeline_utils.py +0 -1
  105. workbench/utils/plot_utils.py +159 -34
  106. workbench/utils/pytorch_utils.py +87 -0
  107. workbench/utils/shap_utils.py +11 -57
  108. workbench/utils/theme_manager.py +95 -30
  109. workbench/utils/xgboost_local_crossfold.py +267 -0
  110. workbench/utils/xgboost_model_utils.py +127 -220
  111. workbench/web_interface/components/experiments/outlier_plot.py +0 -1
  112. workbench/web_interface/components/model_plot.py +16 -2
  113. workbench/web_interface/components/plugin_unit_test.py +5 -3
  114. workbench/web_interface/components/plugins/ag_table.py +2 -4
  115. workbench/web_interface/components/plugins/confusion_matrix.py +3 -6
  116. workbench/web_interface/components/plugins/model_details.py +48 -80
  117. workbench/web_interface/components/plugins/scatter_plot.py +192 -92
  118. workbench/web_interface/components/settings_menu.py +184 -0
  119. workbench/web_interface/page_views/main_page.py +0 -1
  120. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/METADATA +31 -17
  121. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/RECORD +125 -111
  122. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/entry_points.txt +4 -0
  123. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/licenses/LICENSE +1 -1
  124. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  125. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
  126. workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
  127. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  128. workbench/model_scripts/custom_models/proximity/proximity.py +0 -384
  129. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  130. workbench/model_scripts/custom_models/uq_models/mapie.template +0 -502
  131. workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -386
  132. workbench/model_scripts/custom_models/uq_models/proximity.py +0 -384
  133. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  134. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  135. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  136. workbench/themes/quartz/base_css.url +0 -1
  137. workbench/themes/quartz/custom.css +0 -117
  138. workbench/themes/quartz/plotly.json +0 -642
  139. workbench/themes/quartz_dark/base_css.url +0 -1
  140. workbench/themes/quartz_dark/custom.css +0 -131
  141. workbench/themes/quartz_dark/plotly.json +0 -642
  142. workbench/utils/fast_inference.py +0 -167
  143. workbench/utils/resource_utils.py +0 -39
  144. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/WHEEL +0 -0
  145. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/top_level.txt +0 -0
@@ -22,7 +22,14 @@ class Projection2D:
22
22
  self.log = logging.getLogger("workbench")
23
23
  self.projection_model = None
24
24
 
25
- def fit_transform(self, input_df: pd.DataFrame, features: list = None, projection: str = "UMAP") -> pd.DataFrame:
25
+ def fit_transform(
26
+ self,
27
+ input_df: pd.DataFrame,
28
+ features: list = None,
29
+ feature_matrix: np.ndarray = None,
30
+ metric: str = "euclidean",
31
+ projection: str = "UMAP",
32
+ ) -> pd.DataFrame:
26
33
  """Fit and transform a DataFrame using the selected dimensionality reduction method.
27
34
 
28
35
  This method creates a copy of the input DataFrame, processes the specified features
@@ -32,6 +39,9 @@ class Projection2D:
32
39
  Args:
33
40
  input_df (pd.DataFrame): The DataFrame containing features to project.
34
41
  features (list, optional): List of feature column names. If None, numeric columns are auto-selected.
42
+ feature_matrix (np.ndarray, optional): Pre-computed feature matrix. If provided, features is ignored
43
+ and no scaling is applied (caller is responsible for appropriate preprocessing).
44
+ metric (str, optional): Distance metric for UMAP (e.g., 'euclidean', 'jaccard'). Default 'euclidean'.
35
45
  projection (str, optional): The projection to use ('UMAP', 'TSNE', 'MDS' or 'PCA'). Default 'UMAP'.
36
46
 
37
47
  Returns:
@@ -40,36 +50,44 @@ class Projection2D:
40
50
  # Create a copy of the input DataFrame
41
51
  df = input_df.copy()
42
52
 
43
- # Auto-identify numeric features if none are provided
44
- if features is None:
45
- features = [col for col in df.select_dtypes(include="number").columns if not col.endswith("id")]
46
- self.log.info(f"Auto-identified numeric features: {features}")
47
-
48
- if len(features) < 2 or df.empty:
49
- self.log.critical("At least two numeric features are required, and DataFrame must not be empty.")
50
- return df
51
-
52
- # Process a copy of the feature data for projection
53
- X = df[features]
54
- X = X.apply(lambda col: col.fillna(col.mean()))
55
- X_scaled = StandardScaler().fit_transform(X)
53
+ # If a feature matrix is provided, use it directly (no scaling)
54
+ if feature_matrix is not None:
55
+ if len(feature_matrix) != len(df):
56
+ self.log.critical("feature_matrix length must match DataFrame length.")
57
+ return df
58
+ X_processed = feature_matrix
59
+ else:
60
+ # Auto-identify numeric features if none are provided
61
+ if features is None:
62
+ features = [col for col in df.select_dtypes(include="number").columns if not col.endswith("id")]
63
+ self.log.info(f"Auto-identified numeric features: {features}")
64
+
65
+ if len(features) < 2 or df.empty:
66
+ self.log.critical("At least two numeric features are required, and DataFrame must not be empty.")
67
+ return df
68
+
69
+ # Process a copy of the feature data for projection
70
+ X = df[features]
71
+ X = X.apply(lambda col: col.fillna(col.mean()))
72
+ X_processed = StandardScaler().fit_transform(X)
56
73
 
57
74
  # Select the projection method (using df for perplexity calculation)
58
- self.projection_model = self._get_projection_model(projection, df)
75
+ self.projection_model = self._get_projection_model(projection, df, metric=metric)
59
76
 
60
- # Apply the projection on the normalized data
61
- projection_result = self.projection_model.fit_transform(X_scaled)
77
+ # Apply the projection on the processed data
78
+ projection_result = self.projection_model.fit_transform(X_processed)
62
79
  df[["x", "y"]] = projection_result
63
80
 
64
81
  # Resolve coincident points and return the new DataFrame
65
82
  return self.resolve_coincident_points(df)
66
83
 
67
- def _get_projection_model(self, projection: str, df: pd.DataFrame):
84
+ def _get_projection_model(self, projection: str, df: pd.DataFrame, metric: str = "euclidean"):
68
85
  """Select and return the appropriate projection model.
69
86
 
70
87
  Args:
71
88
  projection (str): The projection method ('TSNE', 'MDS', 'PCA', or 'UMAP').
72
89
  df (pd.DataFrame): The DataFrame being transformed (used for computing perplexity).
90
+ metric (str): Distance metric for UMAP (default 'euclidean').
73
91
 
74
92
  Returns:
75
93
  A dimensionality reduction model instance.
@@ -88,8 +106,14 @@ class Projection2D:
88
106
  return PCA(n_components=2)
89
107
 
90
108
  if projection == "UMAP" and UMAP_AVAILABLE:
91
- self.log.info("Projection: UMAP")
92
- return umap.UMAP(n_components=2)
109
+ # UMAP default n_neighbors=15, adjust if dataset is smaller
110
+ n_neighbors = min(15, len(df) - 1)
111
+ if n_neighbors < 15:
112
+ self.log.warning(
113
+ f"Dataset size ({len(df)}) smaller than default n_neighbors, using n_neighbors={n_neighbors}"
114
+ )
115
+ self.log.info(f"Projection: UMAP with metric={metric}, n_neighbors={n_neighbors}")
116
+ return umap.UMAP(n_components=2, metric=metric, n_neighbors=n_neighbors)
93
117
 
94
118
  self.log.warning(
95
119
  f"Projection method '{projection}' not recognized or UMAP not available. Falling back to TSNE."
@@ -118,7 +142,6 @@ class Projection2D:
118
142
 
119
143
  # Find duplicates
120
144
  duplicated = rounded.duplicated(subset=["x_round", "y_round"], keep=False)
121
- print("Coincident Points found:", duplicated.sum())
122
145
  if not duplicated.any():
123
146
  return df
124
147