workbench 0.8.177__py3-none-any.whl → 0.8.227__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of workbench might be problematic. Click here for more details.
- workbench/__init__.py +1 -0
- workbench/algorithms/dataframe/__init__.py +1 -2
- workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
- workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
- workbench/algorithms/dataframe/fingerprint_proximity.py +422 -86
- workbench/algorithms/dataframe/projection_2d.py +44 -21
- workbench/algorithms/dataframe/proximity.py +259 -305
- workbench/algorithms/graph/light/proximity_graph.py +12 -11
- workbench/algorithms/models/cleanlab_model.py +382 -0
- workbench/algorithms/models/noise_model.py +388 -0
- workbench/algorithms/sql/column_stats.py +0 -1
- workbench/algorithms/sql/correlations.py +0 -1
- workbench/algorithms/sql/descriptive_stats.py +0 -1
- workbench/algorithms/sql/outliers.py +3 -3
- workbench/api/__init__.py +5 -1
- workbench/api/df_store.py +17 -108
- workbench/api/endpoint.py +14 -12
- workbench/api/feature_set.py +117 -11
- workbench/api/meta.py +0 -1
- workbench/api/meta_model.py +289 -0
- workbench/api/model.py +52 -21
- workbench/api/parameter_store.py +3 -52
- workbench/cached/cached_meta.py +0 -1
- workbench/cached/cached_model.py +49 -11
- workbench/core/artifacts/__init__.py +11 -2
- workbench/core/artifacts/artifact.py +5 -5
- workbench/core/artifacts/df_store_core.py +114 -0
- workbench/core/artifacts/endpoint_core.py +319 -204
- workbench/core/artifacts/feature_set_core.py +249 -45
- workbench/core/artifacts/model_core.py +135 -82
- workbench/core/artifacts/parameter_store_core.py +98 -0
- workbench/core/cloud_platform/cloud_meta.py +0 -1
- workbench/core/pipelines/pipeline_executor.py +1 -1
- workbench/core/transforms/features_to_model/features_to_model.py +60 -44
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +43 -10
- workbench/core/transforms/pandas_transforms/pandas_to_features.py +38 -2
- workbench/core/views/training_view.py +113 -42
- workbench/core/views/view.py +53 -3
- workbench/core/views/view_utils.py +4 -4
- workbench/model_script_utils/model_script_utils.py +339 -0
- workbench/model_script_utils/pytorch_utils.py +405 -0
- workbench/model_script_utils/uq_harness.py +277 -0
- workbench/model_scripts/chemprop/chemprop.template +774 -0
- workbench/model_scripts/chemprop/generated_model_script.py +774 -0
- workbench/model_scripts/chemprop/model_script_utils.py +339 -0
- workbench/model_scripts/chemprop/requirements.txt +3 -0
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +175 -0
- workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +0 -1
- workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +0 -1
- workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -2
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +8 -10
- workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
- workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
- workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
- workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
- workbench/model_scripts/custom_models/uq_models/ngboost.template +15 -16
- workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
- workbench/model_scripts/meta_model/generated_model_script.py +209 -0
- workbench/model_scripts/meta_model/meta_model.template +209 -0
- workbench/model_scripts/pytorch_model/generated_model_script.py +443 -499
- workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
- workbench/model_scripts/pytorch_model/pytorch.template +440 -496
- workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
- workbench/model_scripts/pytorch_model/requirements.txt +1 -1
- workbench/model_scripts/pytorch_model/uq_harness.py +277 -0
- workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
- workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
- workbench/model_scripts/script_generation.py +15 -12
- workbench/model_scripts/uq_models/generated_model_script.py +248 -0
- workbench/model_scripts/xgb_model/generated_model_script.py +371 -403
- workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
- workbench/model_scripts/xgb_model/uq_harness.py +277 -0
- workbench/model_scripts/xgb_model/xgb_model.template +367 -399
- workbench/repl/workbench_shell.py +18 -14
- workbench/resources/open_source_api.key +1 -1
- workbench/scripts/endpoint_test.py +162 -0
- workbench/scripts/lambda_test.py +73 -0
- workbench/scripts/meta_model_sim.py +35 -0
- workbench/scripts/ml_pipeline_sqs.py +122 -6
- workbench/scripts/training_test.py +85 -0
- workbench/themes/dark/custom.css +59 -0
- workbench/themes/dark/plotly.json +5 -5
- workbench/themes/light/custom.css +153 -40
- workbench/themes/light/plotly.json +9 -9
- workbench/themes/midnight_blue/custom.css +59 -0
- workbench/utils/aws_utils.py +0 -1
- workbench/utils/chem_utils/fingerprints.py +87 -46
- workbench/utils/chem_utils/mol_descriptors.py +0 -1
- workbench/utils/chem_utils/projections.py +16 -6
- workbench/utils/chem_utils/vis.py +25 -27
- workbench/utils/chemprop_utils.py +141 -0
- workbench/utils/config_manager.py +2 -6
- workbench/utils/endpoint_utils.py +5 -7
- workbench/utils/license_manager.py +2 -6
- workbench/utils/markdown_utils.py +57 -0
- workbench/utils/meta_model_simulator.py +499 -0
- workbench/utils/metrics_utils.py +256 -0
- workbench/utils/model_utils.py +260 -76
- workbench/utils/pipeline_utils.py +0 -1
- workbench/utils/plot_utils.py +159 -34
- workbench/utils/pytorch_utils.py +87 -0
- workbench/utils/shap_utils.py +11 -57
- workbench/utils/theme_manager.py +95 -30
- workbench/utils/xgboost_local_crossfold.py +267 -0
- workbench/utils/xgboost_model_utils.py +127 -220
- workbench/web_interface/components/experiments/outlier_plot.py +0 -1
- workbench/web_interface/components/model_plot.py +16 -2
- workbench/web_interface/components/plugin_unit_test.py +5 -3
- workbench/web_interface/components/plugins/ag_table.py +2 -4
- workbench/web_interface/components/plugins/confusion_matrix.py +3 -6
- workbench/web_interface/components/plugins/model_details.py +48 -80
- workbench/web_interface/components/plugins/scatter_plot.py +192 -92
- workbench/web_interface/components/settings_menu.py +184 -0
- workbench/web_interface/page_views/main_page.py +0 -1
- {workbench-0.8.177.dist-info → workbench-0.8.227.dist-info}/METADATA +31 -17
- {workbench-0.8.177.dist-info → workbench-0.8.227.dist-info}/RECORD +121 -106
- {workbench-0.8.177.dist-info → workbench-0.8.227.dist-info}/entry_points.txt +4 -0
- {workbench-0.8.177.dist-info → workbench-0.8.227.dist-info}/licenses/LICENSE +1 -1
- workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
- workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
- workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
- workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
- workbench/model_scripts/custom_models/proximity/proximity.py +0 -384
- workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -494
- workbench/model_scripts/custom_models/uq_models/mapie.template +0 -494
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -386
- workbench/model_scripts/custom_models/uq_models/proximity.py +0 -384
- workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
- workbench/model_scripts/quant_regression/quant_regression.template +0 -279
- workbench/model_scripts/quant_regression/requirements.txt +0 -1
- workbench/themes/quartz/base_css.url +0 -1
- workbench/themes/quartz/custom.css +0 -117
- workbench/themes/quartz/plotly.json +0 -642
- workbench/themes/quartz_dark/base_css.url +0 -1
- workbench/themes/quartz_dark/custom.css +0 -131
- workbench/themes/quartz_dark/plotly.json +0 -642
- workbench/utils/resource_utils.py +0 -39
- {workbench-0.8.177.dist-info → workbench-0.8.227.dist-info}/WHEEL +0 -0
- {workbench-0.8.177.dist-info → workbench-0.8.227.dist-info}/top_level.txt +0 -0
|
@@ -22,7 +22,14 @@ class Projection2D:
|
|
|
22
22
|
self.log = logging.getLogger("workbench")
|
|
23
23
|
self.projection_model = None
|
|
24
24
|
|
|
25
|
-
def fit_transform(
|
|
25
|
+
def fit_transform(
|
|
26
|
+
self,
|
|
27
|
+
input_df: pd.DataFrame,
|
|
28
|
+
features: list = None,
|
|
29
|
+
feature_matrix: np.ndarray = None,
|
|
30
|
+
metric: str = "euclidean",
|
|
31
|
+
projection: str = "UMAP",
|
|
32
|
+
) -> pd.DataFrame:
|
|
26
33
|
"""Fit and transform a DataFrame using the selected dimensionality reduction method.
|
|
27
34
|
|
|
28
35
|
This method creates a copy of the input DataFrame, processes the specified features
|
|
@@ -32,6 +39,9 @@ class Projection2D:
|
|
|
32
39
|
Args:
|
|
33
40
|
input_df (pd.DataFrame): The DataFrame containing features to project.
|
|
34
41
|
features (list, optional): List of feature column names. If None, numeric columns are auto-selected.
|
|
42
|
+
feature_matrix (np.ndarray, optional): Pre-computed feature matrix. If provided, features is ignored
|
|
43
|
+
and no scaling is applied (caller is responsible for appropriate preprocessing).
|
|
44
|
+
metric (str, optional): Distance metric for UMAP (e.g., 'euclidean', 'jaccard'). Default 'euclidean'.
|
|
35
45
|
projection (str, optional): The projection to use ('UMAP', 'TSNE', 'MDS' or 'PCA'). Default 'UMAP'.
|
|
36
46
|
|
|
37
47
|
Returns:
|
|
@@ -40,36 +50,44 @@ class Projection2D:
|
|
|
40
50
|
# Create a copy of the input DataFrame
|
|
41
51
|
df = input_df.copy()
|
|
42
52
|
|
|
43
|
-
#
|
|
44
|
-
if
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
53
|
+
# If a feature matrix is provided, use it directly (no scaling)
|
|
54
|
+
if feature_matrix is not None:
|
|
55
|
+
if len(feature_matrix) != len(df):
|
|
56
|
+
self.log.critical("feature_matrix length must match DataFrame length.")
|
|
57
|
+
return df
|
|
58
|
+
X_processed = feature_matrix
|
|
59
|
+
else:
|
|
60
|
+
# Auto-identify numeric features if none are provided
|
|
61
|
+
if features is None:
|
|
62
|
+
features = [col for col in df.select_dtypes(include="number").columns if not col.endswith("id")]
|
|
63
|
+
self.log.info(f"Auto-identified numeric features: {features}")
|
|
64
|
+
|
|
65
|
+
if len(features) < 2 or df.empty:
|
|
66
|
+
self.log.critical("At least two numeric features are required, and DataFrame must not be empty.")
|
|
67
|
+
return df
|
|
68
|
+
|
|
69
|
+
# Process a copy of the feature data for projection
|
|
70
|
+
X = df[features]
|
|
71
|
+
X = X.apply(lambda col: col.fillna(col.mean()))
|
|
72
|
+
X_processed = StandardScaler().fit_transform(X)
|
|
56
73
|
|
|
57
74
|
# Select the projection method (using df for perplexity calculation)
|
|
58
|
-
self.projection_model = self._get_projection_model(projection, df)
|
|
75
|
+
self.projection_model = self._get_projection_model(projection, df, metric=metric)
|
|
59
76
|
|
|
60
|
-
# Apply the projection on the
|
|
61
|
-
projection_result = self.projection_model.fit_transform(
|
|
77
|
+
# Apply the projection on the processed data
|
|
78
|
+
projection_result = self.projection_model.fit_transform(X_processed)
|
|
62
79
|
df[["x", "y"]] = projection_result
|
|
63
80
|
|
|
64
81
|
# Resolve coincident points and return the new DataFrame
|
|
65
82
|
return self.resolve_coincident_points(df)
|
|
66
83
|
|
|
67
|
-
def _get_projection_model(self, projection: str, df: pd.DataFrame):
|
|
84
|
+
def _get_projection_model(self, projection: str, df: pd.DataFrame, metric: str = "euclidean"):
|
|
68
85
|
"""Select and return the appropriate projection model.
|
|
69
86
|
|
|
70
87
|
Args:
|
|
71
88
|
projection (str): The projection method ('TSNE', 'MDS', 'PCA', or 'UMAP').
|
|
72
89
|
df (pd.DataFrame): The DataFrame being transformed (used for computing perplexity).
|
|
90
|
+
metric (str): Distance metric for UMAP (default 'euclidean').
|
|
73
91
|
|
|
74
92
|
Returns:
|
|
75
93
|
A dimensionality reduction model instance.
|
|
@@ -88,8 +106,14 @@ class Projection2D:
|
|
|
88
106
|
return PCA(n_components=2)
|
|
89
107
|
|
|
90
108
|
if projection == "UMAP" and UMAP_AVAILABLE:
|
|
91
|
-
|
|
92
|
-
|
|
109
|
+
# UMAP default n_neighbors=15, adjust if dataset is smaller
|
|
110
|
+
n_neighbors = min(15, len(df) - 1)
|
|
111
|
+
if n_neighbors < 15:
|
|
112
|
+
self.log.warning(
|
|
113
|
+
f"Dataset size ({len(df)}) smaller than default n_neighbors, using n_neighbors={n_neighbors}"
|
|
114
|
+
)
|
|
115
|
+
self.log.info(f"Projection: UMAP with metric={metric}, n_neighbors={n_neighbors}")
|
|
116
|
+
return umap.UMAP(n_components=2, metric=metric, n_neighbors=n_neighbors)
|
|
93
117
|
|
|
94
118
|
self.log.warning(
|
|
95
119
|
f"Projection method '{projection}' not recognized or UMAP not available. Falling back to TSNE."
|
|
@@ -118,7 +142,6 @@ class Projection2D:
|
|
|
118
142
|
|
|
119
143
|
# Find duplicates
|
|
120
144
|
duplicated = rounded.duplicated(subset=["x_round", "y_round"], keep=False)
|
|
121
|
-
print("Coincident Points found:", duplicated.sum())
|
|
122
145
|
if not duplicated.any():
|
|
123
146
|
return df
|
|
124
147
|
|