workbench 0.8.158__py3-none-any.whl → 0.8.159__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/api/feature_set.py +12 -4
- workbench/api/meta.py +1 -1
- workbench/cached/cached_feature_set.py +1 -0
- workbench/cached/cached_meta.py +10 -12
- workbench/core/artifacts/cached_artifact_mixin.py +6 -3
- workbench/core/artifacts/model_core.py +19 -7
- workbench/core/cloud_platform/aws/aws_meta.py +66 -45
- workbench/core/cloud_platform/cloud_meta.py +5 -2
- workbench/core/transforms/features_to_model/features_to_model.py +9 -5
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +6 -0
- workbench/model_scripts/{custom_models/nn_models → pytorch_model}/generated_model_script.py +170 -156
- workbench/model_scripts/{custom_models/nn_models → pytorch_model}/pytorch.template +153 -147
- workbench/model_scripts/pytorch_model/requirements.txt +2 -0
- workbench/model_scripts/scikit_learn/generated_model_script.py +307 -0
- workbench/model_scripts/script_generation.py +6 -2
- workbench/model_scripts/xgb_model/generated_model_script.py +6 -6
- workbench/repl/workbench_shell.py +4 -9
- workbench/utils/json_utils.py +27 -8
- workbench/utils/pandas_utils.py +12 -13
- workbench/utils/redis_cache.py +28 -13
- workbench/utils/workbench_cache.py +20 -14
- workbench/web_interface/page_views/endpoints_page_view.py +1 -1
- workbench/web_interface/page_views/main_page.py +1 -1
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/METADATA +5 -8
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/RECORD +29 -29
- workbench/model_scripts/custom_models/nn_models/Readme.md +0 -9
- workbench/model_scripts/custom_models/nn_models/requirements.txt +0 -4
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/WHEEL +0 -0
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/entry_points.txt +0 -0
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.158.dist-info → workbench-0.8.159.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
# Model Imports (this will be replaced with the imports for the template)
# NOTE(review): the substitution appears to have inserted a literal `None`
# expression instead of real import lines — confirm the script generator's
# placeholder replacement; as written, `PyTorch` below is an unresolved name.
None

# Template Placeholders
# Values filled in by the script generator for this specific model build.
TEMPLATE_PARAMS = {
    "model_type": "regressor",
    "target_column": "solubility",
    "feature_list": ['molwt', 'mollogp', 'molmr', 'heavyatomcount', 'numhacceptors', 'numhdonors', 'numheteroatoms', 'numrotatablebonds', 'numvalenceelectrons', 'numaromaticrings', 'numsaturatedrings', 'numaliphaticrings', 'ringcount', 'tpsa', 'labuteasa', 'balabanj', 'bertzct'],
    # NOTE(review): bare name `PyTorch` — presumably supplied by the (missing)
    # model imports above; verify against the template generator.
    "model_class": PyTorch,
    "model_metrics_s3_path": "s3://sandbox-sageworks-artifacts/models/aqsol-pytorch-reg/training",
    "train_all_data": False
}

import awswrangler as wr
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

from io import StringIO
import json
import argparse
import joblib
import os
import pandas as pd
from typing import List

# Global model_type for both training and inference
model_type = TEMPLATE_PARAMS["model_type"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Function to check if dataframe is empty
|
|
32
|
+
def check_dataframe(df: pd.DataFrame, df_name: str) -> None:
    """Abort (print a message and raise ValueError) when *df* has zero rows."""
    if not df.empty:
        return
    msg = f"*** The training data {df_name} has 0 rows! ***STOPPING***"
    print(msg)
    raise ValueError(msg)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Function to expand probability column into individual class probability columns
|
|
41
|
+
def expand_proba_column(df: pd.DataFrame, class_labels: List[str]) -> pd.DataFrame:
    """Replace the list-valued 'pred_proba' column with one '<label>_proba' column per class.

    Raises:
        ValueError: when *df* has no 'pred_proba' column.
    """
    if "pred_proba" not in df.columns:
        raise ValueError('DataFrame does not contain a "pred_proba" column')

    # One probability column per class label, in class_labels order
    expanded = pd.DataFrame(
        df["pred_proba"].tolist(),
        columns=[f"{label}_proba" for label in class_labels],
    )

    # Drop the packed column, reset the index so concat aligns row-for-row
    base = df.drop(columns=["pred_proba"]).reset_index(drop=True)
    return pd.concat([base, expanded], axis=1)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# Function to match DataFrame columns to model features (case-insensitive)
|
|
60
|
+
def match_features_case_insensitive(df: pd.DataFrame, model_features: list) -> pd.DataFrame:
    """Rename *df* columns to the model's feature names, matching case-insensitively.

    Exact-case matches are kept as-is; otherwise a column whose lowercased name
    equals a feature's lowercased name is renamed to the feature's spelling.
    Columns with no match are left untouched.
    """
    exact_names = set(df.columns)
    lower_to_actual = {col.lower(): col for col in df.columns}

    renames = {}
    for feature in model_features:
        if feature in exact_names:
            renames[feature] = feature  # already an exact match
        else:
            actual = lower_to_actual.get(feature.lower())
            if actual is not None:
                renames[actual] = feature

    return df.rename(columns=renames)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
#
# Training Section
#
# Runs inside a SageMaker training container: reads CSVs from the train
# channel, fits the configured model, writes validation predictions to S3,
# and persists the model + assets to the model directory.
if __name__ == "__main__":
    # Template Parameters
    target = TEMPLATE_PARAMS["target_column"]  # Can be None for unsupervised models
    feature_list = TEMPLATE_PARAMS["feature_list"]
    model_class = TEMPLATE_PARAMS["model_class"]
    model_metrics_s3_path = TEMPLATE_PARAMS["model_metrics_s3_path"]
    train_all_data = TEMPLATE_PARAMS["train_all_data"]
    validation_split = 0.2  # hold-out fraction used only for the random-split fallback

    # Script arguments for input/output directories
    # Defaults follow the SageMaker container conventions (SM_* env vars).
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR", "/opt/ml/model"))
    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN", "/opt/ml/input/data/train"))
    parser.add_argument(
        "--output-data-dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR", "/opt/ml/output/data")
    )
    args = parser.parse_args()

    # Load training data from the specified directory
    training_files = [
        os.path.join(args.train, file)
        for file in os.listdir(args.train) if file.endswith(".csv")
    ]
    all_df = pd.concat([pd.read_csv(file, engine="python") for file in training_files])

    # Check if the DataFrame is empty
    check_dataframe(all_df, "training_df")

    # Initialize the model using the specified model class
    model = model_class()

    # Determine if standardization is needed based on the model type
    needs_standardization = model_type in ["clusterer", "projection"]

    if needs_standardization:
        # Create a pipeline with standardization and the model
        model = Pipeline([
            ("scaler", StandardScaler()),
            ("model", model)
        ])

    # Handle logic based on the model_type
    if model_type in ["classifier", "regressor"]:
        # Supervised Models: Prepare for training
        if train_all_data:
            # Use all data for both training and validation
            print("Training on all data...")
            df_train = all_df.copy()
            df_val = all_df.copy()
        elif "training" in all_df.columns:
            # Split data based on a 'training' column if it exists
            # (rows where the boolean 'training' column is True go to train)
            print("Splitting data based on 'training' column...")
            df_train = all_df[all_df["training"]].copy()
            df_val = all_df[~all_df["training"]].copy()
        else:
            # Perform a random split if no 'training' column is found
            print("Splitting data randomly...")
            df_train, df_val = train_test_split(all_df, test_size=validation_split, random_state=42)

        # Encode the target variable if the model is a classifier
        label_encoder = None
        if model_type == "classifier" and target:
            # NOTE(review): transform() will raise on validation labels unseen
            # during fit — presumably acceptable here; confirm.
            label_encoder = LabelEncoder()
            df_train[target] = label_encoder.fit_transform(df_train[target])
            df_val[target] = label_encoder.transform(df_val[target])

        # Prepare features and targets for training
        X_train = df_train[feature_list]
        X_val = df_val[feature_list]
        y_train = df_train[target] if target else None
        y_val = df_val[target] if target else None

        # Train the model using the training data
        model.fit(X_train, y_train)

        # Make predictions and handle classification-specific logic
        preds = model.predict(X_val)
        if model_type == "classifier" and target:
            # Get class probabilities and expand them into separate columns
            probs = model.predict_proba(X_val)
            df_val["pred_proba"] = [p.tolist() for p in probs]
            df_val = expand_proba_column(df_val, label_encoder.classes_)

            # Decode the target and prediction labels
            df_val[target] = label_encoder.inverse_transform(df_val[target])
            preds = label_encoder.inverse_transform(preds)

        # Add predictions to the validation DataFrame
        df_val["prediction"] = preds

        # Save the validation predictions to S3
        output_columns = [target, "prediction"] + [col for col in df_val.columns if col.endswith("_proba")]
        wr.s3.to_csv(df_val[output_columns], path=f"{model_metrics_s3_path}/validation_predictions.csv", index=False)

    elif model_type == "clusterer":
        # Unsupervised Clustering Models: Assign cluster labels
        all_df["cluster"] = model.fit_predict(all_df[feature_list])

    elif model_type == "projection":
        # Projection Models: Apply transformation and label first three components as x, y, z
        transformed_data = model.fit_transform(all_df[feature_list])
        num_components = transformed_data.shape[1]

        # Special labels for the first three components, if they exist
        special_labels = ["x", "y", "z"]
        for i in range(num_components):
            if i < len(special_labels):
                all_df[special_labels[i]] = transformed_data[:, i]
            else:
                all_df[f"component_{i + 1}"] = transformed_data[:, i]

    elif model_type == "transformer":
        # Transformer Models: Apply transformation and use generic component labels
        transformed_data = model.fit_transform(all_df[feature_list])
        for i in range(transformed_data.shape[1]):
            all_df[f"component_{i + 1}"] = transformed_data[:, i]

    # Save the trained model and any necessary assets
    joblib.dump(model, os.path.join(args.model_dir, "model.joblib"))
    if model_type == "classifier" and label_encoder:
        joblib.dump(label_encoder, os.path.join(args.model_dir, "label_encoder.joblib"))

    # Save the feature list to validate input during predictions
    with open(os.path.join(args.model_dir, "feature_columns.json"), "w") as fp:
        json.dump(feature_list, fp)
|
|
208
|
+
|
|
209
|
+
#
|
|
210
|
+
# Inference Section
|
|
211
|
+
#
|
|
212
|
+
def model_fn(model_dir):
    """Load and return the model from the specified directory."""
    model_path = os.path.join(model_dir, "model.joblib")
    return joblib.load(model_path)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def input_fn(input_data, content_type):
    """Deserialize a request payload (CSV text or a JSON array of records) into a DataFrame.

    Raises:
        ValueError: on an empty payload or an unsupported content type.
    """
    if not input_data:
        raise ValueError("Empty input data is not supported!")

    # Normalize bytes payloads to str before parsing
    payload = input_data.decode("utf-8") if isinstance(input_data, bytes) else input_data

    if "text/csv" in content_type:
        return pd.read_csv(StringIO(payload))
    if "application/json" in content_type:
        return pd.DataFrame(json.loads(payload))  # Assumes JSON array of records
    raise ValueError(f"{content_type} not supported!")
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def output_fn(output_df, accept_type):
    """Serialize *output_df* per the requested Accept type (CSV or JSON).

    Returns:
        (payload, content_type) tuple.

    Raises:
        RuntimeError: when the accept type is neither CSV nor JSON.
    """
    if "text/csv" in accept_type:
        # CSV with N/A for missing values
        return output_df.fillna("N/A").to_csv(index=False), "text/csv"
    if "application/json" in accept_type:
        # JSON array of records (NaNs -> null)
        return output_df.to_json(orient="records"), "application/json"
    raise RuntimeError(f"{accept_type} accept type is not supported by this script.")
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def predict_fn(df, model):
    """Make predictions or apply transformations using the model and return the DataFrame with results."""
    # SageMaker convention: model assets live under SM_MODEL_DIR
    model_dir = os.environ.get("SM_MODEL_DIR", "/opt/ml/model")

    # Load feature columns from the saved file
    with open(os.path.join(model_dir, "feature_columns.json")) as fp:
        model_features = json.load(fp)

    # Load label encoder if available (for classification models)
    label_encoder = None
    if os.path.exists(os.path.join(model_dir, "label_encoder.joblib")):
        label_encoder = joblib.load(os.path.join(model_dir, "label_encoder.joblib"))

    # Match features in a case-insensitive manner
    matched_df = match_features_case_insensitive(df, model_features)

    # Initialize a dictionary to store the results
    results = {}

    # Determine how to handle the model based on its available methods
    if hasattr(model, "predict"):
        # For supervised models (classifier or regressor)
        predictions = model.predict(matched_df[model_features])
        results["prediction"] = predictions

    elif hasattr(model, "fit_predict"):
        # For clustering models (e.g., DBSCAN)
        # NOTE(review): this re-fits on the inference request's rows rather
        # than using the training fit — confirm that is intended.
        clusters = model.fit_predict(matched_df[model_features])
        results["cluster"] = clusters

    elif hasattr(model, "fit_transform") and not hasattr(model, "predict"):
        # For transformation/projection models (e.g., t-SNE, PCA)
        transformed_data = model.fit_transform(matched_df[model_features])

        # Handle 2D projection models specifically
        if model_type == "projection" and transformed_data.shape[1] == 2:
            results["x"] = transformed_data[:, 0]
            results["y"] = transformed_data[:, 1]
        else:
            # General case for any number of components
            for i in range(transformed_data.shape[1]):
                results[f"component_{i + 1}"] = transformed_data[:, i]

    else:
        # Raise an error if the model does not support the expected methods
        raise ValueError("Model does not support predict, fit_predict, or fit_transform methods.")

    # Decode predictions if using a label encoder (for classification)
    if label_encoder and "prediction" in results:
        results["prediction"] = label_encoder.inverse_transform(results["prediction"])

    # Add the results to the DataFrame
    for key, value in results.items():
        df[key] = value

    # Add probability columns if the model supports it (for classification)
    # NOTE(review): this branch reads label_encoder.classes_ without a None
    # check — if a model exposes predict_proba but no label_encoder.joblib was
    # saved, this raises AttributeError; confirm that combination cannot occur.
    if hasattr(model, "predict_proba"):
        probs = model.predict_proba(matched_df[model_features])
        df["pred_proba"] = [p.tolist() for p in probs]
        df = expand_proba_column(df, label_encoder.classes_)

    # Return the modified DataFrame
    return df
|
|
@@ -101,8 +101,12 @@ def generate_model_script(template_params: dict) -> str:
|
|
|
101
101
|
|
|
102
102
|
# Determine which template to use based on model type
|
|
103
103
|
if template_params.get("model_class"):
|
|
104
|
-
|
|
105
|
-
|
|
104
|
+
if template_params["model_class"].lower() == "pytorch":
|
|
105
|
+
template_name = "pytorch.template"
|
|
106
|
+
model_script_dir = "pytorch_model"
|
|
107
|
+
else:
|
|
108
|
+
template_name = "scikit_learn.template"
|
|
109
|
+
model_script_dir = "scikit_learn"
|
|
106
110
|
elif template_params["model_type"] in [ModelType.REGRESSOR, ModelType.CLASSIFIER]:
|
|
107
111
|
template_name = "xgb_model.template"
|
|
108
112
|
model_script_dir = "xgb_model"
|
|
@@ -28,12 +28,12 @@ from typing import List, Tuple
|
|
|
28
28
|
|
|
29
29
|
# Template Parameters
|
|
30
30
|
TEMPLATE_PARAMS = {
|
|
31
|
-
"model_type": "
|
|
32
|
-
"target_column": "
|
|
33
|
-
"features": ['
|
|
34
|
-
"compressed_features": [],
|
|
35
|
-
"model_metrics_s3_path": "s3://sandbox-sageworks-artifacts/models/
|
|
36
|
-
"train_all_data":
|
|
31
|
+
"model_type": "classifier",
|
|
32
|
+
"target_column": "solubility_class",
|
|
33
|
+
"features": ['molwt', 'mollogp', 'molmr', 'heavyatomcount', 'numhacceptors', 'numhdonors', 'numheteroatoms', 'numrotatablebonds', 'numvalenceelectrons', 'numaromaticrings', 'numsaturatedrings', 'numaliphaticrings', 'ringcount', 'tpsa', 'labuteasa', 'balabanj', 'bertzct', 'fingerprint'],
|
|
34
|
+
"compressed_features": ['fingerprint'],
|
|
35
|
+
"model_metrics_s3_path": "s3://sandbox-sageworks-artifacts/models/aqsol-fingerprints-plus-class/training",
|
|
36
|
+
"train_all_data": True
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
# Function to check if dataframe is empty
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import IPython
|
|
2
1
|
from IPython import start_ipython
|
|
3
2
|
from IPython.terminal.prompts import Prompts
|
|
4
3
|
from IPython.terminal.ipapp import load_default_config
|
|
@@ -10,7 +9,6 @@ import botocore
|
|
|
10
9
|
import webbrowser
|
|
11
10
|
import pandas as pd
|
|
12
11
|
import readline # noqa
|
|
13
|
-
from distutils.version import LooseVersion
|
|
14
12
|
|
|
15
13
|
try:
|
|
16
14
|
import matplotlib.pyplot as plt # noqa
|
|
@@ -34,6 +32,8 @@ from workbench.utils.repl_utils import cprint, Spinner
|
|
|
34
32
|
from workbench.utils.workbench_logging import IMPORTANT_LEVEL_NUM, TRACE_LEVEL_NUM
|
|
35
33
|
from workbench.utils.config_manager import ConfigManager
|
|
36
34
|
from workbench.utils.log_utils import silence_logs, log_theme
|
|
35
|
+
from workbench.api import Meta
|
|
36
|
+
from workbench.cached.cached_meta import CachedMeta
|
|
37
37
|
|
|
38
38
|
# If we have RDKIT/Mordred let's pull in our cheminformatics utils
|
|
39
39
|
try:
|
|
@@ -196,10 +196,7 @@ class WorkbenchShell:
|
|
|
196
196
|
|
|
197
197
|
# Start IPython with the config and commands in the namespace
|
|
198
198
|
try:
|
|
199
|
-
|
|
200
|
-
ipython_argv = ["--no-tip", "--theme", "linux"]
|
|
201
|
-
else:
|
|
202
|
-
ipython_argv = []
|
|
199
|
+
ipython_argv = ["--no-tip", "--theme", "linux"]
|
|
203
200
|
start_ipython(ipython_argv, user_ns=locs, config=config)
|
|
204
201
|
finally:
|
|
205
202
|
spinner = self.spinner_start("Goodbye to AWS:")
|
|
@@ -255,7 +252,7 @@ class WorkbenchShell:
|
|
|
255
252
|
|
|
256
253
|
def import_workbench(self):
|
|
257
254
|
# Import all the Workbench modules
|
|
258
|
-
spinner = self.spinner_start("
|
|
255
|
+
spinner = self.spinner_start("Spinning up Workbench:")
|
|
259
256
|
try:
|
|
260
257
|
# These are the classes we want to expose to the REPL
|
|
261
258
|
self.commands["DataSource"] = importlib.import_module("workbench.api.data_source").DataSource
|
|
@@ -475,8 +472,6 @@ class WorkbenchShell:
|
|
|
475
472
|
|
|
476
473
|
# Helpers method to switch from direct Meta to Cached Meta
|
|
477
474
|
def try_cached_meta(self):
|
|
478
|
-
from workbench.api import Meta
|
|
479
|
-
from workbench.cached.cached_meta import CachedMeta
|
|
480
475
|
|
|
481
476
|
with silence_logs():
|
|
482
477
|
self.meta = CachedMeta()
|
workbench/utils/json_utils.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""JSON Utilities"""
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
+
from io import StringIO
|
|
4
5
|
import numpy as np
|
|
5
6
|
import pandas as pd
|
|
6
7
|
import logging
|
|
@@ -33,9 +34,7 @@ class CustomEncoder(json.JSONEncoder):
|
|
|
33
34
|
elif isinstance(obj, pd.DataFrame):
|
|
34
35
|
return {
|
|
35
36
|
"__dataframe__": True,
|
|
36
|
-
"df": obj.
|
|
37
|
-
"index": obj.index.tolist(),
|
|
38
|
-
"index_name": obj.index.name,
|
|
37
|
+
"df": obj.to_json(orient="table"),
|
|
39
38
|
}
|
|
40
39
|
return super().default(obj)
|
|
41
40
|
except Exception as e:
|
|
@@ -62,10 +61,16 @@ def custom_decoder(dct):
|
|
|
62
61
|
if "__datetime__" in dct:
|
|
63
62
|
return iso8601_to_datetime(dct["datetime"])
|
|
64
63
|
elif "__dataframe__" in dct:
|
|
65
|
-
|
|
66
|
-
if
|
|
67
|
-
df.
|
|
68
|
-
|
|
64
|
+
df_data = dct["df"]
|
|
65
|
+
if isinstance(df_data, str):
|
|
66
|
+
df = pd.read_json(StringIO(df_data), orient="table")
|
|
67
|
+
else:
|
|
68
|
+
# Old format compatibility
|
|
69
|
+
log.warning("Decoding old dataframe format...")
|
|
70
|
+
df = pd.DataFrame.from_dict(df_data)
|
|
71
|
+
if "index" in dct:
|
|
72
|
+
df.index = dct["index"]
|
|
73
|
+
df.index.name = dct.get("index_name")
|
|
69
74
|
return df
|
|
70
75
|
return dct
|
|
71
76
|
except Exception as e:
|
|
@@ -86,6 +91,7 @@ if __name__ == "__main__":
|
|
|
86
91
|
"datetime": datetime.now(),
|
|
87
92
|
"date": date.today(),
|
|
88
93
|
"dataframe": pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
|
|
94
|
+
"list": [1, 2, 3],
|
|
89
95
|
}
|
|
90
96
|
|
|
91
97
|
# Encode the test dictionary
|
|
@@ -120,4 +126,17 @@ if __name__ == "__main__":
|
|
|
120
126
|
decoded_df = json.loads(encoded_df, object_hook=custom_decoder)
|
|
121
127
|
|
|
122
128
|
print("Original DataFrame index name:", df_with_index.index.name)
|
|
123
|
-
print("Decoded DataFrame index name:", decoded_df.index.name)
|
|
129
|
+
print("Decoded DataFrame index name:", decoded_df.index.name)
|
|
130
|
+
|
|
131
|
+
# Dataframe Testing
|
|
132
|
+
from workbench.api import DFStore
|
|
133
|
+
|
|
134
|
+
df_store = DFStore()
|
|
135
|
+
df = df_store.get("/testing/json_encoding/smart_sample_bad")
|
|
136
|
+
encoded = json.dumps(df, cls=CustomEncoder)
|
|
137
|
+
decoded_df = json.loads(encoded, object_hook=custom_decoder)
|
|
138
|
+
|
|
139
|
+
# Compare original and decoded DataFrame
|
|
140
|
+
from workbench.utils.pandas_utils import compare_dataframes
|
|
141
|
+
|
|
142
|
+
compare_dataframes(df, decoded_df)
|
workbench/utils/pandas_utils.py
CHANGED
|
@@ -94,14 +94,16 @@ def dataframe_delta(func_that_returns_df, previous_hash: Optional[str] = None) -
|
|
|
94
94
|
return df, current_hash
|
|
95
95
|
|
|
96
96
|
|
|
97
|
-
def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, display_columns: list):
|
|
97
|
+
def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, display_columns: list = None):
|
|
98
98
|
"""Compare two DataFrames and report on differences.
|
|
99
99
|
|
|
100
100
|
Args:
|
|
101
101
|
df1 (pd.DataFrame): First DataFrame to compare.
|
|
102
102
|
df2 (pd.DataFrame): Second DataFrame to compare.
|
|
103
|
-
display_columns (list): Columns to display when differences are found.
|
|
103
|
+
display_columns (list): Columns to display when differences are found (defaults to all columns).
|
|
104
104
|
"""
|
|
105
|
+
if display_columns is None:
|
|
106
|
+
display_columns = df1.columns.tolist()
|
|
105
107
|
|
|
106
108
|
# Check if the entire dataframes are equal
|
|
107
109
|
if df1.equals(df2):
|
|
@@ -130,7 +132,7 @@ def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, display_columns: li
|
|
|
130
132
|
# Print out the column types
|
|
131
133
|
print("\nColumn Types:")
|
|
132
134
|
print(f"DF1: {df1[common_columns].dtypes.value_counts()}")
|
|
133
|
-
print(f"
|
|
135
|
+
print(f"\nDF2: {df2[common_columns].dtypes.value_counts()}")
|
|
134
136
|
|
|
135
137
|
# Count the NaNs in each DataFrame individually (only show columns with > 0 NaNs)
|
|
136
138
|
nan_counts_df1 = df1.isna().sum()
|
|
@@ -146,6 +148,7 @@ def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, display_columns: li
|
|
|
146
148
|
|
|
147
149
|
# Define tolerance for float comparisons
|
|
148
150
|
epsilon = 1e-10
|
|
151
|
+
difference_counts = {}
|
|
149
152
|
|
|
150
153
|
# Check for differences in common columns
|
|
151
154
|
for column in common_columns:
|
|
@@ -161,18 +164,10 @@ def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, display_columns: li
|
|
|
161
164
|
# Other types (e.g., int) with NaNs treated as equal
|
|
162
165
|
differences = ~(df1[column].fillna(0) == df2[column].fillna(0))
|
|
163
166
|
|
|
164
|
-
# Create a merged DataFrame showing values from both DataFrames
|
|
165
|
-
merged_df = pd.DataFrame(
|
|
166
|
-
{
|
|
167
|
-
**{col: df1.loc[differences, col] for col in display_columns},
|
|
168
|
-
f"{column}_1": df1.loc[differences, column],
|
|
169
|
-
f"{column}_2": df2.loc[differences, column],
|
|
170
|
-
}
|
|
171
|
-
)
|
|
172
|
-
|
|
173
167
|
# If differences exist, display them
|
|
174
168
|
if differences.any():
|
|
175
|
-
print(f"\nColumn {column} has differences
|
|
169
|
+
print(f"\nColumn {column} has {differences.sum()} differences")
|
|
170
|
+
difference_counts[column] = differences.sum()
|
|
176
171
|
|
|
177
172
|
# Create a merged DataFrame showing values from both DataFrames
|
|
178
173
|
merged_df = pd.DataFrame(
|
|
@@ -186,6 +181,10 @@ def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, display_columns: li
|
|
|
186
181
|
# Display the merged DataFrame
|
|
187
182
|
print(merged_df)
|
|
188
183
|
|
|
184
|
+
# If there are no differences report that
|
|
185
|
+
if not difference_counts:
|
|
186
|
+
print(f"\nNo differences found in common columns within {epsilon}")
|
|
187
|
+
|
|
189
188
|
|
|
190
189
|
def subnormal_check(df):
|
|
191
190
|
"""
|
workbench/utils/redis_cache.py
CHANGED
|
@@ -73,12 +73,25 @@ class RedisCache:
|
|
|
73
73
|
|
|
74
74
|
def set(self, key, value):
|
|
75
75
|
"""Add an item to the redis_cache, all items are JSON serialized
|
|
76
|
+
|
|
76
77
|
Args:
|
|
77
|
-
|
|
78
|
-
|
|
78
|
+
key: item key
|
|
79
|
+
value: the value associated with this key
|
|
79
80
|
"""
|
|
80
81
|
self._set(key, json.dumps(value, cls=CustomEncoder))
|
|
81
82
|
|
|
83
|
+
def atomic_set(self, key, value) -> bool:
|
|
84
|
+
"""Atomically set key to value only if key doesn't exist.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
True if the key was set, False if it already existed.
|
|
88
|
+
"""
|
|
89
|
+
# Serialize the value to JSON
|
|
90
|
+
serialized_value = json.dumps(value, cls=CustomEncoder)
|
|
91
|
+
result = self.redis_db.set(self.prefix + str(key) + self.postfix, serialized_value, ex=self.expire, nx=True)
|
|
92
|
+
log.debug(f"Atomic Set: {key} -> {value} (Result: {result})")
|
|
93
|
+
return result is True
|
|
94
|
+
|
|
82
95
|
def get(self, key):
|
|
83
96
|
"""Get an item from the redis_cache, all items are JSON deserialized
|
|
84
97
|
Args:
|
|
@@ -165,22 +178,20 @@ class RedisCache:
|
|
|
165
178
|
def get_memory_config(self):
|
|
166
179
|
"""Get Redis memory usage and configuration settings as a dictionary"""
|
|
167
180
|
info = {}
|
|
168
|
-
try:
|
|
169
|
-
memory_info = self.redis_db.info("memory")
|
|
170
|
-
info["used_memory"] = memory_info.get("used_memory", "N/A")
|
|
171
|
-
info["used_memory_human"] = memory_info.get("used_memory_human", "N/A")
|
|
172
|
-
info["mem_fragmentation_ratio"] = memory_info.get("mem_fragmentation_ratio", "N/A")
|
|
173
|
-
info["maxmemory_policy"] = memory_info.get("maxmemory_policy", "N/A")
|
|
174
|
-
except redis.exceptions.RedisError as e:
|
|
175
|
-
log.error(f"Error retrieving memory info from Redis: {e}")
|
|
176
181
|
|
|
182
|
+
# Memory info about the Redis database
|
|
183
|
+
memory_info = self.redis_db.info("memory")
|
|
184
|
+
info["used_memory"] = memory_info.get("used_memory", "N/A")
|
|
185
|
+
info["used_memory_human"] = memory_info.get("used_memory_human", "N/A")
|
|
186
|
+
info["mem_fragmentation_ratio"] = memory_info.get("mem_fragmentation_ratio", "N/A")
|
|
187
|
+
info["maxmemory_policy"] = memory_info.get("maxmemory_policy", "N/A")
|
|
188
|
+
# CONFIG commands are disabled in managed Redis services like ElastiCache
|
|
177
189
|
try:
|
|
178
190
|
max_memory = self.redis_db.config_get("maxmemory")
|
|
179
191
|
info["maxmemory"] = max_memory.get("maxmemory", "N/A")
|
|
180
192
|
except redis.exceptions.RedisError as e:
|
|
181
|
-
log.
|
|
182
|
-
info["maxmemory"] = "Not Available -
|
|
183
|
-
|
|
193
|
+
log.debug(f"CONFIG GET disabled (likely managed Redis service): {e}")
|
|
194
|
+
info["maxmemory"] = "Not Available - Managed Service"
|
|
184
195
|
return info
|
|
185
196
|
|
|
186
197
|
def report_memory_config(self):
|
|
@@ -244,6 +255,10 @@ if __name__ == "__main__":
|
|
|
244
255
|
# Delete anything in the test database
|
|
245
256
|
my_redis_cache.clear()
|
|
246
257
|
|
|
258
|
+
# Test the atomic set
|
|
259
|
+
assert my_redis_cache.atomic_set("foo", "bar") is True
|
|
260
|
+
assert my_redis_cache.atomic_set("foo", "baz") is False
|
|
261
|
+
|
|
247
262
|
# Test storage
|
|
248
263
|
my_redis_cache.set("foo", "bar")
|
|
249
264
|
assert my_redis_cache.get("foo") == "bar"
|
|
@@ -3,7 +3,6 @@ use RedisCache if it's available, and fall back to Cache if it's not.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from pprint import pformat
|
|
6
|
-
from contextlib import contextmanager
|
|
7
6
|
from workbench.utils.cache import Cache
|
|
8
7
|
from workbench.utils.redis_cache import RedisCache
|
|
9
8
|
|
|
@@ -12,21 +11,8 @@ import logging
|
|
|
12
11
|
log = logging.getLogger("workbench")
|
|
13
12
|
|
|
14
13
|
|
|
15
|
-
# Context manager for disabling refresh
|
|
16
|
-
@contextmanager
|
|
17
|
-
def disable_refresh():
|
|
18
|
-
log.warning("WorkbenchCache: Disabling Refresh")
|
|
19
|
-
WorkbenchCache.refresh_enabled = False
|
|
20
|
-
yield
|
|
21
|
-
log.warning("WorkbenchCache: Enabling Refresh")
|
|
22
|
-
WorkbenchCache.refresh_enabled = True
|
|
23
|
-
|
|
24
|
-
|
|
25
14
|
class WorkbenchCache:
|
|
26
15
|
|
|
27
|
-
# Class attribute to control refresh treads (on/off)
|
|
28
|
-
refresh_enabled = True
|
|
29
|
-
|
|
30
16
|
def __init__(self, expire=None, prefix="", postfix=""):
|
|
31
17
|
"""WorkbenchCache Initialization
|
|
32
18
|
Args:
|
|
@@ -82,6 +68,21 @@ class WorkbenchCache:
|
|
|
82
68
|
def clear(self):
|
|
83
69
|
return self._actual_cache.clear()
|
|
84
70
|
|
|
71
|
+
def atomic_set(self, key, value) -> bool:
|
|
72
|
+
"""Atomically set key to value only if key doesn't exist.
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
True if the key was set, False if it already existed.
|
|
76
|
+
"""
|
|
77
|
+
if self._using_redis:
|
|
78
|
+
return self._actual_cache.atomic_set(key, value)
|
|
79
|
+
|
|
80
|
+
# In-Memory Cache does not support atomic operations, so we simulate it
|
|
81
|
+
else:
|
|
82
|
+
key_exists = self._actual_cache.get(key) is not None
|
|
83
|
+
self._actual_cache.set(key, value)
|
|
84
|
+
return not key_exists
|
|
85
|
+
|
|
85
86
|
def show_size_details(self, value):
|
|
86
87
|
"""Print the size of the sub-parts of the value"""
|
|
87
88
|
try:
|
|
@@ -118,6 +119,10 @@ if __name__ == "__main__":
|
|
|
118
119
|
# Delete anything in the test database
|
|
119
120
|
my_cache.clear()
|
|
120
121
|
|
|
122
|
+
# Test the atomic set
|
|
123
|
+
assert my_cache.atomic_set("foo", "bar") is True
|
|
124
|
+
assert my_cache.atomic_set("foo", "baz") is False # Should not overwrite
|
|
125
|
+
|
|
121
126
|
# Test storage
|
|
122
127
|
my_cache.set("foo", "bar")
|
|
123
128
|
assert my_cache.get("foo") == "bar"
|
|
@@ -167,3 +172,4 @@ if __name__ == "__main__":
|
|
|
167
172
|
my_cache.set("df", df)
|
|
168
173
|
df = my_cache.get("df")
|
|
169
174
|
print(df)
|
|
175
|
+
my_cache.clear()
|
|
@@ -25,7 +25,7 @@ class EndpointsPageView(PageView):
|
|
|
25
25
|
def refresh(self):
|
|
26
26
|
"""Refresh the endpoint data from the Cloud Platform"""
|
|
27
27
|
self.log.important("Calling endpoint page view refresh()..")
|
|
28
|
-
self.endpoints_df = self.meta.endpoints()
|
|
28
|
+
self.endpoints_df = self.meta.endpoints(details=True)
|
|
29
29
|
|
|
30
30
|
# Drop the AWS URL column
|
|
31
31
|
self.endpoints_df.drop(columns=["_aws_url"], inplace=True, errors="ignore")
|