PyPI - variable-explorer - Versions diffs - 0.1.0__py3-none-any.whl - Mend

variable-explorer 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

variable_explorer/kernel/introspection.py ADDED Viewed

@@ -0,0 +1,186 @@
+"""Namespace introspection — scan user variables and extract metadata."""
+from __future__ import annotations
+import sys
+from typing import Any
+# Types/modules to skip: classes, modules, and plain Python functions
+# (type(lambda: None) is the function type; built-in callables are not covered).
+_SKIP_TYPES = (type, type(sys), type(lambda: None))
+# Name prefixes treated as private. '__' is already matched by '_'.
+_SKIP_PREFIXES = ('_', '__')
+# Names excluded outright. These look like IPython shell bookkeeping entries
+# (TODO confirm); the underscore-prefixed ones are also caught by _SKIP_PREFIXES.
+_SKIP_NAMES = {
+    'In', 'Out', 'get_ipython', 'exit', 'quit',
+    '_dh', '_ih', '_oh', '_sh', '_i', '_ii', '_iii',
+}
+def get_variable_list(user_ns: dict) -> list[dict]:
+    """Scan the user namespace and return metadata for each variable.
+
+    Args:
+        user_ns: Mapping of variable name to object, as held by the kernel.
+
+    Returns:
+        One metadata dict per inspectable variable, in namespace order.
+        Names in _SKIP_NAMES, names starting with a _SKIP_PREFIXES prefix,
+        non-string keys, and objects whose type is in _SKIP_TYPES are left
+        out, as is any variable whose inspection raises.
+    """
+    variables = []
+    # Iterate over a snapshot: inspecting a variable runs user code (repr(),
+    # __len__, ...) that may add or remove namespace entries, and mutating
+    # the live dict mid-iteration raises RuntimeError and aborts the scan.
+    for name, obj in list(user_ns.items()):
+        # A non-string key has no .startswith() and cannot be displayed.
+        if not isinstance(name, str):
+            continue
+        # str.startswith accepts the whole prefix tuple in one call.
+        if name in _SKIP_NAMES or name.startswith(_SKIP_PREFIXES):
+            continue
+        if isinstance(obj, _SKIP_TYPES):
+            continue
+        try:
+            info = _inspect_variable(name, obj)
+        except Exception:
+            # Deliberate best effort: one misbehaving object must not hide
+            # every other variable from the listing.
+            continue
+        if info is not None:
+            variables.append(info)
+    return variables
+def _inspect_variable(name: str, obj: Any) -> dict | None:
+    """Build the metadata record describing one namespace entry.
+
+    The record holds the variable name, its type name, shape, memory
+    footprint, a short repr, and the tabular classification (with a
+    'childCount' that defaults to 0 when the classifier reports none).
+    """
+    # The dict literal evaluates its values top to bottom, so the helpers
+    # run in the order shape -> memory -> repr, then the classifier.
+    record = {
+        'name': name,
+        'typeName': type(obj).__name__,
+        'shape': _get_shape(obj),
+        'memoryBytes': _get_memory(obj),
+        'shortRepr': _get_short_repr(obj),
+    }
+    classification = _classify_tabular(obj)
+    record['isTabular'] = classification['isTabular']
+    record['tabularKind'] = classification['kind']
+    record['childCount'] = classification.get('childCount', 0)
+    return record
+def _classify_tabular(obj: Any) -> dict:
+    """Decide whether *obj* can be shown as a table and label its layout.
+
+    Returns a dict with 'isTabular' and 'kind'; containers made only of
+    DataFrames additionally carry 'childCount'. pandas and numpy are
+    optional: when one is not importable its checks are skipped.
+    """
+    def verdict(kind, tabular=True, children=None):
+        result = {'isTabular': tabular, 'kind': kind}
+        if children is not None:
+            result['childCount'] = children
+        return result
+
+    try:
+        import pandas as pd
+    except ImportError:
+        pd = None
+    try:
+        import numpy as np
+    except ImportError:
+        np = None
+    frame_type = pd.DataFrame if pd is not None else None
+
+    if pd is not None:
+        if isinstance(obj, pd.DataFrame):
+            return verdict('dataframe')
+        if isinstance(obj, pd.Series):
+            return verdict('series')
+    # Only arrays of rank 2 or lower are treated as tables.
+    if np is not None and isinstance(obj, np.ndarray) and obj.ndim <= 2:
+        return verdict('ndarray')
+
+    if isinstance(obj, dict):
+        members = list(obj.values())
+        if not members:
+            return verdict('dict_empty', tabular=False)
+        # Every value is a list/tuple: one column per key.
+        if all(isinstance(m, (list, tuple)) for m in members):
+            return verdict('dict_of_lists')
+        # Every value is a DataFrame: container of tables.
+        if frame_type is not None and all(isinstance(m, frame_type) for m in members):
+            return verdict('dict_of_dataframes', children=len(members))
+        # Every value is itself a dict.
+        if all(isinstance(m, dict) for m in members):
+            return verdict('dict_of_dicts')
+        # Anything else is presented as a single-row table.
+        return verdict('dict_scalar')
+
+    if isinstance(obj, (list, tuple)):
+        if not obj:
+            return verdict('list_empty', tabular=False)
+        # Every element is a DataFrame: container of tables.
+        if frame_type is not None and all(isinstance(m, frame_type) for m in obj):
+            return verdict('list_of_dataframes', children=len(obj))
+        # Every element is a dict (records, e.g. decoded JSON).
+        if all(isinstance(m, dict) for m in obj):
+            return verdict('list_of_dicts')
+        # Every element is a list/tuple (rows).
+        if all(isinstance(m, (list, tuple)) for m in obj):
+            return verdict('list_of_lists')
+        # Anything else is presented as a single-column table.
+        return verdict('list_scalar')
+
+    return verdict('other', tabular=False)
+def _get_shape(obj: Any) -> list[int]:
+    """Return the dimensions of *obj* as a list of ints.
+
+    DataFrames and numpy arrays report their full shape, a Series reports
+    its length, any other sized object reports [len(obj)], and everything
+    else (or a failing __len__) yields an empty list.
+    """
+    try:
+        import pandas as pd
+    except ImportError:
+        pd = None
+    if pd is not None:
+        if isinstance(obj, pd.DataFrame):
+            return list(obj.shape)
+        if isinstance(obj, pd.Series):
+            return [len(obj)]
+
+    try:
+        import numpy as np
+    except ImportError:
+        np = None
+    if np is not None and isinstance(obj, np.ndarray):
+        return list(obj.shape)
+
+    if not hasattr(obj, '__len__'):
+        return []
+    try:
+        return [len(obj)]
+    except Exception:
+        # A user-defined __len__ may raise; treat the object as unsized.
+        return []
+def _get_memory(obj: Any) -> int:
+    """Estimate the memory footprint of *obj* in bytes.
+
+    pandas objects use deep memory_usage, numpy arrays use nbytes, and
+    everything else falls back to sys.getsizeof. Returns 0 when even the
+    fallback fails.
+    """
+    def pandas_bytes():
+        import pandas as pd
+        if isinstance(obj, pd.DataFrame):
+            return int(obj.memory_usage(deep=True).sum())
+        if isinstance(obj, pd.Series):
+            return int(obj.memory_usage(deep=True))
+        return None
+
+    def numpy_bytes():
+        import numpy as np
+        if isinstance(obj, np.ndarray):
+            return int(obj.nbytes)
+        return None
+
+    # Try the library-specific probes in order; a missing library or a
+    # failing measurement simply moves on to the next probe.
+    for probe in (pandas_bytes, numpy_bytes):
+        try:
+            measured = probe()
+        except Exception:
+            continue
+        if measured is not None:
+            return measured
+
+    try:
+        return sys.getsizeof(obj)
+    except Exception:
+        return 0
+def _get_short_repr(obj: Any, max_len: int = 200) -> str:
+    """Return repr(obj), cut to *max_len* characters plus '...' when longer.
+
+    If producing the representation raises, a '<TypeName>' placeholder is
+    returned instead.
+    """
+    try:
+        text = repr(obj)
+        return text[:max_len] + '...' if len(text) > max_len else text
+    except Exception:
+        return f'<{type(obj).__name__}>'

variable_explorer/kernel/serialization.py ADDED Viewed

@@ -0,0 +1,73 @@
+"""JSON-safe serialization for comm messages."""
+from __future__ import annotations
+from typing import Any
+def safe_serialize(data: Any) -> Any:
+    """Recursively convert data to JSON-safe types.
+
+    Conversions, in the order they are tried:
+      * None, bool, int and str pass through unchanged.
+      * dict -> dict with str() keys and converted values.
+      * list / tuple -> list of converted items.
+      * float (and numpy floating) -> the value, or None for NaN / +-Inf.
+      * numpy integer / bool scalars -> int / bool.
+      * numpy arrays -> nested lists whose elements are converted too.
+      * pandas Timestamp -> ISO string; other pandas-null scalars -> None.
+      * objects with .isoformat() -> its result.
+      * bytes -> UTF-8 text with undecodable bytes replaced.
+      * anything else -> str(data), or '<unserializable>' if that fails.
+
+    numpy and pandas are optional; their branches are skipped when the
+    library cannot be imported.
+    """
+    import math
+    if data is None:
+        return None
+    if isinstance(data, dict):
+        return {str(k): safe_serialize(v) for k, v in data.items()}
+    if isinstance(data, (list, tuple)):
+        return [safe_serialize(item) for item in data]
+    if isinstance(data, (bool, int, str)):
+        return data
+    if isinstance(data, float):
+        # NaN and +-Inf have no JSON representation.
+        return data if math.isfinite(data) else None
+    # numpy types
+    try:
+        import numpy as np
+    except ImportError:
+        np = None
+    if np is not None:
+        if isinstance(data, np.integer):
+            return int(data)
+        if isinstance(data, np.floating):
+            value = float(data)
+            return value if math.isfinite(value) else None
+        if isinstance(data, np.bool_):
+            return bool(data)
+        if isinstance(data, np.ndarray):
+            # tolist() yields plain Python objects, but those can still be
+            # NaN/Inf floats or arbitrary objects (object arrays), so run
+            # the result back through the converter instead of trusting it.
+            return safe_serialize(data.tolist())
+    # pandas types
+    try:
+        import pandas as pd
+        if isinstance(data, pd.Timestamp):
+            return data.isoformat()
+        # pd.isna returns an array for array-likes; using that in a boolean
+        # context raises ValueError, which is treated as "not a null scalar".
+        if pd.isna(data):
+            return None
+    except (ImportError, TypeError, ValueError):
+        pass
+    # datetime-like objects
+    if hasattr(data, 'isoformat'):
+        return data.isoformat()
+    # bytes
+    if isinstance(data, bytes):
+        return data.decode('utf-8', errors='replace')
+    # Fallback
+    try:
+        return str(data)
+    except Exception:
+        return '<unserializable>'

variable_explorer/kernel/sorter.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""Kernel-side DataFrame sorting."""
+from __future__ import annotations
+from typing import Any
+def apply_sort(df: Any, sort_model: list[dict]) -> Any:
+    """Sort a DataFrame by several columns at once.
+
+    sort_model: [{'colId': 'price', 'sort': 'asc'}, ...]
+
+    Entries naming a column that is not in df.columns are ignored, and any
+    'sort' value other than 'asc' means descending. Missing values are
+    placed last. Returns a new sorted DataFrame; df itself is returned
+    unchanged when there is nothing to sort by or sorting fails.
+    """
+    if not sort_model:
+        return df
+    requested = [entry['colId'] for entry in sort_model]
+    directions = [entry['sort'] == 'asc' for entry in sort_model]
+    known = df.columns
+    # Keep only the (column, direction) pairs whose column actually exists.
+    usable = [(col, asc) for col, asc in zip(requested, directions) if col in known]
+    if not usable:
+        return df
+    by = [col for col, _ in usable]
+    ascending = [asc for _, asc in usable]
+    try:
+        return df.sort_values(by=by, ascending=ascending, na_position='last')
+    except Exception:
+        # Columns holding values that cannot be compared make sorting fail;
+        # fall back to the unsorted frame.
+        return df

variable_explorer/kernel/statistics.py ADDED Viewed

@@ -0,0 +1,101 @@
+"""Column statistics and histogram computation."""
+from __future__ import annotations
+from typing import Any
+def compute_column_stats(var_name: str, obj: Any) -> dict:
+    """Compute statistics for all columns of a DataFrame-like object.
+
+    Args:
+        var_name: Name echoed back in the 'variable' field of the result.
+        obj: A DataFrame, a Series (treated as one column), or anything
+            pd.DataFrame() accepts.
+
+    Returns:
+        {'type': 'column_stats', 'variable': var_name, 'stats': [...]}
+        where each entry holds 'name', 'dtype', 'nullCount', 'uniqueCount'
+        and 'histogram' (None when it cannot be computed). 'stats' is empty
+        when obj cannot be converted to a DataFrame.
+    """
+    import pandas as pd
+    # Convert to DataFrame
+    if isinstance(obj, pd.Series):
+        df = obj.to_frame()
+    elif isinstance(obj, pd.DataFrame):
+        df = obj
+    else:
+        try:
+            df = pd.DataFrame(obj)
+        except Exception:
+            return {
+                'type': 'column_stats',
+                'variable': var_name,
+                'stats': []
+            }
+    stats = []
+    # Select columns by position: with duplicated labels df[label] returns a
+    # DataFrame (no .dtype), whereas iloc always yields a single Series.
+    for position, col in enumerate(df.columns):
+        series = df.iloc[:, position]
+        try:
+            unique_count = int(series.nunique())
+        except TypeError:
+            # Unhashable cells (lists, dicts, ...) cannot be counted
+            # directly; count their distinct string forms instead.
+            unique_count = int(series.dropna().astype(str).nunique())
+        info: dict[str, Any] = {
+            'name': str(col),
+            'dtype': str(series.dtype),
+            'nullCount': int(series.isna().sum()),
+            'uniqueCount': unique_count,
+        }
+        try:
+            info['histogram'] = _compute_histogram(series)
+        except Exception:
+            # The histogram is optional; never let it sink the other stats.
+            info['histogram'] = None
+        stats.append(info)
+    return {
+        'type': 'column_stats',
+        'variable': var_name,
+        'stats': stats,
+    }
+def _compute_histogram(series) -> dict | None:
+    """Compute histogram data for a single column.
+
+    Returns one of:
+      * {'type': 'boolean', 'trueCount', 'falseCount', 'nullCount'}
+      * {'type': 'numeric', 'counts', 'edges', 'min', 'max', 'mean', 'std'}
+        using between 5 and 20 equal-width bins (one per ten non-null
+        values, clamped to that range)
+      * {'type': 'categorical', 'labels', 'counts'} for the ten most
+        frequent values of categorical / object / string columns
+      * None when the column is empty after dropping nulls, has another
+        dtype, or the numeric histogram cannot be computed.
+    """
+    import pandas as pd
+    import numpy as np
+    dtype = series.dtype
+    # Boolean (checked first because bool also counts as numeric)
+    if pd.api.types.is_bool_dtype(dtype):
+        return {
+            'type': 'boolean',
+            'trueCount': int(series.sum()),
+            'falseCount': int((~series).sum()),
+            'nullCount': int(series.isna().sum()),
+        }
+    # Numeric (not bool)
+    if pd.api.types.is_numeric_dtype(dtype):
+        clean = series.dropna()
+        if len(clean) == 0:
+            return None
+        n_bins = min(20, max(5, len(clean) // 10))
+        try:
+            counts, edges = np.histogram(clean, bins=n_bins)
+            return {
+                'type': 'numeric',
+                'counts': counts.tolist(),
+                'edges': edges.tolist(),
+                'min': float(clean.min()),
+                'max': float(clean.max()),
+                'mean': float(clean.mean()),
+                'std': float(clean.std()),
+            }
+        except Exception:
+            # e.g. infinite values give np.histogram a non-finite range.
+            return None
+    # Categorical / object / string.
+    # isinstance against CategoricalDtype replaces the deprecated
+    # pd.api.types.is_categorical_dtype, which warns on recent pandas.
+    is_categorical = isinstance(dtype, pd.CategoricalDtype)
+    if is_categorical or dtype == object or pd.api.types.is_string_dtype(dtype):
+        top_values = series.value_counts().head(10)
+        if len(top_values) == 0:
+            return None
+        return {
+            'type': 'categorical',
+            'labels': [str(label) for label in top_values.index.tolist()],
+            'counts': top_values.values.tolist(),
+        }
+    return None