superleaf 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
superleaf/__init__.py ADDED
File without changes
@@ -0,0 +1,2 @@
1
+ from superleaf.collections.ordered_set import OrderedSet
2
+ from superleaf.collections.summable_dict import SummableDict
@@ -0,0 +1,79 @@
1
+ import itertools
2
+ from typing import Generic, Iterable, Iterator, Self, TypeVar
3
+
4
+ T = TypeVar('T')
5
+
6
+
7
+ class OrderedSet(Generic[T]):
8
+ """Similar interface to the native set class, but with item order maintained, and expanded functionality, including
9
+ addition and summation. Implemented by storing the set items as keys in an internal dict."""
10
+
11
+ def __init__(self, items: Iterable[T] = None):
12
+ self._dict: dict[T, None] = dict(zip(items, itertools.repeat(None))) if items is not None else {}
13
+
14
+ @property
15
+ def _items(self) -> list[T]:
16
+ return list(self._dict.keys())
17
+
18
+ def __iter__(self) -> Iterator[T]:
19
+ return iter(self._items)
20
+
21
+ def copy(self) -> Self:
22
+ return self.__class__(self._items)
23
+
24
+ def union(self, other: Iterable[T]) -> Self:
25
+ return self.__class__(itertools.chain(self, other))
26
+
27
+ def add(self, item: T) -> Self:
28
+ self._dict[item] = None
29
+
30
+ def intersection(self, other: Iterable[T]) -> Self:
31
+ return self.__class__(filter(lambda x: x in other, self))
32
+
33
+ def __add__(self, other: Iterable[T]) -> Self:
34
+ return self.__class__(self.union(other))
35
+
36
+ def __radd__(self, other: Iterable[T]) -> Self:
37
+ if other == 0:
38
+ return self
39
+ else:
40
+ if not isinstance(other, type(self)):
41
+ other = type(self)(other)
42
+ return other + self
43
+
44
+ def __iadd__(self, other: Iterable[T]) -> Self:
45
+ if not isinstance(other, self.__class__):
46
+ other = self.__class__(other)
47
+ self._dict.update(other._dict)
48
+ return self
49
+
50
+ def __sub__(self, other: Iterable[T]) -> Self:
51
+ return self.__class__(filter(lambda x: x not in other, self))
52
+
53
+ def __isub__(self, other: Iterable[T]) -> Self:
54
+ if not isinstance(other, self.__class__):
55
+ other = self.__class__(other)
56
+ for item in other:
57
+ if item in self:
58
+ self._dict.pop(item)
59
+ return self
60
+
61
+ def __contains__(self, item: T) -> bool:
62
+ return item in self._dict
63
+
64
+ def __eq__(self, other: Self | set[T]) -> Self:
65
+ if isinstance(other, set):
66
+ return set(self._items) == other
67
+ elif isinstance(other, OrderedSet):
68
+ return self._dict == other._dict
69
+ else:
70
+ return False
71
+
72
+ def __repr__(self) -> str:
73
+ return "{" + ", ".join([item.__repr__() for item in self._items]) + "}"
74
+
75
+ def __len__(self) -> int:
76
+ return len(self._dict)
77
+
78
+ def __getitem__(self, item: int) -> T:
79
+ return self._items[item]
@@ -0,0 +1,71 @@
1
+ from typing import Self
2
+
3
+
4
+ class SummableDict(dict):
5
+ """A dictionary-like object that supports addition and subtraction of values."""
6
+
7
+ def __add__(self, other) -> Self:
8
+ summed = self.copy()
9
+ if isinstance(other, dict):
10
+ for k, v in other.items():
11
+ if k in summed:
12
+ summed[k] = summed[k] + v
13
+ else:
14
+ summed[k] = v
15
+ else:
16
+ for k, v in self.items():
17
+ summed[k] = summed[k] + other
18
+ return summed
19
+
20
+ def __iadd__(self, other) -> Self:
21
+ if isinstance(other, dict):
22
+ for k, v in other.items():
23
+ if k in self:
24
+ self[k] = self[k] + v
25
+ else:
26
+ self[k] = v
27
+ else:
28
+ for k, v in self.items():
29
+ self[k] = self[k] + other
30
+ return self
31
+
32
+ def __radd__(self, other) -> Self:
33
+ if other == 0:
34
+ return self.copy()
35
+ elif isinstance(other, dict):
36
+ return SummableDict(other) + self
37
+ else:
38
+ summed = self.copy()
39
+ for k, v in self.items():
40
+ summed[k] = other + v
41
+ return summed
42
+
43
+ def __neg__(self) -> Self:
44
+ return SummableDict({k: -v for k, v in self.items()})
45
+
46
+ def __sub__(self, other) -> Self:
47
+ if isinstance(other, dict):
48
+ return self + -SummableDict(other)
49
+ else:
50
+ return self + -other
51
+
52
+ def __isub__(self, other) -> Self:
53
+ if isinstance(other, dict):
54
+ for k, v in other.items():
55
+ if k in self:
56
+ self[k] = self[k] - v
57
+ else:
58
+ self[k] = -v
59
+ else:
60
+ for k, v in self.items():
61
+ self[k] = self[k] - other
62
+ return self
63
+
64
+ def __rsub__(self, other):
65
+ if other == 0:
66
+ return self.copy()
67
+ else:
68
+ return other + -self
69
+
70
+ def copy(self):
71
+ return SummableDict(super().copy())
@@ -0,0 +1,4 @@
1
+ from superleaf.dataframe.column_ops import Col, Values
2
+ from superleaf.dataframe.selection import dfilter, partition, reorder_columns
3
+ from superleaf.dataframe.standardize import standardize_columns
4
+ from superleaf.dataframe.transform import expand_dict_to_cols
@@ -0,0 +1,364 @@
1
+ from abc import ABCMeta, abstractmethod
2
+ from typing import Any, Callable, Iterable, Optional, Union
3
+
4
+ import pandas as pd
5
+
6
+
7
class ColOp(metaclass=ABCMeta):
    """Abstract base class for column operations on pandas DataFrames.

    Subclasses implement transformations or evaluations that produce pandas Series or scalar
    results when applied to DataFrames. Supports chaining and combining using logical and
    arithmetic operators; each operator returns a new ``ColOp`` that is evaluated when it is
    called with a DataFrame.

    Operators defined on this class:
    ``|`` (bitwise or), ``&`` (bitwise and), ``~`` (bitwise not), ``==`` (equal to), ``!=`` (not equal to),
    ``<`` (less than), ``<=`` (less than or equal to), ``>`` (greater than), ``>=`` (greater than or equal to),
    ``+`` (addition), ``-`` (subtraction), ``*`` (multiplication), ``/`` (division), ``**`` (power)

    The binary logical and arithmetic operators also accept the ``ColOp`` as the right-hand operand,
    e.g. ``2 * op`` or ``1 - op``.

    Notes
    -----
    ``==`` builds a new operation instead of returning a bool, and no ``__hash__`` is defined alongside it,
    so instances are not hashable.
    """

    @abstractmethod
    def __call__(self, df: pd.DataFrame) -> Union[pd.Series, Any]:
        """Evaluate the operation on the DataFrame.

        Parameters
        ----------
        df : pd.DataFrame
            Input DataFrame on which to apply the operation.

        Returns
        -------
        Union[pd.Series, Any]
            The resulting pandas Series or scalar value produced by this operation.
        """

    # --- logical operators -------------------------------------------------------------------

    def __or__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _OrOp(self, right)

    def __ror__(self, left: Any) -> "ColOp":
        return _OrOp(left, self)

    def __and__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _AndOp(self, right)

    def __rand__(self, left: Any) -> "ColOp":
        return _AndOp(left, self)

    def __invert__(self) -> "ColOp":
        return _NotOp(self)

    # --- comparisons -------------------------------------------------------------------------

    def __eq__(self, value: Any) -> "ColOp":
        return _EqOp(self, value)

    def __ne__(self, value: Any) -> "ColOp":
        # Expressed as the negation of ``==``.
        return _NotOp(self == value)

    def __lt__(self, value: Any) -> "ColOp":
        return _LtOp(self, value)

    def __le__(self, value: Any) -> "ColOp":
        return _LeOp(self, value)

    def __gt__(self, value: Any) -> "ColOp":
        return _GtOp(self, value)

    def __ge__(self, value: Any) -> "ColOp":
        return _GeOp(self, value)

    # --- arithmetic --------------------------------------------------------------------------

    def __add__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _AddOp(self, right)

    def __radd__(self, left: Any) -> "ColOp":
        return _AddOp(left, self)

    def __sub__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _SubtractOp(self, right)

    def __rsub__(self, left: Any) -> "ColOp":
        return _SubtractOp(left, self)

    def __mul__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _MultiplyOp(self, right)

    def __rmul__(self, left: Any) -> "ColOp":
        return _MultiplyOp(left, self)

    def __truediv__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _DivideOp(self, right)

    def __rtruediv__(self, left: Any) -> "ColOp":
        return _DivideOp(left, self)

    def __pow__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _PowOp(self, right)

    def __rpow__(self, left: Any) -> "ColOp":
        return _PowOp(left, self)

    # --- derived operations ------------------------------------------------------------------

    def apply(self, f: Callable[[pd.Series], pd.Series]) -> "ColOp":
        """Apply a transformation function to the result of this operation.

        Parameters
        ----------
        f : callable
            A function that takes a pandas Series and returns a transformed Series.

        Returns
        -------
        ColOp
            A new ColOp representing the application of ``f`` to this operation's output.
        """
        return _ColApplyOp(self, f)

    def map(self, f: Callable[[Any], Any]) -> "ColOp":
        """Map a function over each element of the Series produced by this operation.

        Parameters
        ----------
        f : callable
            A function applied element-wise to each value in the Series.

        Returns
        -------
        ColOp
            A new ColOp representing the mapped operation.
        """
        return _ColMapOp(self, f)

    def isin(self, values: Iterable[Any]) -> "ColOp":
        """Test whether each element of the Series is in the given values.

        Parameters
        ----------
        values : iterable or ColOp
            A collection of values to test membership against. If a ``ColOp`` is given, the test
            is done row by row: each element of this operation is looked up in the element that
            ``values`` produces for the same row.

        Returns
        -------
        ColOp
            A new ColOp that yields a boolean Series.
        """
        if isinstance(values, ColOp):
            # Pair the two results up as ``[element, container]`` per row, then test membership.
            paired = self.to_list() + values.to_list()
            return paired.map(lambda pair: pair[0] in pair[1])
        return self.apply(lambda s: s.isin(values))

    def contains(self, value: Any) -> "ColOp":
        """Test whether each element of the Series contains the specified value.

        Parameters
        ----------
        value : Any
            Value to search for within each element.

        Returns
        -------
        ColOp
            A new ColOp that yields a boolean Series.
        """
        return self.map(lambda x: value in x)

    def notna(self) -> "ColOp":
        """Test for non-missing values in the Series.

        Returns
        -------
        ColOp
            A new ColOp yielding a boolean Series where True indicates non-null values.
        """
        return self.apply(lambda s: s.notna())

    def isna(self) -> "ColOp":
        """Test for missing values in the Series.

        Returns
        -------
        ColOp
            A new ColOp yielding a boolean Series where True indicates null values.
        """
        return self.apply(lambda s: s.isna())

    def astype(self, type_) -> "ColOp":
        """Cast the Series to a specified dtype.

        Parameters
        ----------
        type_ : type or str
            The target data type for the Series.

        Returns
        -------
        ColOp
            A new ColOp representing the cast operation.
        """
        return self.apply(lambda s: s.astype(type_))

    def to_list(self) -> "ColOp":
        """Wrap each element in the Series into a single-element list.

        Returns
        -------
        ColOp
            A new ColOp that converts each element to a list containing that value.
        """
        return self.map(lambda x: [x])
186
+
187
+
188
class Index(ColOp):
    """Column operation that evaluates to the index of the DataFrame it is called on.

    Takes no constructor arguments.

    Examples
    --------
    >>> idx = Index()
    >>> idx(df)
    DatetimeIndex([...])
    """

    def __call__(self, df: pd.DataFrame) -> pd.Index:
        frame_index = df.index
        return frame_index
203
+
204
+
205
class Col(ColOp):
    """Represent a named column in a DataFrame.

    Parameters
    ----------
    name : str, optional
        Column name to select. If None (the default), the entire DataFrame is selected.

    Examples
    --------
    >>> col = Col('column_name')
    >>> col(df)
    0 ...
    Name: column_name, dtype: dtype
    """

    def __init__(self, name: Optional[str] = None):
        self._name = name

    def __call__(self, df: pd.DataFrame) -> Union[pd.Series, pd.DataFrame]:
        if self._name is None:
            # Full-range positional slice: every row of the input is selected.
            return df.iloc[:]
        return df[self._name]
228
+
229
+
230
class Values(Col):
    """Column operation for use on a pandas Series: evaluates to a full slice of that Series.

    Notes
    -----
    Calling an instance with a DataFrame instead of a Series raises a TypeError.

    Examples
    --------
    >>> values = Values()
    >>> values(series)
    0 ...
    dtype: dtype
    """

    def __init__(self):
        # No column name: the operation addresses the whole object it is called on.
        super().__init__(None)

    def __call__(self, s: pd.Series) -> pd.Series:
        if not isinstance(s, pd.DataFrame):
            return s.iloc[:]
        raise TypeError("Values can only be called on a Series")
250
+
251
+
252
class _LiteralOp(ColOp):
    """Wrap a constant so it can appear as an operand; evaluation ignores the DataFrame and returns the constant."""

    def __init__(self, value: Any) -> None:
        self._value = value

    def __call__(self, df: pd.DataFrame) -> Any:
        constant = self._value
        return constant
258
+
259
+
260
class _ComparisonOp(ColOp):
    """Shared constructor for comparisons: stores the left-hand operation and the value to compare against."""

    def __init__(self, col: ColOp, value: Union[ColOp, Any]) -> None:
        self._col = col
        # Plain values are wrapped so that both sides can be evaluated by calling them with the DataFrame.
        self._value = value if isinstance(value, ColOp) else _LiteralOp(value)
267
+
268
+
269
class _EqOp(_ComparisonOp):
    """``==`` between the evaluated left-hand operation and the evaluated comparison value."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._col(df)
        rhs = self._value(df)
        return lhs == rhs
272
+
273
+
274
class _LtOp(_ComparisonOp):
    """``<`` between the evaluated left-hand operation and the evaluated comparison value."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._col(df)
        rhs = self._value(df)
        return lhs < rhs
277
+
278
+
279
class _LeOp(_ComparisonOp):
    """``<=`` between the evaluated left-hand operation and the evaluated comparison value."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._col(df)
        rhs = self._value(df)
        return lhs <= rhs
282
+
283
+
284
class _GtOp(_ComparisonOp):
    """``>`` between the evaluated left-hand operation and the evaluated comparison value."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._col(df)
        rhs = self._value(df)
        return lhs > rhs
287
+
288
+
289
class _GeOp(_ComparisonOp):
    """``>=`` between the evaluated left-hand operation and the evaluated comparison value."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._col(df)
        rhs = self._value(df)
        return lhs >= rhs
292
+
293
+
294
class _BinaryOp(ColOp):
    """Shared constructor for two-operand operations: stores a left and a right operand as ColOps."""

    def __init__(self, left: Union[ColOp, Any], right: Union[ColOp, Any]) -> None:
        # Either side may be a plain value; wrap it so both operands are callable with the DataFrame.
        self._left, self._right = (
            operand if isinstance(operand, ColOp) else _LiteralOp(operand)
            for operand in (left, right)
        )
304
+
305
+
306
class _OrOp(_BinaryOp):
    """``|`` of the two evaluated operands."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._left(df)
        rhs = self._right(df)
        return lhs | rhs
309
+
310
+
311
class _AndOp(_BinaryOp):
    """``&`` of the two evaluated operands."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._left(df)
        rhs = self._right(df)
        return lhs & rhs
314
+
315
+
316
class _AddOp(_BinaryOp):
    """``+`` of the two evaluated operands."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._left(df)
        rhs = self._right(df)
        return lhs + rhs
319
+
320
+
321
class _SubtractOp(_BinaryOp):
    """Evaluated left operand minus evaluated right operand."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._left(df)
        rhs = self._right(df)
        return lhs - rhs
324
+
325
+
326
class _MultiplyOp(_BinaryOp):
    """``*`` of the two evaluated operands."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._left(df)
        rhs = self._right(df)
        return lhs * rhs
329
+
330
+
331
class _DivideOp(_BinaryOp):
    """Evaluated left operand divided (``/``) by evaluated right operand."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        numerator = self._left(df)
        denominator = self._right(df)
        return numerator / denominator
334
+
335
+
336
class _PowOp(_BinaryOp):
    """Evaluated left operand raised (``**``) to the evaluated right operand."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        base = self._left(df)
        exponent = self._right(df)
        return base ** exponent
339
+
340
+
341
class _NotOp(ColOp):
    """``~`` applied to the result of the wrapped operation."""

    def __init__(self, col: ColOp) -> None:
        self._col = col

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        evaluated = self._col(df)
        return ~evaluated
347
+
348
+
349
class _ColApplyOp(ColOp):
    """Pass the whole result of the wrapped operation to a function and return what the function returns."""

    def __init__(self, col: ColOp, f: Callable[[pd.Series], pd.Series]) -> None:
        self._col, self._fun = col, f

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        evaluated = self._col(df)
        return self._fun(evaluated)
356
+
357
+
358
class _ColMapOp(ColOp):
    """Call ``.map`` with the stored function on the result of the wrapped operation."""

    def __init__(self, col: ColOp, f: Callable[[Any], Any]) -> None:
        self._col, self._fun = col, f

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        evaluated = self._col(df)
        return evaluated.map(self._fun)
@@ -0,0 +1,80 @@
1
+ import pandas as pd
2
+ from IPython.display import display
3
+
4
+
5
def set_max_columns(max_columns=None):
    """Set pandas' ``display.max_columns`` option.

    Controls how many columns pandas will print when formatting DataFrames. The option is set
    globally and stays in effect until it is changed again; nothing is restored automatically.

    Parameters
    ----------
    max_columns : int or None
        Maximum number of columns to show. ``None`` (the default) removes the limit, so all
        columns are shown.
    """
    pd.set_option('display.max_columns', max_columns)
16
+
17
+
18
def set_max_rows(max_rows=None):
    """Set pandas' ``display.max_rows`` option.

    Controls how many rows pandas will print when formatting DataFrames. The option is set
    globally and stays in effect until it is changed again; nothing is restored automatically.

    Parameters
    ----------
    max_rows : int or None
        Maximum number of rows to show. ``None`` (the default) removes the limit, so all rows
        are shown.
    """
    pd.set_option('display.max_rows', max_rows)
29
+
30
+
31
class _PandasDisplayCM:
    """Context manager that temporarily removes pandas' column and/or row display limits.

    Parameters
    ----------
    all_columns : bool
        If true, ``display.max_columns`` is set to None inside the ``with`` block.
    all_rows : bool
        If true, ``display.max_rows`` is set to None inside the ``with`` block.

    The values that were in effect on entry are put back on exit, whether or not an exception
    occurred; exceptions are not suppressed.
    """

    def __init__(self, all_columns=False, all_rows=False):
        # Maps the option's short name to the value it had when the block was entered.
        self._value_store = {}
        self.all_columns = all_columns
        self.all_rows = all_rows

    def __enter__(self):
        if self.all_columns:
            self._value_store['max_columns'] = pd.get_option('display.max_columns')
            pd.set_option('display.max_columns', None)
        if self.all_rows:
            self._value_store['max_rows'] = pd.get_option('display.max_rows')
            pd.set_option('display.max_rows', None)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore exactly what was saved on entry, even if the flags were changed in between.
        while self._value_store:
            name, previous = self._value_store.popitem()
            pd.set_option(f'display.{name}', previous)
        return False
51
+
52
+
53
def show_all(df, mode=None, columns=True, rows=True):
    """Display a DataFrame with all rows and/or columns visible.

    The pandas ``display.max_columns`` / ``display.max_rows`` limits are lifted only for the
    duration of the call (through ``_PandasDisplayCM``) while ``IPython.display.display`` renders
    the frame.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to render in the notebook.
    mode : str, optional
        One of:
        - 'columns': expand only columns
        - 'rows': expand only rows
        - None (default): use the ``columns`` and ``rows`` flags below
        Any other value is treated like None.
    columns : bool, optional
        When mode is not 'columns' or 'rows', if True (default) all columns are shown.
    rows : bool, optional
        When mode is not 'columns' or 'rows', if True (default) all rows are shown.
    """
    if mode == 'columns':
        expand_columns, expand_rows = True, False
    elif mode == 'rows':
        expand_columns, expand_rows = False, True
    else:
        expand_columns, expand_rows = columns, rows
    with _PandasDisplayCM(all_columns=expand_columns, all_rows=expand_rows):
        display(df)