superleaf 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- superleaf-0.2.0/LICENSE +21 -0
- superleaf-0.2.0/PKG-INFO +27 -0
- superleaf-0.2.0/README.md +81 -0
- superleaf-0.2.0/pyproject.toml +32 -0
- superleaf-0.2.0/setup.cfg +4 -0
- superleaf-0.2.0/src/superleaf/__init__.py +0 -0
- superleaf-0.2.0/src/superleaf/collections/__init__.py +2 -0
- superleaf-0.2.0/src/superleaf/collections/ordered_set.py +79 -0
- superleaf-0.2.0/src/superleaf/collections/summable_dict.py +71 -0
- superleaf-0.2.0/src/superleaf/dataframe/__init__.py +4 -0
- superleaf-0.2.0/src/superleaf/dataframe/column_ops.py +364 -0
- superleaf-0.2.0/src/superleaf/dataframe/display.py +80 -0
- superleaf-0.2.0/src/superleaf/dataframe/selection.py +155 -0
- superleaf-0.2.0/src/superleaf/dataframe/standardize.py +91 -0
- superleaf-0.2.0/src/superleaf/dataframe/transform.py +199 -0
- superleaf-0.2.0/src/superleaf/operators/__init__.py +5 -0
- superleaf-0.2.0/src/superleaf/operators/base.py +95 -0
- superleaf-0.2.0/src/superleaf/operators/comparison.py +92 -0
- superleaf-0.2.0/src/superleaf/operators/getters.py +23 -0
- superleaf-0.2.0/src/superleaf/operators/string.py +9 -0
- superleaf-0.2.0/src/superleaf/operators/wrappers.py +22 -0
- superleaf-0.2.0/src/superleaf/plotting/__init__.py +0 -0
- superleaf-0.2.0/src/superleaf/plotting/intervals.py +188 -0
- superleaf-0.2.0/src/superleaf/sequences/__init__.py +30 -0
- superleaf-0.2.0/src/superleaf/sequences/serial.py +40 -0
- superleaf-0.2.0/src/superleaf/stats/__init__.py +0 -0
- superleaf-0.2.0/src/superleaf/stats/circular.py +36 -0
- superleaf-0.2.0/src/superleaf/stats/count.py +78 -0
- superleaf-0.2.0/src/superleaf/timeseries/__init__.py +0 -0
- superleaf-0.2.0/src/superleaf/timeseries/datetime_utils.py +248 -0
- superleaf-0.2.0/src/superleaf/timeseries/time_intervals.py +273 -0
- superleaf-0.2.0/src/superleaf/utils/__init__.py +0 -0
- superleaf-0.2.0/src/superleaf/utils/hashing.py +79 -0
- superleaf-0.2.0/src/superleaf/utils/ipython.py +10 -0
- superleaf-0.2.0/src/superleaf/utils/parallel.py +447 -0
- superleaf-0.2.0/src/superleaf.egg-info/PKG-INFO +27 -0
- superleaf-0.2.0/src/superleaf.egg-info/SOURCES.txt +38 -0
- superleaf-0.2.0/src/superleaf.egg-info/dependency_links.txt +1 -0
- superleaf-0.2.0/src/superleaf.egg-info/requires.txt +20 -0
- superleaf-0.2.0/src/superleaf.egg-info/top_level.txt +1 -0
superleaf-0.2.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 Erik Schomburg
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
superleaf-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: superleaf
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: A library for intuitive and readable data manipulation, using functional and pipeable syntax.
|
|
5
|
+
Author-email: Erik Schomburg <eschomburg@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/eschombu/superleaf
|
|
7
|
+
Project-URL: Issues, https://github.com/eschombu/superleaf/issues
|
|
8
|
+
Requires-Python: >=3.8
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: matplotlib
|
|
11
|
+
Requires-Dist: numpy
|
|
12
|
+
Requires-Dist: pandas
|
|
13
|
+
Requires-Dist: pendulum>=3.0.0
|
|
14
|
+
Requires-Dist: scipy
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: flake8; extra == "dev"
|
|
17
|
+
Requires-Dist: pydata-sphinx-theme; extra == "dev"
|
|
18
|
+
Requires-Dist: pydoclint; extra == "dev"
|
|
19
|
+
Requires-Dist: pytest; extra == "dev"
|
|
20
|
+
Requires-Dist: pytest-mock; extra == "dev"
|
|
21
|
+
Provides-Extra: notebook
|
|
22
|
+
Requires-Dist: jupyter; extra == "notebook"
|
|
23
|
+
Provides-Extra: parallel
|
|
24
|
+
Requires-Dist: multiprocess; extra == "parallel"
|
|
25
|
+
Requires-Dist: pyarrow; extra == "parallel"
|
|
26
|
+
Requires-Dist: tqdm; extra == "parallel"
|
|
27
|
+
Dynamic: license-file
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# `superleaf`
|
|
2
|
+
|
|
3
|
+
<h3> <em>NOTE:</em> This package is still under development, it needs more documentation, and some components not related
|
|
4
|
+
to its intended core uses may be removed.</h3>
|
|
5
|
+
|
|
6
|
+
## A library for intuitive and readable data filtering and manipulation, using functional and pipeable syntax.
|
|
7
|
+
|
|
8
|
+
When trying to select and filter `pandas` dataframes in somewhat complicated ways, the syntax can quickly get
|
|
9
|
+
cumbersome. This library provides utilities for quickly and intuitively specifying ways to select and filter based on
|
|
10
|
+
column and row conditions. It also includes utilities for piping and composing value accessor and manipulation operators
|
|
11
|
+
on more general datatypes.
|
|
12
|
+
|
|
13
|
+
## Operators
|
|
14
|
+
|
|
15
|
+
Some of the operators in this library have analogues in the Python standard library `operator` module, but the ones here
|
|
16
|
+
can be more flexibly composed and piped.
|
|
17
|
+
|
|
18
|
+
For example, let's say you want to take the logarithm of all of the values inside one of the fields of a sequence of data
|
|
19
|
+
containers. You can achieve this with the following code:
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
import numpy as np
|
|
23
|
+
from superleaf.operators import operator
|
|
24
|
+
from superleaf.operators.getters import attr_getter
|
|
25
|
+
|
|
26
|
+
log_field_op = attr_getter("field_a") >> operator(np.log) # "right shift" operator used for piping
|
|
27
|
+
results_iter = map(log_field_op, data_containers)
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
This produces an iterator with the same results as the following list comprehension:
|
|
31
|
+
```
|
|
32
|
+
results_list = [np.log(datum.field_a) for datum in data_containers]
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## DataFrame column operators and selection/filtering
|
|
36
|
+
|
|
37
|
+
Row filtering, especially for complicated combinations of conditions, of pandas dataframes can have cumbersome and
|
|
38
|
+
seemingly repetitive syntax. The `dfilter` and `Col` utilities can be used to more succinctly achieve complicated
|
|
39
|
+
filtering and selection of portions of dataframes.
|
|
40
|
+
|
|
41
|
+
For example, consider the following dataframe:
|
|
42
|
+
```
|
|
43
|
+
import pandas as pd
|
|
44
|
+
|
|
45
|
+
df = pd.DataFrame({
|
|
46
|
+
"col1": [ 0, 1, 0, 1, 1, 1, 0],
|
|
47
|
+
"col2": [ -5.1, 2.2, 0.2, 1.7, -1.1, np.nan, 0.9],
|
|
48
|
+
"col3": [ "elephant", "giraffe", "Elephant", "Zebra", "Emu", "lion", "lion"],
|
|
49
|
+
})
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Let's say we want to select the rows where col1 == 1, col2 is NaN or negative, and col3 begins with the letter 'e':
|
|
53
|
+
```
|
|
54
|
+
sub_df = df[(df["col1"] == 1) & (df["col2"].isna() | (df["col2"] < 0)) & df["col3"].map(lambda s: s.lower().startswith('e'))]
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Using `superleaf`, we could do this with the following:
|
|
58
|
+
```
|
|
59
|
+
from superleaf.dataframe.selection import dfilter
|
|
60
|
+
from superleaf.operators import ComparisonFunctions as F, str_op
|
|
61
|
+
|
|
62
|
+
sub_df = dfilter(df, col1=1, col2=(F.isna | F.lt(0)), col3=(str_op("lower") >> F.startswith("e")))
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
This would fail if the values in col3 might not be strings. In this case, a fallback value can be set:
|
|
66
|
+
```
|
|
67
|
+
from superleaf.operators import with_fallback
|
|
68
|
+
|
|
69
|
+
sub_df = dfilter(df, col1=1, col2=(F.isna | F.lt(0)), col3=with_fallback(str_op("lower") >> F.startswith("e"), False))
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Utilities
|
|
73
|
+
|
|
74
|
+
### Parallelization
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
### Hash strings
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
## Data structures and collections
|
|
81
|
+
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "superleaf"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
authors = [
|
|
9
|
+
{name = "Erik Schomburg", email = "eschomburg@gmail.com"},
|
|
10
|
+
]
|
|
11
|
+
description = "A library for intuitive and readable data manipulation, using functional and pipeable syntax."
|
|
12
|
+
requires-python = ">=3.8"
|
|
13
|
+
dependencies = [
|
|
14
|
+
"matplotlib",
|
|
15
|
+
"numpy",
|
|
16
|
+
"pandas",
|
|
17
|
+
"pendulum>=3.0.0",
|
|
18
|
+
"scipy",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.optional-dependencies]
|
|
22
|
+
dev = ["flake8", "pydata-sphinx-theme", "pydoclint", "pytest", "pytest-mock"]
|
|
23
|
+
notebook = ["jupyter"]
|
|
24
|
+
parallel = ["multiprocess", "pyarrow", "tqdm"]
|
|
25
|
+
|
|
26
|
+
[tool.pytest.ini_options]
|
|
27
|
+
pythonpath = ["src"]
|
|
28
|
+
testpaths = ["tests"]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/eschombu/superleaf"
|
|
32
|
+
Issues = "https://github.com/eschombu/superleaf/issues"
|
|
File without changes
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
from typing import Generic, Iterable, Iterator, Self, TypeVar
|
|
3
|
+
|
|
4
|
+
T = TypeVar('T')
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class OrderedSet(Generic[T]):
|
|
8
|
+
"""Similar interface to the native set class, but with item order maintained, and expanded functionality, including
|
|
9
|
+
addition and summation. Implemented by storing the set items as keys in an internal dict."""
|
|
10
|
+
|
|
11
|
+
def __init__(self, items: Iterable[T] = None):
|
|
12
|
+
self._dict: dict[T, None] = dict(zip(items, itertools.repeat(None))) if items is not None else {}
|
|
13
|
+
|
|
14
|
+
@property
|
|
15
|
+
def _items(self) -> list[T]:
|
|
16
|
+
return list(self._dict.keys())
|
|
17
|
+
|
|
18
|
+
def __iter__(self) -> Iterator[T]:
|
|
19
|
+
return iter(self._items)
|
|
20
|
+
|
|
21
|
+
def copy(self) -> Self:
|
|
22
|
+
return self.__class__(self._items)
|
|
23
|
+
|
|
24
|
+
def union(self, other: Iterable[T]) -> Self:
|
|
25
|
+
return self.__class__(itertools.chain(self, other))
|
|
26
|
+
|
|
27
|
+
def add(self, item: T) -> Self:
|
|
28
|
+
self._dict[item] = None
|
|
29
|
+
|
|
30
|
+
def intersection(self, other: Iterable[T]) -> Self:
|
|
31
|
+
return self.__class__(filter(lambda x: x in other, self))
|
|
32
|
+
|
|
33
|
+
def __add__(self, other: Iterable[T]) -> Self:
|
|
34
|
+
return self.__class__(self.union(other))
|
|
35
|
+
|
|
36
|
+
def __radd__(self, other: Iterable[T]) -> Self:
|
|
37
|
+
if other == 0:
|
|
38
|
+
return self
|
|
39
|
+
else:
|
|
40
|
+
if not isinstance(other, type(self)):
|
|
41
|
+
other = type(self)(other)
|
|
42
|
+
return other + self
|
|
43
|
+
|
|
44
|
+
def __iadd__(self, other: Iterable[T]) -> Self:
|
|
45
|
+
if not isinstance(other, self.__class__):
|
|
46
|
+
other = self.__class__(other)
|
|
47
|
+
self._dict.update(other._dict)
|
|
48
|
+
return self
|
|
49
|
+
|
|
50
|
+
def __sub__(self, other: Iterable[T]) -> Self:
|
|
51
|
+
return self.__class__(filter(lambda x: x not in other, self))
|
|
52
|
+
|
|
53
|
+
def __isub__(self, other: Iterable[T]) -> Self:
|
|
54
|
+
if not isinstance(other, self.__class__):
|
|
55
|
+
other = self.__class__(other)
|
|
56
|
+
for item in other:
|
|
57
|
+
if item in self:
|
|
58
|
+
self._dict.pop(item)
|
|
59
|
+
return self
|
|
60
|
+
|
|
61
|
+
def __contains__(self, item: T) -> bool:
|
|
62
|
+
return item in self._dict
|
|
63
|
+
|
|
64
|
+
def __eq__(self, other: Self | set[T]) -> Self:
|
|
65
|
+
if isinstance(other, set):
|
|
66
|
+
return set(self._items) == other
|
|
67
|
+
elif isinstance(other, OrderedSet):
|
|
68
|
+
return self._dict == other._dict
|
|
69
|
+
else:
|
|
70
|
+
return False
|
|
71
|
+
|
|
72
|
+
def __repr__(self) -> str:
|
|
73
|
+
return "{" + ", ".join([item.__repr__() for item in self._items]) + "}"
|
|
74
|
+
|
|
75
|
+
def __len__(self) -> int:
|
|
76
|
+
return len(self._dict)
|
|
77
|
+
|
|
78
|
+
def __getitem__(self, item: int) -> T:
|
|
79
|
+
return self._items[item]
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from typing import Self
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class SummableDict(dict):
|
|
5
|
+
"""A dictionary-like object that supports addition and subtraction of values."""
|
|
6
|
+
|
|
7
|
+
def __add__(self, other) -> Self:
|
|
8
|
+
summed = self.copy()
|
|
9
|
+
if isinstance(other, dict):
|
|
10
|
+
for k, v in other.items():
|
|
11
|
+
if k in summed:
|
|
12
|
+
summed[k] = summed[k] + v
|
|
13
|
+
else:
|
|
14
|
+
summed[k] = v
|
|
15
|
+
else:
|
|
16
|
+
for k, v in self.items():
|
|
17
|
+
summed[k] = summed[k] + other
|
|
18
|
+
return summed
|
|
19
|
+
|
|
20
|
+
def __iadd__(self, other) -> Self:
|
|
21
|
+
if isinstance(other, dict):
|
|
22
|
+
for k, v in other.items():
|
|
23
|
+
if k in self:
|
|
24
|
+
self[k] = self[k] + v
|
|
25
|
+
else:
|
|
26
|
+
self[k] = v
|
|
27
|
+
else:
|
|
28
|
+
for k, v in self.items():
|
|
29
|
+
self[k] = self[k] + other
|
|
30
|
+
return self
|
|
31
|
+
|
|
32
|
+
def __radd__(self, other) -> Self:
|
|
33
|
+
if other == 0:
|
|
34
|
+
return self.copy()
|
|
35
|
+
elif isinstance(other, dict):
|
|
36
|
+
return SummableDict(other) + self
|
|
37
|
+
else:
|
|
38
|
+
summed = self.copy()
|
|
39
|
+
for k, v in self.items():
|
|
40
|
+
summed[k] = other + v
|
|
41
|
+
return summed
|
|
42
|
+
|
|
43
|
+
def __neg__(self) -> Self:
|
|
44
|
+
return SummableDict({k: -v for k, v in self.items()})
|
|
45
|
+
|
|
46
|
+
def __sub__(self, other) -> Self:
|
|
47
|
+
if isinstance(other, dict):
|
|
48
|
+
return self + -SummableDict(other)
|
|
49
|
+
else:
|
|
50
|
+
return self + -other
|
|
51
|
+
|
|
52
|
+
def __isub__(self, other) -> Self:
|
|
53
|
+
if isinstance(other, dict):
|
|
54
|
+
for k, v in other.items():
|
|
55
|
+
if k in self:
|
|
56
|
+
self[k] = self[k] - v
|
|
57
|
+
else:
|
|
58
|
+
self[k] = -v
|
|
59
|
+
else:
|
|
60
|
+
for k, v in self.items():
|
|
61
|
+
self[k] = self[k] - other
|
|
62
|
+
return self
|
|
63
|
+
|
|
64
|
+
def __rsub__(self, other):
|
|
65
|
+
if other == 0:
|
|
66
|
+
return self.copy()
|
|
67
|
+
else:
|
|
68
|
+
return other + -self
|
|
69
|
+
|
|
70
|
+
def copy(self):
|
|
71
|
+
return SummableDict(super().copy())
|
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
from abc import ABCMeta, abstractmethod
|
|
2
|
+
from typing import Any, Callable, Iterable, Optional, Union
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ColOp(metaclass=ABCMeta):
    """Abstract base class for column operations on pandas DataFrames.

    Subclasses implement transformations or evaluations that produce pandas Series or scalar
    results when applied to DataFrames. Supports chaining and combining using logical and
    arithmetic operators. Building an expression does not evaluate anything; evaluation
    happens when the resulting ColOp is called with a DataFrame.

    Operators defined on this class:
    ``|`` (bitwise or), ``&`` (bitwise and), ``~`` (bitwise not), ``==`` (equal to), ``!=`` (not equal to),
    ``<`` (less than), ``<=`` (less than or equal to), ``>`` (greater than), ``>=`` (greater than or equal to),
    ``+`` (addition), ``-`` (subtraction), ``*`` (multiplication), ``/`` (division), ``**`` (power)

    The binary logical and arithmetic operators also work with a plain (non-ColOp) value on
    either side, e.g. ``2 * op`` or ``op + 1``.

    Notes
    -----
    Because ``==`` is overloaded to build a new ColOp, instances are not hashable.
    """

    @abstractmethod
    def __call__(self, df: pd.DataFrame) -> Union[pd.Series, Any]:
        """Evaluate the operation on the DataFrame.

        Parameters
        ----------
        df : pd.DataFrame
            Input DataFrame on which to apply the operation.

        Returns
        -------
        Union[pd.Series, Any]
            The resulting pandas Series or scalar value produced by this operation.
        """
        pass

    # --- logical operators -------------------------------------------------

    def __or__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _OrOp(self, right)

    def __ror__(self, left: Any) -> "ColOp":
        return _OrOp(left, self)

    def __and__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _AndOp(self, right)

    def __rand__(self, left: Any) -> "ColOp":
        return _AndOp(left, self)

    def __invert__(self) -> "ColOp":
        return _NotOp(self)

    # --- comparisons (Python derives the reflected forms automatically) -----

    def __eq__(self, value: Any) -> "ColOp":
        return _EqOp(self, value)

    def __ne__(self, value: Any) -> "ColOp":
        # Expressed as the negation of the equality operation.
        return _NotOp(self == value)

    def __lt__(self, value: Any) -> "ColOp":
        return _LtOp(self, value)

    def __le__(self, value: Any) -> "ColOp":
        return _LeOp(self, value)

    def __gt__(self, value: Any) -> "ColOp":
        return _GtOp(self, value)

    def __ge__(self, value: Any) -> "ColOp":
        return _GeOp(self, value)

    # --- arithmetic operators ----------------------------------------------
    # The reflected variants handle a non-ColOp left operand (``2 * op``);
    # the binary op classes wrap such literals themselves.

    def __add__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _AddOp(self, right)

    def __radd__(self, left: Any) -> "ColOp":
        return _AddOp(left, self)

    def __sub__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _SubtractOp(self, right)

    def __rsub__(self, left: Any) -> "ColOp":
        return _SubtractOp(left, self)

    def __mul__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _MultiplyOp(self, right)

    def __rmul__(self, left: Any) -> "ColOp":
        return _MultiplyOp(left, self)

    def __truediv__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _DivideOp(self, right)

    def __rtruediv__(self, left: Any) -> "ColOp":
        return _DivideOp(left, self)

    def __pow__(self, right: Union["ColOp", Any]) -> "ColOp":
        return _PowOp(self, right)

    def __rpow__(self, left: Any) -> "ColOp":
        return _PowOp(left, self)

    # --- transformations ---------------------------------------------------

    def apply(self, f: Callable[[pd.Series], pd.Series]) -> "ColOp":
        """Apply a transformation function to the result of this operation.

        Parameters
        ----------
        f : callable
            A function that takes a pandas Series and returns a transformed Series.

        Returns
        -------
        ColOp
            A new ColOp representing the application of ``f`` to this operation's output.
        """
        return _ColApplyOp(self, f)

    def map(self, f: Callable[[Any], Any]) -> "ColOp":
        """Map a function over each element of the Series produced by this operation.

        Parameters
        ----------
        f : callable
            A function applied element-wise to each value in the Series.

        Returns
        -------
        ColOp
            A new ColOp representing the mapped operation.
        """
        return _ColMapOp(self, f)

    def isin(self, values: Union["ColOp", Iterable[Any]]) -> "ColOp":
        """Test whether each element of the Series is in the given values.

        Parameters
        ----------
        values : iterable or ColOp
            A collection of values to test membership against. If a ColOp is
            given, each element of this operation's result is tested against
            the corresponding (row-wise) element of ``values``' result, which
            must itself support ``in``.

        Returns
        -------
        ColOp
            A new ColOp that yields a boolean Series.
        """
        if isinstance(values, ColOp):
            # Wrap both sides in one-element lists and add them, giving a
            # [element, container] pair per row for the membership test.
            combined_vals = self.to_list() + values.to_list()
            return combined_vals.map(lambda x: x[0] in x[1])
        else:
            return self.apply(lambda s: s.isin(values))

    def contains(self, value: Any) -> "ColOp":
        """Test whether each element of the Series contains the specified value.

        Parameters
        ----------
        value : Any
            Value to search for within each element.

        Returns
        -------
        ColOp
            A new ColOp that yields a boolean Series.
        """
        return self.map(lambda x: value in x)

    def notna(self) -> "ColOp":
        """Test for non-missing values in the Series.

        Returns
        -------
        ColOp
            A new ColOp yielding a boolean Series where True indicates non-null values.
        """
        return self.apply(lambda s: s.notna())

    def isna(self) -> "ColOp":
        """Test for missing values in the Series.

        Returns
        -------
        ColOp
            A new ColOp yielding a boolean Series where True indicates null values.
        """
        return self.apply(lambda s: s.isna())

    def astype(self, type_) -> "ColOp":
        """Cast the Series to a specified dtype.

        Parameters
        ----------
        type_ : type or str
            The target data type for the Series.

        Returns
        -------
        ColOp
            A new ColOp representing the cast operation.
        """
        return self.apply(lambda s: s.astype(type_))

    def to_list(self) -> "ColOp":
        """Wrap each element in the Series into a single-element list.

        Returns
        -------
        ColOp
            A new ColOp that converts each scalar to a list containing that value.
        """
        return self.map(lambda x: [x])
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class Index(ColOp):
    """Column operation that evaluates to the index of its input.

    The constructor takes no arguments.

    Examples
    --------
    >>> idx = Index()
    >>> idx(df)
    DatetimeIndex([...])
    """

    def __call__(self, df: pd.DataFrame) -> pd.Index:
        """Return ``df.index``."""
        index = df.index
        return index
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class Col(ColOp):
    """Represent a named column in a DataFrame.

    Parameters
    ----------
    name : str, optional
        Column name to select. If None (the default), selects the entire DataFrame.

    Examples
    --------
    >>> col = Col('column_name')
    >>> col(df)
    0    ...
    Name: column_name, dtype: dtype
    """

    def __init__(self, name: Optional[str] = None):
        # ``name`` is documented as optional, so it defaults to None.
        self._name = name

    def __call__(self, df: pd.DataFrame) -> Union[pd.Series, pd.DataFrame]:
        """Return ``df[name]``, or ``df.iloc[:]`` when no name was given."""
        if self._name is None:
            return df.iloc[:]
        return df[self._name]
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class Values(Col):
    """Column operation for use directly on a pandas Series.

    Calling it on a Series returns ``series.iloc[:]``.

    Notes
    -----
    This raises a TypeError if called on a DataFrame instead of a Series.

    Examples
    --------
    >>> values = Values()
    >>> result = values(series)  # same as series.iloc[:]
    """

    def __init__(self):
        # No column name: this operation acts on the whole input.
        super().__init__(name=None)

    def __call__(self, s: pd.Series) -> pd.Series:
        """Return ``s.iloc[:]``, rejecting DataFrame input."""
        if not isinstance(s, pd.DataFrame):
            return s.iloc[:]
        raise TypeError("Values can only be called on a Series")
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
class _LiteralOp(ColOp):
    """Column operation that ignores its input and yields a fixed value."""

    def __init__(self, value: Any) -> None:
        """Store ``value``, which is returned unchanged on every call."""
        self._value = value

    def __call__(self, df: pd.DataFrame) -> Any:
        """Return the stored value; ``df`` is not used."""
        constant = self._value
        return constant
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
class _ComparisonOp(ColOp):
    """Shared constructor for comparisons between a column op and a value."""

    def __init__(self, col: ColOp, value: Union[ColOp, Any]) -> None:
        """Store the left operand and the right operand.

        A right operand that is not already a ColOp is wrapped in a
        ``_LiteralOp`` so both sides can be called with a DataFrame.
        """
        self._col = col
        self._value = value if isinstance(value, ColOp) else _LiteralOp(value)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class _EqOp(_ComparisonOp):
    """Comparison op: left result ``==`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._col(df)
        rhs = self._value(df)
        return lhs == rhs
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
class _LtOp(_ComparisonOp):
    """Comparison op: left result ``<`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._col(df)
        rhs = self._value(df)
        return lhs < rhs
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
class _LeOp(_ComparisonOp):
    """Comparison op: left result ``<=`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._col(df)
        rhs = self._value(df)
        return lhs <= rhs
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
class _GtOp(_ComparisonOp):
    """Comparison op: left result ``>`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._col(df)
        rhs = self._value(df)
        return lhs > rhs
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class _GeOp(_ComparisonOp):
    """Comparison op: left result ``>=`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._col(df)
        rhs = self._value(df)
        return lhs >= rhs
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
class _BinaryOp(ColOp):
    """Shared constructor for two-operand logical and arithmetic operations."""

    def __init__(self, left: Union[ColOp, Any], right: Union[ColOp, Any]) -> None:
        """Store both operands, wrapping any non-ColOp operand in a ``_LiteralOp``."""
        self._left = left if isinstance(left, ColOp) else _LiteralOp(left)
        self._right = right if isinstance(right, ColOp) else _LiteralOp(right)
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class _OrOp(_BinaryOp):
    """Binary op: left result ``|`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._left(df)
        rhs = self._right(df)
        return lhs | rhs
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
class _AndOp(_BinaryOp):
    """Binary op: left result ``&`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._left(df)
        rhs = self._right(df)
        return lhs & rhs
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
class _AddOp(_BinaryOp):
    """Binary op: left result ``+`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._left(df)
        rhs = self._right(df)
        return lhs + rhs
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
class _SubtractOp(_BinaryOp):
    """Binary op: left result ``-`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._left(df)
        rhs = self._right(df)
        return lhs - rhs
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
class _MultiplyOp(_BinaryOp):
    """Binary op: left result ``*`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._left(df)
        rhs = self._right(df)
        return lhs * rhs
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
class _DivideOp(_BinaryOp):
    """Binary op: left result ``/`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        lhs = self._left(df)
        rhs = self._right(df)
        return lhs / rhs
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class _PowOp(_BinaryOp):
    """Binary op: left result ``**`` right result."""

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        base = self._left(df)
        exponent = self._right(df)
        return base ** exponent
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
class _NotOp(ColOp):
    """Unary op applying ``~`` to the result of the wrapped operation."""

    def __init__(self, col: ColOp) -> None:
        """Store the operation whose result will be inverted."""
        self._col = col

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        """Evaluate the wrapped operation on ``df`` and return ``~`` of its result."""
        evaluated = self._col(df)
        return ~evaluated
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
class _ColApplyOp(ColOp):
    """Op that passes the whole result of another operation to a function."""

    def __init__(self, col: ColOp, f: Callable[[pd.Series], pd.Series]) -> None:
        """Store the source operation and the function to call on its result."""
        self._col = col
        self._fun = f

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        """Evaluate the source operation on ``df`` and return ``f`` of that result."""
        evaluated = self._col(df)
        return self._fun(evaluated)
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
class _ColMapOp(ColOp):
    """Op that calls ``.map`` with a function on the result of another operation."""

    def __init__(self, col: ColOp, f: Callable[[Any], Any]) -> None:
        """Store the source operation and the function to map over its result."""
        self._col = col
        self._fun = f

    def __call__(self, df: pd.DataFrame) -> pd.Series:
        """Evaluate the source operation on ``df`` and return ``result.map(f)``."""
        evaluated = self._col(df)
        return evaluated.map(self._fun)
|