sw1pers-l 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sw1pers_l-0.1.0/CHANGES.txt +6 -0
- sw1pers_l-0.1.0/LICENSE.txt +12 -0
- sw1pers_l-0.1.0/MANIFEST.in +8 -0
- sw1pers_l-0.1.0/PKG-INFO +37 -0
- sw1pers_l-0.1.0/README.md +7 -0
- sw1pers_l-0.1.0/pyproject.toml +28 -0
- sw1pers_l-0.1.0/setup.cfg +33 -0
- sw1pers_l-0.1.0/setup.py +3 -0
- sw1pers_l-0.1.0/src/sw1pers_l/__init__.py +5 -0
- sw1pers_l-0.1.0/src/sw1pers_l/core.py +60 -0
- sw1pers_l-0.1.0/src/sw1pers_l/data_processing/__init__.py +7 -0
- sw1pers_l-0.1.0/src/sw1pers_l/data_processing/cubic_spline.py +14 -0
- sw1pers_l-0.1.0/src/sw1pers_l/data_processing/moving_average.py +7 -0
- sw1pers_l-0.1.0/src/sw1pers_l/diagrams/__init__.py +6 -0
- sw1pers_l-0.1.0/src/sw1pers_l/diagrams/make_diagrams.py +15 -0
- sw1pers_l-0.1.0/src/sw1pers_l/parameter_selection/__init__.py +10 -0
- sw1pers_l-0.1.0/src/sw1pers_l/parameter_selection/compute_delay.py +43 -0
- sw1pers_l-0.1.0/src/sw1pers_l/parameter_selection/compute_dim.py +48 -0
- sw1pers_l-0.1.0/src/sw1pers_l/sw1pers_scores/__init__.py +9 -0
- sw1pers_l-0.1.0/src/sw1pers_l/sw1pers_scores/_density.py +5 -0
- sw1pers_l-0.1.0/src/sw1pers_l/sw1pers_scores/max_features.py +19 -0
- sw1pers_l-0.1.0/src/sw1pers_l/sw1pers_scores/plot.py +115 -0
- sw1pers_l-0.1.0/src/sw1pers_l/sw1pers_scores/score.py +14 -0
- sw1pers_l-0.1.0/src/sw1pers_l/time_series/__init__.py +8 -0
- sw1pers_l-0.1.0/src/sw1pers_l/time_series/sliding_windows.py +4 -0
- sw1pers_l-0.1.0/src/sw1pers_l/time_series/sw_embedding.py +5 -0
- sw1pers_l-0.1.0/src/sw1pers_l/visualize/__init__.py +6 -0
- sw1pers_l-0.1.0/src/sw1pers_l/visualize/visualize.py +33 -0
- sw1pers_l-0.1.0/src/sw1pers_l/windows/__init__.py +12 -0
- sw1pers_l-0.1.0/src/sw1pers_l/windows/make_emb_ts.py +26 -0
- sw1pers_l-0.1.0/src/sw1pers_l/windows/make_emb_windows.py +47 -0
- sw1pers_l-0.1.0/src/sw1pers_l/windows/point_cloud_tools.py +28 -0
- sw1pers_l-0.1.0/src/sw1pers_l.egg-info/PKG-INFO +37 -0
- sw1pers_l-0.1.0/src/sw1pers_l.egg-info/SOURCES.txt +37 -0
- sw1pers_l-0.1.0/src/sw1pers_l.egg-info/dependency_links.txt +1 -0
- sw1pers_l-0.1.0/src/sw1pers_l.egg-info/requires.txt +7 -0
- sw1pers_l-0.1.0/src/sw1pers_l.egg-info/top_level.txt +1 -0
- sw1pers_l-0.1.0/tests/test_pipeline.py +16 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Copyright (c) 2026 Miguel Almeida
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "sw1pers-l"), to deal
|
|
5
|
+
in the Software without restriction, including without limitation the rights
|
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
copies of the Software.
|
|
8
|
+
|
|
9
|
+
The above copyright notice and this permission notice shall be included in all
|
|
10
|
+
copies or substantial portions of the Software.
|
|
11
|
+
|
|
12
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
|
sw1pers_l-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sw1pers_l
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: SW1Pers landscape for time series periodicity analysis
|
|
5
|
+
Author: Miguel Almeida
|
|
6
|
+
Author-email: Miguel Almeida <migpinalm@gmail.com>
|
|
7
|
+
License: Copyright (c) 2026 Miguel Almeida
|
|
8
|
+
|
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
+
of this software and associated documentation files (the "sw1pers-l"), to deal
|
|
11
|
+
in the Software without restriction, including without limitation the rights
|
|
12
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
+
copies of the Software.
|
|
14
|
+
|
|
15
|
+
The above copyright notice and this permission notice shall be included in all
|
|
16
|
+
copies or substantial portions of the Software.
|
|
17
|
+
|
|
18
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE.txt
|
|
22
|
+
Requires-Dist: numpy
|
|
23
|
+
Requires-Dist: matplotlib
|
|
24
|
+
Requires-Dist: scipy
|
|
25
|
+
Requires-Dist: scikit-learn
|
|
26
|
+
Requires-Dist: tqdm
|
|
27
|
+
Requires-Dist: ripser
|
|
28
|
+
Requires-Dist: plotly
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
The algorithm SW1PerS yields a scalar periodicity score for univariate time series.
|
|
32
|
+
The aim of this project is to extend the algorithm so that it assigns an array of periodicity scores to a univariate time series:
|
|
33
|
+
The time series is divided into overlapping snippets (sub-time-series), to each of which we apply SW1PerS.
|
|
34
|
+
The size of the snippet and the overlapping size are hyper-parameters (dependent on the data)
|
|
35
|
+
|
|
36
|
+
This is useful to locate periodic behaviour in general time series.
|
|
37
|
+
On the other hand, it is also useful for locating aperiodic behaviour in periodic time series.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
The algorithm SW1PerS yields a scalar periodicity score for univariate time series.
|
|
2
|
+
The aim of this project is to extend the algorithm so that it assigns an array of periodicity scores to a univariate time series:
|
|
3
|
+
The time series is divided into overlapping snippets (sub-time-series), to each of which we apply SW1PerS.
|
|
4
|
+
The size of the snippet and the overlapping size are hyper-parameters (dependent on the data)
|
|
5
|
+
|
|
6
|
+
This is useful to locate periodic behaviour in general time series.
|
|
7
|
+
On the other hand, it is also useful for locating aperiodic behaviour in periodic time series.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sw1pers_l"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "SW1Pers landscape for time series periodicity analysis"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE.txt" }
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "Miguel Almeida", email = "migpinalm@gmail.com"}
|
|
13
|
+
]
|
|
14
|
+
requires-python = ">=3.11"
|
|
15
|
+
|
|
16
|
+
dependencies = [
|
|
17
|
+
"numpy",
|
|
18
|
+
"matplotlib",
|
|
19
|
+
"scipy",
|
|
20
|
+
"scikit-learn",
|
|
21
|
+
"tqdm",
|
|
22
|
+
"ripser",
|
|
23
|
+
"plotly"
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[tool.setuptools.packages.find]
|
|
27
|
+
where = ["src"]
|
|
28
|
+
include = ["sw1pers_l*"]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[metadata]
|
|
2
|
+
name = sw1pers_l
|
|
3
|
+
version = 0.1.0
|
|
4
|
+
description = SW1Pers landscape for time series periodicity analysis
|
|
5
|
+
long_description = file: README.md
|
|
6
|
+
long_description_content_type = text/markdown
|
|
7
|
+
author = Miguel Almeida
|
|
8
|
+
license = MIT
|
|
9
|
+
license_files = LICENSE.txt
|
|
10
|
+
|
|
11
|
+
[options]
|
|
12
|
+
package_dir =
|
|
13
|
+
= src
|
|
14
|
+
packages = find:
|
|
15
|
+
python_requires = >=3.11
|
|
16
|
+
include_package_data = True
|
|
17
|
+
install_requires =
|
|
18
|
+
numpy
|
|
19
|
+
matplotlib
|
|
20
|
+
scipy
|
|
21
|
+
scikit-learn
|
|
22
|
+
tqdm
|
|
23
|
+
ripser
|
|
24
|
+
plotly
|
|
25
|
+
|
|
26
|
+
[options.packages.find]
|
|
27
|
+
where = src
|
|
28
|
+
include = sw1pers_l*
|
|
29
|
+
|
|
30
|
+
[egg_info]
|
|
31
|
+
tag_build =
|
|
32
|
+
tag_date = 0
|
|
33
|
+
|
sw1pers_l-0.1.0/setup.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .time_series import embed
|
|
3
|
+
from .windows import make_embedded_windows, make_embedded_ts
|
|
4
|
+
from .data_processing import moving_avg, make_spline
|
|
5
|
+
from .visualize import attractor
|
|
6
|
+
from .diagrams import make_pers_diagrams, make_pers_diagram
|
|
7
|
+
from .sw1pers_scores import compute_scores, density, plot_score_landscape
|
|
8
|
+
|
|
9
|
+
def SW1PerS(values, rolling_size=1, factor=2):
    """Compute one SW1PerS periodicity score for a whole time series.

    Pipeline: smooth ``values`` with ``moving_avg``, resample the smoothed
    series with ``make_spline`` (requesting ``factor * len(values)``
    samples), embed the resampled series as a single point cloud with
    ``make_embedded_ts``, build its persistence diagram and score it.

    Parameters
    ----------
    values
        The univariate time series.
    rolling_size
        Window size forwarded to ``moving_avg``.
    factor
        Multiplier applied to ``len(values)`` to get the number of spline
        samples requested from ``make_spline``.

    Returns
    -------
    The output of ``compute_scores`` for the single diagram.
    """
    smoothed_t, smoothed = moving_avg(values, rolling_size)

    # Only the resampled values are needed; the resampled time axis is dropped.
    _, resampled = make_spline(smoothed_t, smoothed, factor*len(values))

    # make_embedded_ts also returns the chosen dimension and delay (unused here).
    point_cloud, _, _ = make_embedded_ts(resampled)

    return compute_scores([make_pers_diagram(point_cloud)])
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
#---------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
def SW1PerS_L(values, rolling_size=1, factor=2, choose_hyper_param=False):
    """Compute and plot a landscape of SW1PerS scores over sliding windows.

    The series is smoothed with ``moving_avg`` and resampled with
    ``make_spline``; the resampled series is cut into overlapping windows,
    each window is embedded and scored, and the scores are plotted together
    with their density via ``plot_score_landscape``.

    Parameters
    ----------
    values
        The univariate time series.
    rolling_size
        Window size forwarded to ``moving_avg``.
    factor
        Multiplier applied to ``len(values)`` to get the number of spline
        samples requested from ``make_spline``.
    choose_hyper_param
        When true, the two divisors that define the window size and stride
        are read interactively from standard input. Otherwise the window
        size is 1/8 of the resampled series and the stride is 1/4 of the
        window size.

    Returns
    -------
    The array of per-window scores produced by ``compute_scores``.

    Raises
    ------
    ValueError
        If a divisor is not positive, or if the resampled series is too short
        to yield a window size and stride of at least one sample.
    """
    t_ma, ma = moving_avg(values, rolling_size)

    # Only the resampled values are needed; the resampled time axis is dropped.
    _, finer_spline = make_spline(t_ma, ma, factor*len(values))

    #-----------------------------------
    if choose_hyper_param:
        size_coeff = int(input("Window size = length of time series / __"))
        stride_coeff = int(input("Window stride = window size / __"))
        if size_coeff <= 0 or stride_coeff <= 0:
            raise ValueError("window size and stride divisors must be positive integers")
        print(f"window_size = len(time_series)/{size_coeff}")
        print(f"window_stride = window_size/{stride_coeff}")
        print()
    else:
        size_coeff, stride_coeff = 8, 4

    size = len(finer_spline)//size_coeff
    stride = size//stride_coeff

    # A zero stride would otherwise surface later as a division by zero when
    # the rolling size of the score density is derived.
    if size < 1 or stride < 1:
        raise ValueError(
            f"time series too short: window size {size} and stride {stride} "
            "must both be at least 1"
        )

    emb_windows, _, _ = make_embedded_windows(finer_spline, size, stride)

    #-----------------------------------

    pers_dgms = make_pers_diagrams(emb_windows)

    scores = compute_scores(pers_dgms)

    #-----------------------------------

    # Number of strides per window; used as the smoothing width of the density.
    rolling_size_scores = size // stride

    score_density = density(scores, rolling_size_scores)

    plot_score_landscape(scores, score_density, finer_spline, size, stride, None, rolling_size_scores)

    return scores
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from scipy.interpolate import CubicSpline
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
def make_spline(x, y, instances=2000,):
    """Resample ``(x, y)`` on a finer, evenly spaced grid with a cubic spline.

    The number of output samples is ``len(y) * factor`` where ``factor`` is
    ``instances / len(y)`` rounded to an integer, so the result has
    approximately (not exactly) ``instances`` samples.

    Parameters
    ----------
    x
        Sample positions, passed to ``scipy.interpolate.CubicSpline``.
    y
        Sample values at ``x``.
    instances
        Approximate number of samples wanted in the output.

    Returns
    -------
    t_fine, finer_spline
        The evenly spaced positions between ``x[0]`` and ``x[-1]`` and the
        spline evaluated at those positions.
    """
    # round() yields 0 when instances < len(y) / 2, which used to produce
    # empty outputs; never go below the original resolution.
    factor = max(1, round(instances/len(y)))
    print(f"Factor = {factor}\n")

    n_samples = len(y) * factor

    cs = CubicSpline(x, y)

    t_fine = np.linspace(x[0], x[-1], n_samples)
    finer_spline = cs(t_fine)
    return t_fine, finer_spline
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from tqdm import tqdm
|
|
2
|
+
from ripser import ripser
|
|
3
|
+
|
|
4
|
+
def make_pers_diagram(data, thresh = 1):
    """Compute the persistence diagrams of one point cloud with ripser.

    Parameters
    ----------
    data
        The point cloud handed to ``ripser``.
    thresh
        Value forwarded as ripser's ``thresh`` argument. It was previously
        ignored in favour of a hard-coded 1, which is still the default.

    Returns
    -------
    The ``'dgms'`` entry of ripser's result (computed with ``maxdim=1``).
    """
    # Original author's note: coeff must not divide window size!
    result = ripser(data, maxdim=1, thresh=thresh, coeff=13)
    return result['dgms']
|
|
7
|
+
|
|
8
|
+
def make_pers_diagrams(emb_windows):
    """Compute the persistence diagrams of every embedded window.

    Prints a header and shows a ``tqdm`` progress bar while iterating.

    Returns
    -------
    A list with one ``make_pers_diagram`` result per window, in input order.
    """
    print()
    print("Forming persistence diagrams...\n")
    return [make_pers_diagram(cloud, thresh = 1) for cloud in tqdm(emb_windows)]
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
|
|
2
|
+
# parameter_selection
|
|
3
|
+
|
|
4
|
+
from .compute_delay import compute_optimal_delay, average_mutual_information
|
|
5
|
+
from .compute_dim import compute_optimal_dim, false_nearest_neighbors
|
|
6
|
+
|
|
7
|
+
__all__ = ["average_mutual_information",
|
|
8
|
+
"compute_optimal_delay",
|
|
9
|
+
"false_nearest_neighbors",
|
|
10
|
+
"compute_optimal_dim"]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from sklearn.metrics import mutual_info_score
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
def average_mutual_information(ts, max_lag=100, bins=64, plot=False):
    """Compute the mutual information between ``ts`` and lagged copies of itself.

    For every lag from 1 to ``len(ts) // 2`` the series is paired with itself
    shifted by that lag, the pairs are binned into a 2-D histogram and the
    histogram is scored with ``sklearn.metrics.mutual_info_score``.

    Parameters
    ----------
    ts
        The time series; an array of shape ``(n, 1)`` is squeezed to ``(n,)``.
    max_lag
        Currently unused: it is overwritten on the first line of the body.
    bins
        Bin specification forwarded to ``np.histogram2d``.
    plot
        Currently unused; the plotting code is commented out.

    Returns
    -------
    A list with one value per lag, or ``np.zeros(len(ts) // 2)`` when the
    series is constant.
    """
    # NOTE(review): this discards the caller's ``max_lag``; the largest lag is
    # always half the series length.
    max_lag = len(ts)//2
    if ts.shape == (ts.shape[0], 1):
        ts = ts.squeeze(1)

    # A constant series cannot be rescaled below (zero range).
    # NOTE(review): this branch returns an ndarray while the normal path
    # returns a list.
    if np.max(ts) == np.min(ts):
        return np.zeros(max_lag)

    ts = (ts - np.min(ts)) / (np.max(ts) - np.min(ts)) # normalize
    ami = []
    for lag in range(1, max_lag + 1):
        # Pair each sample with the sample ``lag`` steps later.
        x = ts[:-lag]
        y = ts[lag:]
        # histogram binning
        c_xy = np.histogram2d(x, y, bins)[0]
        # Keep only the rows and columns of the histogram that contain counts.
        c_xy = c_xy[c_xy.sum(axis=1) > 0][:, c_xy.sum(axis=0) > 0]

        if c_xy.size == 0:
            ami.append(0.0)
        else:
            ami.append(mutual_info_score(None, None, contingency=c_xy))
    # if plot:
    #     plt.plot(range(1, max_lag+1), ami)
    #     plt.xlabel("Lag")
    #     plt.ylabel("AMI")
    #     plt.title("Average Mutual Information")
    #     plt.show()
    return ami
|
|
32
|
+
|
|
33
|
+
def compute_optimal_delay(ami, msg_bool=False):
    """Pick the delay at the first local minimum of a mutual-information curve.

    ``ami[k]`` is taken to be the value at lag ``k + 1``. An interior entry is
    a local minimum when it is strictly below its left neighbour and not above
    its right neighbour. The first and last entries are never candidates.

    Parameters
    ----------
    ami
        Sequence of mutual-information values, one per lag.
    msg_bool
        When true, print which delay was chosen.

    Returns
    -------
    The lag of the first local minimum, or 1 when there is none.
    """
    curve = np.asarray(ami)

    interior = curve[1:-1]
    is_local_min = (interior < curve[:-2]) & (interior <= curve[2:])
    hits = np.flatnonzero(is_local_min)

    if hits.size:
        # +1 converts the interior position to an index into ``curve``,
        # another +1 converts that index to a lag.
        delay = int(hits[0]) + 2
        if msg_bool:
            print(f"Optimal delay found at τ = {delay}")
        return delay

    if msg_bool:
        print("No local minimum found, using delay = 1")
    return 1
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from scipy.spatial.distance import cdist
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
def false_nearest_neighbors(ts, max_dim, delay, R_thresh=10, A_thresh=2, plot=False):
    """Compute the percentage of false nearest neighbours per embedding dimension.

    For each dimension ``d`` starting at 1, the series is delay-embedded in
    ``d`` dimensions and every point is matched with its nearest neighbour.
    The pair counts as false when the gap between their next delayed
    coordinates is more than ``R_thresh`` times their distance, or more than
    ``A_thresh`` times the standard deviation of the series.

    Dimensions are tried up to ``max_dim``, stopping early once the number of
    embedded points is no longer above one third of the series length.

    Parameters
    ----------
    ts
        The time series as a NumPy array; shape ``(n, 1)`` is squeezed to ``(n,)``.
    max_dim
        Largest embedding dimension to evaluate.
    delay
        Delay, in samples, between embedding coordinates.
    R_thresh
        Threshold on the relative distance increase.
    A_thresh
        Threshold on the distance increase relative to the series' standard
        deviation.
    plot
        Currently unused; the plotting code is commented out.

    Returns
    -------
    A list of percentages (0-100), one per evaluated dimension. It may be
    shorter than ``max_dim`` and may be empty.
    """
    if ts.shape == (ts.shape[0], 1):
        ts = ts.squeeze(1)
    # N = len(ts) - max_dim * delay
    fnn_percentages = []
    eps = 1e-10  # small value to avoid divide-by-zero

    # Does not depend on the dimension, so compute it once.
    series_std = np.std(ts)

    d = 1
    N = len(ts) - d * delay
    while d < max_dim+1 and N > len(ts)/3:  # ensure N is larger than one third the window size
        embedded = np.array([ts[i:i + d * delay:delay] for i in range(N)])
        dists = cdist(embedded, embedded)
        # A point must not be selected as its own nearest neighbour.
        np.fill_diagonal(dists, np.inf)
        nearest_idx = np.argmin(dists, axis=1)
        # Coordinate each point (and its neighbour) would gain in dimension d + 1.
        next_vals = ts[delay * d:N + delay * d]
        next_vals_nn = ts[nearest_idx + delay * d]
        dist_increase = np.abs(next_vals - next_vals_nn)
        denom = dists[np.arange(N), nearest_idx] + eps
        # The second test is written as a product so that a constant series
        # (zero standard deviation) does not trigger a division by zero.
        false_neighbors = ((dist_increase / denom) > R_thresh) | (dist_increase > A_thresh * series_std)
        fnn_percentages.append(np.mean(false_neighbors) * 100)
        d += 1
        N = len(ts) - d * delay

    # if plot:
    #     plt.plot(range(1, d+1), fnn_percentages)
    #     plt.xlabel("Embedding Dimension")
    #     plt.ylabel("False Nearest Neighbors (%)")
    #     plt.title("FNN vs Embedding Dimension")
    #     plt.show()

    return fnn_percentages
|
|
36
|
+
|
|
37
|
+
def compute_optimal_dim(fnn, msg_bool = True):
    """Pick the embedding dimension from false-nearest-neighbour percentages.

    ``fnn[k]`` is taken to be the percentage for dimension ``k + 1``. The
    result is the first dimension whose percentage is below a threshold that
    starts at 3 and is raised by 1 until some entry qualifies.

    Parameters
    ----------
    fnn
        Iterable of percentages, one per dimension.
    msg_bool
        When true, print the chosen dimension.

    Returns
    -------
    The chosen dimension (1-based).

    Raises
    ------
    ValueError
        If ``fnn`` is empty or holds no finite value. Such input used to make
        the search loop forever.
    """
    values = list(fnn)
    # Without at least one finite entry no threshold can ever be satisfied.
    if not any(value < float("inf") for value in values):
        raise ValueError("fnn must contain at least one finite percentage")

    threshhold = 3  # percent; starting value
    while True:
        for i, value in enumerate(values):
            if value < threshhold:
                optimal_dim = i + 1
                if msg_bool:
                    print(f"Found optimal dimension: {optimal_dim}\n")
                return optimal_dim
        threshhold += 1  # increase threshhold by 1% if no fnn value satisfies condition
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
|
|
2
|
+
# sw1pers_scores
|
|
3
|
+
|
|
4
|
+
from .max_features import select_max_features
|
|
5
|
+
from .score import compute_scores
|
|
6
|
+
from .plot import plot_score_landscape, plot_scores_comparison
|
|
7
|
+
from ._density import density
|
|
8
|
+
|
|
9
|
+
__all__ = ["select_max_features", "compute_scores", "plot_score_landscape", "plot_scores_comparison", "density"]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def select_max_features(diagrams):
    """Select one representative 1-dimensional feature per persistence diagram.

    Only ``diagram[1]`` is read from each diagram. Its rows are scanned in
    order as ``(birth, death)`` pairs:

    * a finite feature more persistent than the current best becomes the best;
    * otherwise, if the feature's death is infinite or the current best
      persistence exceeds ``sqrt(3)``, the selection becomes
      ``[birth, sqrt(3)]`` and the best persistence is set to ``sqrt(3)``.

    Parameters
    ----------
    diagrams
        Iterable of diagrams, each indexable so that ``diagram[1]`` yields
        the ``(birth, death)`` pairs.

    Returns
    -------
    A list with one ``[birth, death]`` pair per diagram. ``[0, 0]`` is used
    when the diagram has no feature or none with positive persistence (the
    latter used to yield an empty list, which downstream scoring cannot index).
    """
    cap = np.sqrt(3)
    max_feature_arr = []
    for diagram in diagrams:
        best_feature = [0, 0]
        best_persistence = 0
        for feature in diagram[1]:
            birth, death = feature[0], feature[1]
            if best_persistence < death - birth and not np.isinf(death):
                best_persistence = death - birth
                best_feature = [birth, death]
            elif np.isinf(death) or best_persistence > cap:
                best_persistence = cap
                best_feature = [birth, cap]
        max_feature_arr.append(best_feature)
    return max_feature_arr
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import matplotlib.dates as mdates
|
|
5
|
+
from sw1pers_l.sw1pers_scores._density import density
|
|
6
|
+
|
|
7
|
+
def plot_score_landscape(scores, score_density, finer_spline, window_size, window_stride, dates, rolling_size_scores):
    """Plot the time series together with its per-window periodicity scores.

    The series is drawn on the primary y-axis. On a twin y-axis the scores
    are drawn as translucent bars of width ``window_size``, one every
    ``window_stride`` samples starting at half a window, and the score
    density is drawn as a line. A secondary x-axis on top labels every 20th
    window index. The figure is displayed with ``plt.show()``.

    Parameters
    ----------
    scores
        One score per window.
    score_density
        Smoothed scores; trimmed by ``rolling_size_scores // 2`` entries at
        each end before plotting.
    finer_spline
        The (resampled) time series to draw.
    window_size, window_stride
        Window geometry, in samples of ``finer_spline``.
    dates
        ``None``, or a sequence whose first and last entries delimit the
        dates used to label the x-axis.
    rolling_size_scores
        Smoothing width that was used to build ``score_density``.
    """
    offset = round(window_size/2)
    fig, ax1 = plt.subplots(figsize=(11, 4))
    x = range(0, len(finer_spline))

    # Secondary y-axis (scores) ---------------------------------------------------
    ax2 = ax1.twinx()
    bar_y = scores
    bar_x = np.arange(offset, offset + window_stride * len(scores), window_stride)
    bar_width = window_size
    ax2.bar(bar_x, bar_y, width=bar_width, alpha=0.15, label="Periodicity Score", color = "orange")
    ax2.set_ylabel("Periodicity Score", color = "orange")
    ax2.tick_params(axis='y', labelcolor = "orange")
    ax2.set_ylim(0, np.max(bar_y)*1.05)

    # Primary y-axis (time series) ------------------------------------------------
    ax1.plot(x, finer_spline, label="Time Series Spline", color="C0", alpha=0.4)
    ax1.set_ylabel("Time Series Spline", color="C0")
    ax1.tick_params(axis='y', labelcolor='C0')

    # Window indexing --------------------------------------------------------------
    secax = ax1.secondary_xaxis("top")
    step = 20
    tick_positions = bar_x[::step]
    tick_labels = [str(i) for i in range(0, len(bar_x), step)]
    secax.set_xticks(tick_positions)
    secax.set_xticklabels(tick_labels, rotation=45)
    secax.set_xlabel("Windows")

    # Date indexing ----------------------------------------------------------------
    if dates is not None:
        n_ticks = int(len(finer_spline)/window_size/2)
        tick_positions = np.linspace(0, len(finer_spline)-1, n_ticks, dtype=int)
        tick_dates = pd.date_range(start=dates[0], end=dates[-1], periods=n_ticks)
        ax1.set_xticks(tick_positions)
        ax1.set_xticklabels(tick_dates.strftime("%Y-%m-%d"), rotation=45, ha="right")  # %Y-%m-%d [%H:%M]
        ax1.set_xlabel("Date")

    # Density plot ----------------------------------------------------------------
    # Trim half a smoothing window from each end (center alignment). Explicit
    # stop indices are used because ``seq[0:-0]`` is empty, which used to hide
    # the density line whenever rolling_size_scores was below 2.
    half = rolling_size_scores//2
    ma_x = bar_x[half : len(bar_x) - half]
    ma_y = score_density[half : len(score_density) - half]
    ax2.plot(ma_x, ma_y, color="red", linewidth=2, label="Anomaly Density (MA)", alpha=0.6)

    # Labels: combine from both axes-----------------------------------------------
    lines_1, labels_1 = ax1.get_legend_handles_labels()
    lines_2, labels_2 = ax2.get_legend_handles_labels()
    ax1.legend(lines_1 + lines_2, labels_1 + labels_2, loc="lower right")
    plt.tight_layout()
    plt.grid()
    plt.show()
|
|
62
|
+
|
|
63
|
+
def plot_scores_comparison(scores, secondary_scores, window_size, window_stride, dates, factor=1, score_rolling_size=1, sec_score_rolling_size=1, is_score_windowed = False, show_bars = False, label_msg = "Alternative anomaly score"):
    """Draw SW1PerS scores against a second score series on twin y-axes.

    The density of ``secondary_scores`` is drawn on the primary y-axis and
    the density of ``scores`` on a twin y-axis, optionally with the raw
    scores as bars. A secondary x-axis on top labels every 20th window
    index. This function only draws; it does not call ``plt.show()``.

    Parameters
    ----------
    scores
        One periodicity score per window.
    secondary_scores
        The series to compare against.
    window_size, window_stride
        Window geometry used to place the windows on the x-axis.
    dates
        ``None``, or a sequence whose first and last entries delimit the
        dates used to label the x-axis.
    factor
        Multiplier applied to ``len(dates)`` when placing date ticks.
    score_rolling_size, sec_score_rolling_size
        Smoothing widths passed to ``density`` for the two series.
    is_score_windowed
        When true, the secondary density is plotted at the window positions;
        otherwise against its own index.
    show_bars
        When true, also draw ``scores`` as translucent bars.
    label_msg
        Y-axis label for the secondary series.
    """
    fig, ax1 = plt.subplots(figsize=(11, 4))
    offset = round(window_size/2)

    # Window x-axis set up -------------------------
    windows_x = np.arange(offset, offset + window_stride * len(scores), window_stride)

    # Secondary scores plot ----------------
    # Computed once and reused for both the x-range and the curve.
    sec_score_density = density(secondary_scores, sec_score_rolling_size)
    if is_score_windowed:
        ax1.plot(windows_x, sec_score_density, color="C0", linewidth=1, label="Secondary Scores")
    else:
        x = range(0, len(sec_score_density))
        ax1.plot(x, sec_score_density, linewidth=1, label="Secondary Score Landscape", color="C0")
    ax1.set_ylabel(label_msg, color="C0")
    ax1.tick_params(axis='y', labelcolor='C0')

    ax2 = ax1.twinx()

    # sw1pers bars plot
    if show_bars:
        windows_y = scores
        bar_width = window_size
        ax2.bar(windows_x, windows_y, width=bar_width, alpha=0.15, label="Periodicity Scores", color = "orange")
        ax2.set_ylabel("Periodicity Scores", color = "orange")
        ax2.tick_params(axis='y', labelcolor = "orange")

    # sw1pers plot -----------------
    # ceil(score_rolling_size / 2) entries are trimmed from each end (center alignment).
    trim = -int(-score_rolling_size//2)
    ma_x = windows_x[trim : -trim]
    ma_y = density(scores, score_rolling_size)[trim : -trim]
    ax2.plot(ma_x, ma_y, color="orange", linewidth=1, label="Periodicity Density")
    ax2.set_ylabel("Periodicity score", color="orange")
    ax2.tick_params(axis='y', labelcolor='orange')

    # Window indexing -----------------------
    secax = ax2.secondary_xaxis("top")
    step = 20
    tick_positions = windows_x[::step]
    tick_labels = [str(i) for i in range(0, len(windows_x), step)]
    secax.set_xticks(tick_positions)
    secax.set_xticklabels(tick_labels, rotation=45)
    secax.set_xlabel("Windows")

    # Date indexing ---------------------------------------------------------------
    if dates is not None:
        n_ticks = int(len(dates)*factor/(window_size))
        tick_positions = np.linspace(0, len(dates)*factor-1, n_ticks, dtype=int)
        tick_dates = pd.date_range(start=dates[0], end=dates[-1], periods=n_ticks)
        ax1.set_xticks(tick_positions)
        ax1.set_xticklabels(tick_dates.strftime("%Y-%m-%d"), rotation=45, ha="right")  # %Y-%m-%d [%H:%M]
        ax1.set_xlabel("Date")
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .max_features import select_max_features
|
|
3
|
+
|
|
4
|
+
def compute_scores(diagrams, n=2, m=2): # n >= m
    """Turn persistence diagrams into periodicity scores in ``[0, 1]``.

    For the feature ``(birth, death)`` chosen by ``select_max_features`` for
    each diagram, the score is ``1 - (death**n - birth**m) / sqrt(3)**n``,
    limited to the interval ``[0, 1]``.

    Parameters
    ----------
    diagrams
        Persistence diagrams, forwarded to ``select_max_features``.
    n, m
        Exponents applied to the death and birth values respectively.

    Returns
    -------
    A NumPy array with one score per diagram.
    """
    def _limit(raw):
        # Values outside [0, 1] are replaced by the nearest bound.
        if raw < 0:
            return 0
        return 1 if raw > 1 else raw

    return np.array([
        _limit(1 - (feat[1]**n - feat[0]**m)/(np.sqrt(3))**n)
        for feat in select_max_features(diagrams)
    ])
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import plotly.graph_objs as go
|
|
3
|
+
|
|
4
|
+
def attractor(data, arr = (0, 1, 2), msg="Takens Embedding"):
    """Show an interactive 3-D scatter plot of an embedded point cloud.

    Prints the shape of ``data`` and the number of unique values on each
    plotted axis, then opens a plotly figure.

    Parameters
    ----------
    data
        2-D array of points, one per row. With more than two columns the
        columns listed in ``arr`` are plotted; otherwise the first two
        columns are plotted with ``z`` set to zero.
    arr
        Indices of the three columns used as x, y and z. The default is a
        tuple so that it is not a shared mutable object.
    msg
        Figure title.
    """
    print(f"Window shape: {data.shape}")

    if data.shape[1]>2:
        x,y,z = data[:, arr[0]], data[:, arr[1]], data[:, arr[2]]
    else:
        x,y,z = data[:, 0], data[:, 1], np.zeros(len(data), )

    print("Unique x:", len(np.unique(x)))
    print("Unique y:", len(np.unique(y)))
    print("Unique z:", len(np.unique(z)))

    fig = go.Figure(data=[go.Scatter3d(
        x=x, y=y, z=z,
        mode='markers',
        marker=dict(
            size=5,
            color=x,                # Colour varies with the x value
            colorscale='Viridis',   # Choose a colorscale
            opacity=0.8
        )
    )])

    fig.update_layout(
        title=msg,
        scene = dict(aspectmode = 'manual')
    )

    fig.show()
|
|
33
|
+
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
|
|
2
|
+
# windows
|
|
3
|
+
|
|
4
|
+
from .make_emb_windows import make_embedded_windows
|
|
5
|
+
from .make_emb_ts import make_embedded_ts
|
|
6
|
+
from .point_cloud_tools import mean_center, normalize, meanshift_pointcloud
|
|
7
|
+
|
|
8
|
+
__all__ = ["make_embedded_windows",
|
|
9
|
+
"make_embedded_ts",
|
|
10
|
+
"mean_center",
|
|
11
|
+
"normalize",
|
|
12
|
+
"meanshift_pointcloud"]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from tqdm import tqdm
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
from sw1pers_l import parameter_selection
|
|
5
|
+
from sw1pers_l import time_series
|
|
6
|
+
|
|
7
|
+
from .point_cloud_tools import mean_center, normalize, meanshift_pointcloud
|
|
8
|
+
|
|
9
|
+
def make_embedded_ts(X):
    """Embed a whole time series as one cleaned point cloud.

    The delay is chosen from the mutual-information curve and the dimension
    from the false-nearest-neighbour percentages (at most 10 dimensions).
    The embedded cloud is mean-centred, normalised, smoothed with
    ``meanshift_pointcloud`` and normalised again.

    Returns
    -------
    emb_cloud, dim, delay
        The processed point cloud and the embedding parameters used.
    """
    series = X.reshape(len(X), )
    n_bins = int(np.sqrt(len(series)))+1

    delay = parameter_selection.compute_optimal_delay(
        parameter_selection.average_mutual_information(series, bins=n_bins),
        msg_bool=False,
    )
    dim = parameter_selection.compute_optimal_dim(
        parameter_selection.false_nearest_neighbors(series, max_dim=10, delay=delay),
        msg_bool=False,
    )

    cloud = normalize(mean_center(time_series.embed(series, dim, delay)))

    # pi/16 is radius used in Perea's paper; the mean shift moves points, so
    # the cloud is normalised a second time afterwards.
    cloud = normalize(meanshift_pointcloud(cloud, np.pi/16))

    return cloud, dim, delay
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from tqdm import tqdm
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
from sw1pers_l import parameter_selection
|
|
5
|
+
from sw1pers_l import time_series
|
|
6
|
+
|
|
7
|
+
from .point_cloud_tools import mean_center, normalize, meanshift_pointcloud
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def make_embedded_windows(X, window_size, window_stride):
    """Cut a series into sliding windows and embed each one as a point cloud.

    In a first pass a delay (from the mutual-information curve) and a
    dimension (from the false-nearest-neighbour percentages, at most 10) are
    chosen for every window. In a second pass each window is embedded with
    its own parameters, mean-centred, normalised, smoothed with
    ``meanshift_pointcloud`` and normalised again. Progress is reported with
    printed headers and ``tqdm`` bars.

    Parameters
    ----------
    X
        The time series; reshaped to one dimension.
    window_size, window_stride
        Forwarded to ``time_series.make_sliding_windows``.

    Returns
    -------
    emb_windows, input_dimensions, input_delays
        The processed point clouds and the dimension and delay used for each
        window, all in window order.
    """
    X = X.reshape(len(X), )

    windows = time_series.make_sliding_windows(X, window_size, window_stride)

    emb_windows = []
    input_dimensions = []
    input_delays = []

    print("Finding best parameters...\n")
    for window in tqdm(windows):
        ami = parameter_selection.average_mutual_information(window, bins=int(np.sqrt(len(window)))+1)
        delay = parameter_selection.compute_optimal_delay(ami, msg_bool=False)
        input_delays.append(delay)

        fnn = parameter_selection.false_nearest_neighbors(window, max_dim=10, delay=delay)
        dim = parameter_selection.compute_optimal_dim(fnn, msg_bool=False)
        input_dimensions.append(dim)

    print()
    print("Forming point clouds...\n")
    for i, window in enumerate(tqdm(windows)):
        input_dim = input_dimensions[i]
        input_delay = input_delays[i]

        # Use this window's own delay. The previous code passed the delay
        # left over from the last window of the first pass to every window.
        emb_window = time_series.embed(window, input_dim, input_delay)

        emb_window = mean_center(emb_window)
        emb_window = normalize(emb_window)

        emb_window = meanshift_pointcloud(emb_window, np.pi/16)  # pi/16 is radius used in Perea's paper
        emb_window = normalize(emb_window)  # normalize again because meanshift changes this

        emb_windows.append(emb_window)

    return emb_windows, input_dimensions, input_delays
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def mean_center(X):
    """Return ``X`` with the mean over axis 0 subtracted from every row."""
    column_means = np.mean(X, axis = 0)
    return X - column_means
|
|
5
|
+
|
|
6
|
+
def normalize(X):
    """Scale every row of ``X`` to unit Euclidean norm.

    Rows whose norm is zero are returned unchanged (all zeros) instead of
    being turned into NaNs by a division by zero.
    """
    norms = np.linalg.norm(X, axis = 1, keepdims=True)
    # Dividing a zero row by 1 leaves it at zero and avoids 0/0.
    safe_norms = np.where(norms == 0, 1.0, norms)
    return X/safe_norms
|
|
8
|
+
|
|
9
|
+
# def mean_center_normalize(X):
|
|
10
|
+
# return normalize(mean_center(X))
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
from sklearn.metrics.pairwise import pairwise_distances
|
|
14
|
+
|
|
15
|
+
def meanshift_pointcloud(SW_cloud, epsilon, metric='cosine'):
    """Replace every point by the mean of the points close to it.

    Two points are close when their pairwise distance (as computed by
    ``sklearn.metrics.pairwise.pairwise_distances`` with ``metric``) is at
    most ``1 - cos(epsilon)``.

    Parameters
    ----------
    SW_cloud
        2-D array of points, one per row.
    epsilon
        Neighbourhood radius; converted to a distance bound as above.
    metric
        Metric name forwarded to ``pairwise_distances``.

    Returns
    -------
    An array shaped like ``SW_cloud`` holding the averaged points.
    """
    distances = pairwise_distances(SW_cloud, metric=metric)
    max_distance = 1 - np.cos(epsilon)

    smoothed = np.zeros_like(SW_cloud)
    for k in range(SW_cloud.shape[0]):
        neighbours = SW_cloud[distances[k] <= max_distance]
        smoothed[k] = neighbours.mean(axis=0)

    return smoothed
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sw1pers_l
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: SW1Pers landscape for time series periodicity analysis
|
|
5
|
+
Author: Miguel Almeida
|
|
6
|
+
Author-email: Miguel Almeida <migpinalm@gmail.com>
|
|
7
|
+
License: Copyright (c) 2026 Miguel Almeida
|
|
8
|
+
|
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
+
of this software and associated documentation files (the "sw1pers-l"), to deal
|
|
11
|
+
in the Software without restriction, including without limitation the rights
|
|
12
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
+
copies of the Software.
|
|
14
|
+
|
|
15
|
+
The above copyright notice and this permission notice shall be included in all
|
|
16
|
+
copies or substantial portions of the Software.
|
|
17
|
+
|
|
18
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE.txt
|
|
22
|
+
Requires-Dist: numpy
|
|
23
|
+
Requires-Dist: matplotlib
|
|
24
|
+
Requires-Dist: scipy
|
|
25
|
+
Requires-Dist: scikit-learn
|
|
26
|
+
Requires-Dist: tqdm
|
|
27
|
+
Requires-Dist: ripser
|
|
28
|
+
Requires-Dist: plotly
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
The algorithm SW1PerS yields a scalar periodicity score for univariate time series.
|
|
32
|
+
The aim of this project is to extend the algorithm so that it assigns an array of periodicity scores to a univariate time series:
|
|
33
|
+
The time series is divided into overlapping snippets (sub-time-series), to each of which we apply SW1PerS.
|
|
34
|
+
The size of the snippet and the overlapping size are hyper-parameters (dependent on the data)
|
|
35
|
+
|
|
36
|
+
This is useful to locate periodic behaviour in general time series.
|
|
37
|
+
On the other hand, it is also useful for locating aperiodic behaviour in periodic time series.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
CHANGES.txt
|
|
2
|
+
LICENSE.txt
|
|
3
|
+
MANIFEST.in
|
|
4
|
+
README.md
|
|
5
|
+
pyproject.toml
|
|
6
|
+
setup.cfg
|
|
7
|
+
setup.py
|
|
8
|
+
src/sw1pers_l/__init__.py
|
|
9
|
+
src/sw1pers_l/core.py
|
|
10
|
+
src/sw1pers_l.egg-info/PKG-INFO
|
|
11
|
+
src/sw1pers_l.egg-info/SOURCES.txt
|
|
12
|
+
src/sw1pers_l.egg-info/dependency_links.txt
|
|
13
|
+
src/sw1pers_l.egg-info/requires.txt
|
|
14
|
+
src/sw1pers_l.egg-info/top_level.txt
|
|
15
|
+
src/sw1pers_l/data_processing/__init__.py
|
|
16
|
+
src/sw1pers_l/data_processing/cubic_spline.py
|
|
17
|
+
src/sw1pers_l/data_processing/moving_average.py
|
|
18
|
+
src/sw1pers_l/diagrams/__init__.py
|
|
19
|
+
src/sw1pers_l/diagrams/make_diagrams.py
|
|
20
|
+
src/sw1pers_l/parameter_selection/__init__.py
|
|
21
|
+
src/sw1pers_l/parameter_selection/compute_delay.py
|
|
22
|
+
src/sw1pers_l/parameter_selection/compute_dim.py
|
|
23
|
+
src/sw1pers_l/sw1pers_scores/__init__.py
|
|
24
|
+
src/sw1pers_l/sw1pers_scores/_density.py
|
|
25
|
+
src/sw1pers_l/sw1pers_scores/max_features.py
|
|
26
|
+
src/sw1pers_l/sw1pers_scores/plot.py
|
|
27
|
+
src/sw1pers_l/sw1pers_scores/score.py
|
|
28
|
+
src/sw1pers_l/time_series/__init__.py
|
|
29
|
+
src/sw1pers_l/time_series/sliding_windows.py
|
|
30
|
+
src/sw1pers_l/time_series/sw_embedding.py
|
|
31
|
+
src/sw1pers_l/visualize/__init__.py
|
|
32
|
+
src/sw1pers_l/visualize/visualize.py
|
|
33
|
+
src/sw1pers_l/windows/__init__.py
|
|
34
|
+
src/sw1pers_l/windows/make_emb_ts.py
|
|
35
|
+
src/sw1pers_l/windows/make_emb_windows.py
|
|
36
|
+
src/sw1pers_l/windows/point_cloud_tools.py
|
|
37
|
+
tests/test_pipeline.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
sw1pers_l
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
|
|
2
|
+
# run this test: go to root directory -> python tests/test_pipeline.py
|
|
3
|
+
|
|
4
|
+
#------------------------------------------------
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import matplotlib.pyplot as plt
|
|
8
|
+
|
|
9
|
+
from sw1pers_l.core import SW1PerS, SW1PerS_L
|
|
10
|
+
|
|
11
|
+
#------------------------------------------------
|
|
12
|
+
|
|
13
|
+
def test_pipeline():
    """Run SW1PerS_L on ten periods of a cosine and sanity-check the scores."""
    x = np.arange(0, 10*2*np.pi, 0.1)
    test_ts = np.cos(x)

    scores = SW1PerS_L(test_ts)

    # compute_scores limits every score to the interval [0, 1].
    assert len(scores) > 0
    assert np.all((scores >= 0) & (scores <= 1))
    return scores


# Keep `python tests/test_pipeline.py` working without running the pipeline
# as a side effect of merely importing this module.
if __name__ == "__main__":
    scores = test_pipeline()
|