spotforecast2 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spotforecast2/data/fetch_data.py +44 -21
- spotforecast2/processing/n2n_predict.py +12 -3
- spotforecast2/processing/n2n_predict_with_covariates.py +13 -1
- {spotforecast2-0.1.1.dist-info → spotforecast2-0.2.1.dist-info}/METADATA +1 -1
- {spotforecast2-0.1.1.dist-info → spotforecast2-0.2.1.dist-info}/RECORD +6 -6
- {spotforecast2-0.1.1.dist-info → spotforecast2-0.2.1.dist-info}/WHEEL +0 -0
spotforecast2/data/fetch_data.py
CHANGED
|
@@ -44,57 +44,80 @@ def get_data_home(data_home: Optional[Union[str, Path]] = None) -> Path:
|
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
def fetch_data(
|
|
47
|
-
filename: str = "data_in.csv",
|
|
47
|
+
filename: Union[str, pd.DataFrame] = "data_in.csv",
|
|
48
48
|
columns: Optional[list] = None,
|
|
49
49
|
index_col: int = 0,
|
|
50
50
|
parse_dates: bool = True,
|
|
51
51
|
dayfirst: bool = False,
|
|
52
52
|
timezone: str = "UTC",
|
|
53
53
|
) -> pd.DataFrame:
|
|
54
|
-
"""Fetches the integrated raw dataset from a CSV file.
|
|
54
|
+
"""Fetches the integrated raw dataset from a CSV file or processes a DataFrame.
|
|
55
55
|
|
|
56
56
|
Args:
|
|
57
|
-
filename (str):
|
|
58
|
-
Filename of the CSV file containing the dataset
|
|
57
|
+
filename (str or pd.DataFrame):
|
|
58
|
+
Filename of the CSV file containing the dataset (located in data home directory)
|
|
59
|
+
or a pandas DataFrame. If DataFrame is provided, it will be processed with
|
|
60
|
+
proper timezone handling. Default: "data_in.csv".
|
|
59
61
|
columns (list, optional):
|
|
60
62
|
List of columns to be included in the dataset. If None, all columns are included.
|
|
61
63
|
If an empty list is provided, a ValueError is raised.
|
|
62
64
|
index_col (int):
|
|
63
|
-
Column index to be used as the index.
|
|
65
|
+
Column index to be used as the index (only used when loading from CSV).
|
|
64
66
|
parse_dates (bool):
|
|
65
|
-
Whether to parse dates in the index column.
|
|
67
|
+
Whether to parse dates in the index column (only used when loading from CSV).
|
|
66
68
|
dayfirst (bool):
|
|
67
|
-
Whether the day comes first in date parsing.
|
|
69
|
+
Whether the day comes first in date parsing (only used when loading from CSV).
|
|
68
70
|
timezone (str):
|
|
69
|
-
Timezone to set for the datetime index.
|
|
71
|
+
Timezone to set for the datetime index. If a DataFrame with a naive index is provided,
|
|
72
|
+
it will be localized to this timezone then converted to UTC. Default: "UTC".
|
|
70
73
|
|
|
71
74
|
Returns:
|
|
72
|
-
pd.DataFrame: The integrated raw dataset.
|
|
75
|
+
pd.DataFrame: The integrated raw dataset with UTC timezone.
|
|
73
76
|
|
|
74
77
|
Raises:
|
|
75
78
|
ValueError: If columns is an empty list.
|
|
79
|
+
FileNotFoundError: If CSV file does not exist.
|
|
76
80
|
|
|
77
81
|
Examples:
|
|
82
|
+
Load from CSV:
|
|
78
83
|
>>> from spotforecast2.data.fetch_data import fetch_data
|
|
79
84
|
>>> data = fetch_data(columns=["col1", "col2"])
|
|
80
85
|
>>> data.head()
|
|
81
86
|
Header1 Header2 Header3
|
|
87
|
+
|
|
88
|
+
Process a DataFrame:
|
|
89
|
+
>>> import pandas as pd
|
|
90
|
+
>>> df = pd.DataFrame({"value": [1, 2, 3]},
|
|
91
|
+
... index=pd.date_range("2024-01-01", periods=3, freq="h"))
|
|
92
|
+
>>> data = fetch_data(filename=df, timezone="Europe/Berlin")
|
|
93
|
+
>>> data.index.tz
|
|
94
|
+
<UTC>
|
|
82
95
|
"""
|
|
83
96
|
if columns is not None and len(columns) == 0:
|
|
84
97
|
raise ValueError("columns must be specified and cannot be empty.")
|
|
85
98
|
|
|
86
|
-
|
|
87
|
-
if
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
99
|
+
# Check if filename is actually a DataFrame
|
|
100
|
+
if isinstance(filename, pd.DataFrame):
|
|
101
|
+
# Use Data.from_dataframe for proper timezone handling
|
|
102
|
+
dataset = Data.from_dataframe(
|
|
103
|
+
df=filename,
|
|
104
|
+
timezone=timezone,
|
|
105
|
+
columns=columns,
|
|
106
|
+
)
|
|
107
|
+
else:
|
|
108
|
+
# Load from CSV file
|
|
109
|
+
csv_path = get_data_home() / filename
|
|
110
|
+
if not Path(csv_path).is_file():
|
|
111
|
+
raise FileNotFoundError(f"The file {csv_path} does not exist.")
|
|
112
|
+
|
|
113
|
+
dataset = Data.from_csv(
|
|
114
|
+
csv_path=csv_path,
|
|
115
|
+
index_col=index_col,
|
|
116
|
+
parse_dates=parse_dates,
|
|
117
|
+
dayfirst=dayfirst,
|
|
118
|
+
timezone=timezone,
|
|
119
|
+
columns=columns,
|
|
120
|
+
)
|
|
98
121
|
|
|
99
122
|
return dataset.data
|
|
100
123
|
|
|
@@ -210,6 +210,7 @@ def _model_directory_exists(model_dir: Union[str, Path]) -> bool:
|
|
|
210
210
|
|
|
211
211
|
|
|
212
212
|
def n2n_predict(
|
|
213
|
+
data: Optional[pd.DataFrame] = None,
|
|
213
214
|
columns: Optional[List[str]] = None,
|
|
214
215
|
forecast_horizon: int = 24,
|
|
215
216
|
contamination: float = 0.01,
|
|
@@ -233,6 +234,8 @@ def n2n_predict(
|
|
|
233
234
|
Existing models are reused for prediction unless force_train=True.
|
|
234
235
|
|
|
235
236
|
Args:
|
|
237
|
+
data: Optional DataFrame with target time series data. If None, fetches data automatically.
|
|
238
|
+
Default: None.
|
|
236
239
|
columns: List of target columns to forecast. If None, uses all available columns.
|
|
237
240
|
Default: None.
|
|
238
241
|
forecast_horizon: Number of time steps to forecast ahead. Default: 24.
|
|
@@ -311,10 +314,16 @@ def n2n_predict(
|
|
|
311
314
|
|
|
312
315
|
if verbose:
|
|
313
316
|
print("--- Starting n2n_predict ---")
|
|
314
|
-
print("Fetching data...")
|
|
315
317
|
|
|
316
|
-
#
|
|
317
|
-
data
|
|
318
|
+
# Handle data input - fetch_data handles both CSV and DataFrame
|
|
319
|
+
if data is not None:
|
|
320
|
+
if verbose:
|
|
321
|
+
print("Using provided dataframe...")
|
|
322
|
+
data = fetch_data(filename=data, columns=TARGET)
|
|
323
|
+
else:
|
|
324
|
+
if verbose:
|
|
325
|
+
print("Fetching data from CSV...")
|
|
326
|
+
data = fetch_data(columns=TARGET)
|
|
318
327
|
|
|
319
328
|
START, END, COV_START, COV_END = get_start_end(
|
|
320
329
|
data=data,
|
|
@@ -727,6 +727,7 @@ def _model_directory_exists(model_dir: Union[str, Path]) -> bool:
|
|
|
727
727
|
|
|
728
728
|
|
|
729
729
|
def n2n_predict_with_covariates(
|
|
730
|
+
data: Optional[pd.DataFrame] = None,
|
|
730
731
|
forecast_horizon: int = 24,
|
|
731
732
|
contamination: float = 0.01,
|
|
732
733
|
window_size: int = 72,
|
|
@@ -763,6 +764,8 @@ def n2n_predict_with_covariates(
|
|
|
763
764
|
Existing models are reused for prediction unless force_train=True.
|
|
764
765
|
|
|
765
766
|
Args:
|
|
767
|
+
data: Optional DataFrame with target time series data. If None, fetches data automatically.
|
|
768
|
+
Default: None.
|
|
766
769
|
forecast_horizon: Number of time steps to forecast ahead. Default: 24.
|
|
767
770
|
contamination: Contamination parameter for outlier detection. Default: 0.01.
|
|
768
771
|
window_size: Rolling window size for gap detection. Default: 72.
|
|
@@ -865,7 +868,16 @@ def n2n_predict_with_covariates(
|
|
|
865
868
|
if verbose:
|
|
866
869
|
print("\n[1/9] Loading and preparing target data...")
|
|
867
870
|
|
|
868
|
-
data
|
|
871
|
+
# Handle data input - fetch_data handles both CSV and DataFrame
|
|
872
|
+
if data is None:
|
|
873
|
+
if verbose:
|
|
874
|
+
print(" Fetching data from CSV...")
|
|
875
|
+
data = fetch_data(timezone=timezone)
|
|
876
|
+
else:
|
|
877
|
+
if verbose:
|
|
878
|
+
print(" Using provided dataframe...")
|
|
879
|
+
data = fetch_data(filename=data, timezone=timezone)
|
|
880
|
+
|
|
869
881
|
target_columns = data.columns.tolist()
|
|
870
882
|
|
|
871
883
|
if verbose:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
spotforecast2/__init__.py,sha256=X9sBx15iz8yqr9iDJcrGJM5nhvnpaczXto4XV_GtfhE,59
|
|
2
2
|
spotforecast2/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
spotforecast2/data/data.py,sha256=HEgr-FULaqHvuMeKTviOgYyo3GbxpGRTo3ZnmIU9w2Y,4422
|
|
4
|
-
spotforecast2/data/fetch_data.py,sha256=
|
|
4
|
+
spotforecast2/data/fetch_data.py,sha256=whefI8g3y7rQzzhOp3uB6L3VQdvu4p2qc6aWvkKSgD0,8010
|
|
5
5
|
spotforecast2/exceptions.py,sha256=6gOji-3cP-YAisPoxXCcrEEbjTnfPN1YqEhGYhmyZ8Y,20499
|
|
6
6
|
spotforecast2/forecaster/__init__.py,sha256=BbCOS2ouKcPC9VzcdprllVyqlZIyAWXCOvUAiInxDi4,140
|
|
7
7
|
spotforecast2/forecaster/base.py,sha256=rXhcjY4AMpyQhkpbtLIA8OOrGEb8fU57SQiyeR9c9DQ,16748
|
|
@@ -31,8 +31,8 @@ spotforecast2/preprocessing/imputation.py,sha256=lmH-HumI_QLLm9aMESe_oZq84Axn60w
|
|
|
31
31
|
spotforecast2/preprocessing/outlier.py,sha256=jZxAR870QtYner7b4gXk6LLGJw0juLq1VU4CGklYd3c,4208
|
|
32
32
|
spotforecast2/preprocessing/split.py,sha256=mzzt5ltUZdVzfWtBBTQjp8E2MyqVdWUFtz7nN11urbU,5011
|
|
33
33
|
spotforecast2/processing/agg_predict.py,sha256=VKlruB0x-eJKokkHyJxR87rZ4m53si3ODbrd0ibPlow,2378
|
|
34
|
-
spotforecast2/processing/n2n_predict.py,sha256=
|
|
35
|
-
spotforecast2/processing/n2n_predict_with_covariates.py,sha256=
|
|
34
|
+
spotforecast2/processing/n2n_predict.py,sha256=eClK0uccg_pKCnDfzWckFV6TWprTxskaY3whH_vJGs0,14911
|
|
35
|
+
spotforecast2/processing/n2n_predict_with_covariates.py,sha256=xP7V_JB0xF7I5KlAqeR-U0ENwxZNSvSQvsPOLHjZWV8,40913
|
|
36
36
|
spotforecast2/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
37
|
spotforecast2/utils/__init__.py,sha256=NrMt_xJLe4rbTFbsbgSQYeREohEOiYG5S-97e6Jj07I,1018
|
|
38
38
|
spotforecast2/utils/convert_to_utc.py,sha256=hz8mJUHK9jDLUiN5LdNX5l3KZuOKlklyycB4zFdB9Ng,1405
|
|
@@ -42,6 +42,6 @@ spotforecast2/utils/generate_holiday.py,sha256=SHaPvPMt-abis95cChHf5ObyPwCTrzJ87
|
|
|
42
42
|
spotforecast2/utils/validation.py,sha256=x9ypQzcneDhWJA_piiY4Q3_ogoGd1LTsZ7__MFeG9Fc,21618
|
|
43
43
|
spotforecast2/weather/__init__.py,sha256=1Jco88pl0deNESgNATin83Nf5i9c58pxN7G-vNiOiu0,120
|
|
44
44
|
spotforecast2/weather/weather_client.py,sha256=Ec_ywug6uoa71MfXM8RNbXEvtBtBzr-SUS5xq_HKtZE,9837
|
|
45
|
-
spotforecast2-0.
|
|
46
|
-
spotforecast2-0.
|
|
47
|
-
spotforecast2-0.
|
|
45
|
+
spotforecast2-0.2.1.dist-info/WHEEL,sha256=5DEXXimM34_d4Gx1AuF9ysMr1_maoEtGKjaILM3s4w4,80
|
|
46
|
+
spotforecast2-0.2.1.dist-info/METADATA,sha256=CZtz7eiMtA8zOF85_sLrApPp085fYOYPsI7btv7UdtY,3481
|
|
47
|
+
spotforecast2-0.2.1.dist-info/RECORD,,
|
|
File without changes
|