spotforecast2 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,57 +44,80 @@ def get_data_home(data_home: Optional[Union[str, Path]] = None) -> Path:
44
44
 
45
45
 
46
46
  def fetch_data(
47
- filename: str = "data_in.csv",
47
+ filename: Union[str, pd.DataFrame] = "data_in.csv",
48
48
  columns: Optional[list] = None,
49
49
  index_col: int = 0,
50
50
  parse_dates: bool = True,
51
51
  dayfirst: bool = False,
52
52
  timezone: str = "UTC",
53
53
  ) -> pd.DataFrame:
54
- """Fetches the integrated raw dataset from a CSV file.
54
+ """Fetches the integrated raw dataset from a CSV file or processes a DataFrame.
55
55
 
56
56
  Args:
57
- filename (str):
58
- Filename of the CSV file containing the dataset. It must be located in the data home directory, which can be retrieved or set using `get_data_home()`.
57
+ filename (str or pd.DataFrame):
58
+ Filename of the CSV file containing the dataset (located in data home directory)
59
+ or a pandas DataFrame. If DataFrame is provided, it will be processed with
60
+ proper timezone handling. Default: "data_in.csv".
59
61
  columns (list, optional):
60
62
  List of columns to be included in the dataset. If None, all columns are included.
61
63
  If an empty list is provided, a ValueError is raised.
62
64
  index_col (int):
63
- Column index to be used as the index.
65
+ Column index to be used as the index (only used when loading from CSV).
64
66
  parse_dates (bool):
65
- Whether to parse dates in the index column.
67
+ Whether to parse dates in the index column (only used when loading from CSV).
66
68
  dayfirst (bool):
67
- Whether the day comes first in date parsing.
69
+ Whether the day comes first in date parsing (only used when loading from CSV).
68
70
  timezone (str):
69
- Timezone to set for the datetime index.
71
+ Timezone to set for the datetime index. If a DataFrame with naive index is provided,
72
+ it will be localized to this timezone then converted to UTC. Default: "UTC".
70
73
 
71
74
  Returns:
72
- pd.DataFrame: The integrated raw dataset.
75
+ pd.DataFrame: The integrated raw dataset with UTC timezone.
73
76
 
74
77
  Raises:
75
78
  ValueError: If columns is an empty list.
79
+ FileNotFoundError: If CSV file does not exist.
76
80
 
77
81
  Examples:
82
+ Load from CSV:
78
83
  >>> from spotforecast2.data.fetch_data import fetch_data
79
84
  >>> data = fetch_data(columns=["col1", "col2"])
80
85
  >>> data.head()
81
86
  Header1 Header2 Header3
87
+
88
+ Process a DataFrame:
89
+ >>> import pandas as pd
90
+ >>> df = pd.DataFrame({"value": [1, 2, 3]},
91
+ ... index=pd.date_range("2024-01-01", periods=3, freq="h"))
92
+ >>> data = fetch_data(filename=df, timezone="Europe/Berlin")
93
+ >>> data.index.tz
94
+ <UTC>
82
95
  """
83
96
  if columns is not None and len(columns) == 0:
84
97
  raise ValueError("columns must be specified and cannot be empty.")
85
98
 
86
- csv_path = get_data_home() / filename
87
- if not Path(csv_path).is_file():
88
- raise FileNotFoundError(f"The file {csv_path} does not exist.")
89
-
90
- dataset = Data.from_csv(
91
- csv_path=csv_path,
92
- index_col=index_col,
93
- parse_dates=parse_dates,
94
- dayfirst=dayfirst,
95
- timezone=timezone,
96
- columns=columns,
97
- )
99
+ # Check if filename is actually a DataFrame
100
+ if isinstance(filename, pd.DataFrame):
101
+ # Use Data.from_dataframe for proper timezone handling
102
+ dataset = Data.from_dataframe(
103
+ df=filename,
104
+ timezone=timezone,
105
+ columns=columns,
106
+ )
107
+ else:
108
+ # Load from CSV file
109
+ csv_path = get_data_home() / filename
110
+ if not Path(csv_path).is_file():
111
+ raise FileNotFoundError(f"The file {csv_path} does not exist.")
112
+
113
+ dataset = Data.from_csv(
114
+ csv_path=csv_path,
115
+ index_col=index_col,
116
+ parse_dates=parse_dates,
117
+ dayfirst=dayfirst,
118
+ timezone=timezone,
119
+ columns=columns,
120
+ )
98
121
 
99
122
  return dataset.data
100
123
 
@@ -210,6 +210,7 @@ def _model_directory_exists(model_dir: Union[str, Path]) -> bool:
210
210
 
211
211
 
212
212
  def n2n_predict(
213
+ data: Optional[pd.DataFrame] = None,
213
214
  columns: Optional[List[str]] = None,
214
215
  forecast_horizon: int = 24,
215
216
  contamination: float = 0.01,
@@ -233,6 +234,8 @@ def n2n_predict(
233
234
  Existing models are reused for prediction unless force_train=True.
234
235
 
235
236
  Args:
237
+ data: Optional DataFrame with target time series data. If None, fetches data automatically.
238
+ Default: None.
236
239
  columns: List of target columns to forecast. If None, uses all available columns.
237
240
  Default: None.
238
241
  forecast_horizon: Number of time steps to forecast ahead. Default: 24.
@@ -311,10 +314,16 @@ def n2n_predict(
311
314
 
312
315
  if verbose:
313
316
  print("--- Starting n2n_predict ---")
314
- print("Fetching data...")
315
317
 
316
- # Fetch data
317
- data = fetch_data(columns=TARGET)
318
+ # Handle data input - fetch_data handles both CSV and DataFrame
319
+ if data is not None:
320
+ if verbose:
321
+ print("Using provided dataframe...")
322
+ data = fetch_data(filename=data, columns=TARGET)
323
+ else:
324
+ if verbose:
325
+ print("Fetching data from CSV...")
326
+ data = fetch_data(columns=TARGET)
318
327
 
319
328
  START, END, COV_START, COV_END = get_start_end(
320
329
  data=data,
@@ -727,6 +727,7 @@ def _model_directory_exists(model_dir: Union[str, Path]) -> bool:
727
727
 
728
728
 
729
729
  def n2n_predict_with_covariates(
730
+ data: Optional[pd.DataFrame] = None,
730
731
  forecast_horizon: int = 24,
731
732
  contamination: float = 0.01,
732
733
  window_size: int = 72,
@@ -763,6 +764,8 @@ def n2n_predict_with_covariates(
763
764
  Existing models are reused for prediction unless force_train=True.
764
765
 
765
766
  Args:
767
+ data: Optional DataFrame with target time series data. If None, fetches data automatically.
768
+ Default: None.
766
769
  forecast_horizon: Number of time steps to forecast ahead. Default: 24.
767
770
  contamination: Contamination parameter for outlier detection. Default: 0.01.
768
771
  window_size: Rolling window size for gap detection. Default: 72.
@@ -865,7 +868,16 @@ def n2n_predict_with_covariates(
865
868
  if verbose:
866
869
  print("\n[1/9] Loading and preparing target data...")
867
870
 
868
- data = fetch_data()
871
+ # Handle data input - fetch_data handles both CSV and DataFrame
872
+ if data is None:
873
+ if verbose:
874
+ print(" Fetching data from CSV...")
875
+ data = fetch_data(timezone=timezone)
876
+ else:
877
+ if verbose:
878
+ print(" Using provided dataframe...")
879
+ data = fetch_data(filename=data, timezone=timezone)
880
+
869
881
  target_columns = data.columns.tolist()
870
882
 
871
883
  if verbose:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: spotforecast2
3
- Version: 0.1.1
3
+ Version: 0.2.1
4
4
  Summary: Forecasting with spot
5
5
  Author: bartzbeielstein
6
6
  Author-email: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
@@ -1,7 +1,7 @@
1
1
  spotforecast2/__init__.py,sha256=X9sBx15iz8yqr9iDJcrGJM5nhvnpaczXto4XV_GtfhE,59
2
2
  spotforecast2/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  spotforecast2/data/data.py,sha256=HEgr-FULaqHvuMeKTviOgYyo3GbxpGRTo3ZnmIU9w2Y,4422
4
- spotforecast2/data/fetch_data.py,sha256=LcHowE6tnjKPNMTCGr8h29ioGHT4xmj6l6iZmZkJdLU,6842
4
+ spotforecast2/data/fetch_data.py,sha256=whefI8g3y7rQzzhOp3uB6L3VQdvu4p2qc6aWvkKSgD0,8010
5
5
  spotforecast2/exceptions.py,sha256=6gOji-3cP-YAisPoxXCcrEEbjTnfPN1YqEhGYhmyZ8Y,20499
6
6
  spotforecast2/forecaster/__init__.py,sha256=BbCOS2ouKcPC9VzcdprllVyqlZIyAWXCOvUAiInxDi4,140
7
7
  spotforecast2/forecaster/base.py,sha256=rXhcjY4AMpyQhkpbtLIA8OOrGEb8fU57SQiyeR9c9DQ,16748
@@ -31,8 +31,8 @@ spotforecast2/preprocessing/imputation.py,sha256=lmH-HumI_QLLm9aMESe_oZq84Axn60w
31
31
  spotforecast2/preprocessing/outlier.py,sha256=jZxAR870QtYner7b4gXk6LLGJw0juLq1VU4CGklYd3c,4208
32
32
  spotforecast2/preprocessing/split.py,sha256=mzzt5ltUZdVzfWtBBTQjp8E2MyqVdWUFtz7nN11urbU,5011
33
33
  spotforecast2/processing/agg_predict.py,sha256=VKlruB0x-eJKokkHyJxR87rZ4m53si3ODbrd0ibPlow,2378
34
- spotforecast2/processing/n2n_predict.py,sha256=dAj5yXD2JGXSqtl0VDkq0O_8FO_K9BCYG6osbJbWDFg,14494
35
- spotforecast2/processing/n2n_predict_with_covariates.py,sha256=5a1lYIQE1d-t4ZvSQDoW87G705eiIZxtrCn4w7U2bVw,40420
34
+ spotforecast2/processing/n2n_predict.py,sha256=eClK0uccg_pKCnDfzWckFV6TWprTxskaY3whH_vJGs0,14911
35
+ spotforecast2/processing/n2n_predict_with_covariates.py,sha256=xP7V_JB0xF7I5KlAqeR-U0ENwxZNSvSQvsPOLHjZWV8,40913
36
36
  spotforecast2/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  spotforecast2/utils/__init__.py,sha256=NrMt_xJLe4rbTFbsbgSQYeREohEOiYG5S-97e6Jj07I,1018
38
38
  spotforecast2/utils/convert_to_utc.py,sha256=hz8mJUHK9jDLUiN5LdNX5l3KZuOKlklyycB4zFdB9Ng,1405
@@ -42,6 +42,6 @@ spotforecast2/utils/generate_holiday.py,sha256=SHaPvPMt-abis95cChHf5ObyPwCTrzJ87
42
42
  spotforecast2/utils/validation.py,sha256=x9ypQzcneDhWJA_piiY4Q3_ogoGd1LTsZ7__MFeG9Fc,21618
43
43
  spotforecast2/weather/__init__.py,sha256=1Jco88pl0deNESgNATin83Nf5i9c58pxN7G-vNiOiu0,120
44
44
  spotforecast2/weather/weather_client.py,sha256=Ec_ywug6uoa71MfXM8RNbXEvtBtBzr-SUS5xq_HKtZE,9837
45
- spotforecast2-0.1.1.dist-info/WHEEL,sha256=5DEXXimM34_d4Gx1AuF9ysMr1_maoEtGKjaILM3s4w4,80
46
- spotforecast2-0.1.1.dist-info/METADATA,sha256=TMwW-WMXSoNRVw7oDLU3Ys_8JXhODgvXxXjSeokWaXs,3481
47
- spotforecast2-0.1.1.dist-info/RECORD,,
45
+ spotforecast2-0.2.1.dist-info/WHEEL,sha256=5DEXXimM34_d4Gx1AuF9ysMr1_maoEtGKjaILM3s4w4,80
46
+ spotforecast2-0.2.1.dist-info/METADATA,sha256=CZtz7eiMtA8zOF85_sLrApPp085fYOYPsI7btv7UdtY,3481
47
+ spotforecast2-0.2.1.dist-info/RECORD,,