timesat-cli 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of timesat-cli might be problematic. Click here for more details.

@@ -0,0 +1,18 @@
1
+ """TIMESAT Runner package.
2
+
3
+ This package provides a Python interface and CLI wrapper for running TIMESAT
4
+ processing pipelines.
5
+
6
+ Authors:
7
+ Zhanzhang Cai (Lund University)
8
+ Lars Eklundh (Lund University)
9
+ Per Jönsson (Malmö University)
10
+
11
+ Email:
12
+ zhanzhang.cai@nateko.lu.se
13
+ """
14
+
15
+ __version__ = "0.1.0"
16
+ __all__ = ["run"]
17
+
18
+ from .processing import run
@@ -0,0 +1,12 @@
1
+ # src/timesat_cli/__main__.py
2
+ import argparse
3
+ from .processing import run
4
+
5
def main():
    """Command-line entry point.

    Parses a single positional argument, the path to the JSON settings
    file, and hands it to ``run``.
    """
    cli = argparse.ArgumentParser(
        description="Run TIMESAT processing pipeline.",
    )
    cli.add_argument(
        "settings_json",
        help="Path to the JSON configuration file.",
    )
    options = cli.parse_args()
    run(options.settings_json)


if __name__ == "__main__":
    main()
timesat_cli/config.py ADDED
@@ -0,0 +1,160 @@
1
+ from __future__ import annotations
2
+ import json
3
+ from dataclasses import dataclass
4
+ from typing import List, Sequence, Tuple
5
+ import numpy as np
6
+
7
+
8
@dataclass
class ClassParams:
    """Parameter set for one class section of the settings file.

    One instance is built per ``class<N>`` section by ``load_config``; the
    field names mirror the JSON keys of that section.
    """

    # Class code this parameter set belongs to.
    landuse: int

    # Fitting / smoothing controls.
    p_fitmethod: int
    p_smooth: float
    p_nenvi: int
    p_wfactnum: float

    # Season start/end detection; p_startcutoff holds a pair of values.
    p_startmethod: int
    p_startcutoff: Tuple[float, float]
    p_low_percentile: float
    p_fillbase: int

    # Season-finding controls.
    p_seasonmethod: int
    p_seapar: float
21
+
22
+
23
@dataclass
class Settings:
    """Global run settings read from the ``settings`` section of the JSON file."""

    # --- input / output locations -------------------------------------
    image_file_list: str
    quality_file_list: str
    tv_list: str
    lc_file: str
    outputfolder: str
    # Processing window, read by ``run`` as (x offset, y offset, width, height).
    imwindow: Sequence[int]

    # --- processing parameters ----------------------------------------
    p_band_id: int
    p_ignoreday: int
    p_ylu: np.ndarray
    # QA-to-weight rules, passed on to the image reader.
    p_a: List[List[float]]
    # Step used by ``run`` when sub-sampling the output day indices.
    p_st_timestep: int
    p_nodata: float
    p_davailwin: int
    p_outlier: int
    p_printflag: int

    # --- execution controls -------------------------------------------
    max_memory_gb: float
    # Ray is initialised by ``run`` only when this is greater than 1.
    para_check: int
    ray_dir: str

    # --- value scaling: ``run`` applies ``vi * scale + offset`` --------
    scale: float
    offset: float

    p_hrvppformat: int

    # --- per-class parameters -----------------------------------------
    p_nclasses: int
    classes: List[ClassParams]
49
+
50
+
51
@dataclass
class Config:
    """Top-level configuration object returned by ``load_config``."""

    # The parsed run settings, including the per-class parameter list.
    settings: Settings
54
+
55
+
56
def _as_array(value, dtype=float, fortran=False):
    """Convert *value* to a NumPy array of *dtype*.

    When *fortran* is true the result is passed through
    ``np.asfortranarray`` so that it is laid out in column-major order.
    """
    converted = np.array(value, dtype=dtype)
    return np.asfortranarray(converted) if fortran else converted
61
+
62
+
63
def _scalar_value(value) -> float:
    """Return *value* (a number or a one-element sequence) as a Python float.

    ``float()`` on an array with ``ndim > 0`` is deprecated in recent NumPy
    releases, so the single element is extracted explicitly.

    Raises
    ------
    TypeError
        If *value* does not hold exactly one element.
    """
    arr = np.asarray(value, dtype="double")
    if arr.size != 1:
        raise TypeError(f"expected a single numeric value, got {value!r}")
    return float(arr.reshape(-1)[0])


def load_config(jsfile: str) -> Config:
    """Load the JSON settings file *jsfile* into a ``Config``.

    Expected layout: a ``"settings"`` object plus one ``"class<N>"`` object
    for ``N = 1 .. p_nclasses``. Every parameter is itself an object whose
    ``"value"`` entry holds the actual value.

    Parameters
    ----------
    jsfile : str
        Path to the JSON configuration file (read as UTF-8).

    Returns
    -------
    Config
        Parsed configuration with ``settings.classes`` in class order.

    Raises
    ------
    KeyError
        If a required key or a ``class<N>`` section is missing.
    TypeError
        If a scalar parameter holds more than one value.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(jsfile, "r", encoding="utf-8") as f:
        data = json.load(f)

    s = data["settings"]
    nclasses = int(s["p_nclasses"]["value"])

    classes: List[ClassParams] = []
    for i in range(nclasses):
        key = f"class{i+1}"
        try:
            c = data[key]
        except KeyError:
            raise KeyError(
                f"p_nclasses is {nclasses} but section '{key}' is missing in {jsfile}"
            ) from None
        classes.append(
            ClassParams(
                landuse=int(c["landuse"]["value"]),
                p_fitmethod=int(c["p_fitmethod"]["value"]),
                p_smooth=_scalar_value(c["p_smooth"]["value"]),
                p_nenvi=int(c["p_nenvi"]["value"]),
                p_wfactnum=_scalar_value(c["p_wfactnum"]["value"]),
                p_startmethod=int(c["p_startmethod"]["value"]),
                p_startcutoff=tuple(
                    _as_array(c["p_startcutoff"]["value"], dtype="double", fortran=True)
                ),
                p_low_percentile=_scalar_value(c["p_low_percentile"]["value"]),
                p_fillbase=int(c["p_fillbase"]["value"]),
                p_seasonmethod=int(c["p_seasonmethod"]["value"]),
                p_seapar=_scalar_value(c["p_seapar"]["value"]),
            )
        )

    settings = Settings(
        image_file_list=s["image_file_list"]["value"],
        quality_file_list=s["quality_file_list"]["value"],
        tv_list=s["tv_list"]["value"],
        lc_file=s["lc_file"]["value"],
        outputfolder=s["outputfolder"]["value"],
        imwindow=s["imwindow"]["value"],
        p_band_id=int(s["p_band_id"]["value"]),
        p_ignoreday=int(s["p_ignoreday"]["value"]),
        p_ylu=_as_array(s["p_ylu"]["value"], dtype="double", fortran=True),
        p_a=s["p_a"]["value"],
        p_st_timestep=int(s["p_st_timestep"]["value"]),
        p_nodata=float(s["p_nodata"]["value"]),
        p_davailwin=int(s["p_davailwin"]["value"]),
        p_outlier=int(s["p_outlier"]["value"]),
        p_printflag=int(s["p_printflag"]["value"]),
        max_memory_gb=float(s["max_memory_gb"]["value"]),
        para_check=int(s["para_check"]["value"]),
        ray_dir=s["ray_dir"]["value"],
        scale=float(s["scale"]["value"]),
        offset=float(s["offset"]["value"]),
        p_hrvppformat=int(s["p_hrvppformat"]["value"]),
        p_nclasses=nclasses,
        classes=classes,
    )

    return Config(settings=settings)
117
+
118
+
119
def build_param_array(
    s,
    attr: str,
    dtype,
    size: int = 255,
    shape: Tuple[int, ...] | None = None,
    fortran_2d: bool = False
):
    """
    Collect one per-class attribute into a fixed-length NumPy array.

    Row ``i`` of the result holds ``getattr(s.classes[i], attr)``; rows
    beyond the number of classes stay zero.

    Parameters
    ----------
    s : object
        Settings container exposing an iterable ``classes``.
    attr : str
        Name of the attribute to read from every class object
        (e.g., 'p_smooth').
    dtype : numpy dtype or dtype string (e.g., 'uint8', 'double').
    size : int
        Length of the first dimension (TIMESAT expects 255).
    shape : tuple[int, ...] | None
        Trailing shape for vector-valued attributes (e.g., (2,) for
        p_startcutoff). ``None`` gives a 1-D array.
    fortran_2d : bool
        If True and `shape` is one-dimensional with more than one element,
        the array is allocated with order='F' to mirror the legacy layout.

    Returns
    -------
    np.ndarray
        Array of shape ``(size,)`` or ``(size, *shape)``.
    """
    vector_valued = shape is not None

    if vector_valued:
        dims = (size, *shape)
        column_major = fortran_2d and len(shape) == 1 and shape[0] > 1
        out = np.zeros(dims, dtype=dtype, order='F' if column_major else 'C')
    else:
        out = np.zeros(size, dtype=dtype)

    for row, class_params in enumerate(s.classes):
        value = getattr(class_params, attr)
        if vector_valued:
            out[row, ...] = value
        else:
            out[row] = value
    return out
@@ -0,0 +1,93 @@
1
+ # csvutils.py
2
+ from __future__ import annotations
3
+ from typing import Tuple, Iterable, Optional
4
+ import numpy as np
5
+ import pandas as pd
6
+ import datetime as dt
7
+
8
+ __all__ = ["read_timeseries_csv", "write_timesat_csv_outputs"]
9
+
10
def _parse_time_column(col: Iterable[str | int]) -> np.ndarray:
    """
    Convert a column of dates to a uint32 vector in YYYYDOY form.

    Accepted inputs (surrounding whitespace is ignored):
    YYYYDOY (e.g., 2020123), YYYYMMDD (e.g., 20200123) or ISO 'YYYY-MM-DD'.

    Raises
    ------
    ValueError
        If a value matches none of the formats or is not a valid date.
    """
    out = []
    for v in col:
        s = str(v).strip()
        try:
            if len(s) == 7 and s.isdigit():  # YYYYDOY
                parsed = dt.datetime.strptime(s, "%Y%j")
                # strptime rolls an out-of-range day (e.g. 2021366) into the
                # next year instead of failing, so check the year explicitly.
                if parsed.year != int(s[:4]):
                    raise ValueError("day of year out of range")
                out.append(int(s))
                continue
            if len(s) == 8 and s.isdigit():  # YYYYMMDD
                parsed = dt.datetime.strptime(s, "%Y%m%d")
            else:  # try ISO
                parsed = dt.datetime.strptime(s, "%Y-%m-%d")
        except ValueError as e:
            raise ValueError(f"Unrecognized date format: {s}") from e
        out.append(int(f"{parsed.year}{parsed.timetuple().tm_yday:03d}"))
    return np.array(out, dtype="uint32")
32
+
33
def read_timeseries_csv(path: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Load a single-pixel (per-site) time series from a CSV file.

    Columns:
      - 'time' : required; YYYYDOY or YYYYMMDD or YYYY-MM-DD
      - 'vi'   : required; vegetation index values (float)
      - 'qa'   : optional; quality or weights. If missing, every weight is 1.

    Any other column (for example 'lc') is not read by this function.

    Returns:
      vi         : float64 array shaped (1, 1, T)
      qa         : float64 array shaped (1, 1, T)
      timevector : 1-D uint32 YYYYDOY of length T

    Raises:
      ValueError if 'time' or 'vi' is missing, or a date cannot be parsed.
    """
    table = pd.read_csv(path)
    if not ("time" in table and "vi" in table):
        raise ValueError("CSV must contain at least 'time' and 'vi' columns.")

    timevector = _parse_time_column(table["time"])
    vi_flat = table["vi"].to_numpy(dtype="float64")
    if "qa" in table:
        qa_flat = table["qa"].to_numpy(dtype="float64")
    else:
        qa_flat = np.ones_like(vi_flat, dtype="float64")

    # Present the series as a (y=1, x=1, z=T) cube.
    cube_shape = (1, 1, -1)
    vi = vi_flat.reshape(cube_shape, order="F")
    qa = qa_flat.reshape(cube_shape, order="F")
    return vi, qa, timevector
58
+
59
def write_timesat_csv_outputs(
    out_folder: str,
    timevector_out: np.ndarray,   # p_outindex dates in YYYYDOY
    yfit: np.ndarray,             # shape (T_out,) for single site
    vpp: Optional[np.ndarray],    # shape (13*2*yr,) flattened for single site
    nseason: Optional[int]
) -> None:
    """
    Write single-site results as CSV files inside *out_folder*.

    The folder is created if needed. Files written:
      - yfit.csv    : columns [time(YYYYDOY), yfit]; always written
      - vpp.csv     : one row, columns VPP_1 ... VPP_N; skipped if vpp is None
      - nseason.csv : one row with nseason; skipped if nseason is None
    """
    import os

    os.makedirs(out_folder, exist_ok=True)

    def _target(filename):
        return os.path.join(out_folder, filename)

    # Fitted series.
    fitted = pd.DataFrame({
        "time": timevector_out.astype("uint32"),
        "yfit": yfit.astype("float64"),
    })
    fitted.to_csv(_target("yfit.csv"), index=False)

    # Seasonality parameters, flattened in column-major order.
    if vpp is not None:
        vpp = vpp.ravel(order="F").astype("float64")
        header = [f"VPP_{k+1}" for k in range(vpp.size)]
        pd.DataFrame([vpp], columns=header).to_csv(_target("vpp.csv"), index=False)

    # Number of seasons.
    if nseason is not None:
        season_table = pd.DataFrame({"nseason": [int(nseason)]})
        season_table.to_csv(_target("nseason.csv"), index=False)
timesat_cli/fsutils.py ADDED
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+ import os
3
+ import math
4
+ from typing import Tuple
5
+
6
+ __all__ = ["create_output_folders"]
7
+
8
+
9
def create_output_folders(outfolder: str) -> Tuple[str, str]:
    """Create the ``VPP`` and ``ST`` sub-folders of *outfolder*.

    Existing folders are left untouched. Returns ``(st_folder, vpp_folder)``,
    i.e. the ST path first.
    """
    st_folder, vpp_folder = (os.path.join(outfolder, sub) for sub in ("ST", "VPP"))
    for folder in (vpp_folder, st_folder):
        os.makedirs(folder, exist_ok=True)
    return st_folder, vpp_folder
15
+
16
+
17
def memory_plan(dx: int, dy: int, z: int, p_outindex_num: int, yr: int, max_memory_gb: float) -> Tuple[int, int]:
    """Split ``dy`` image rows into blocks that fit a memory budget.

    The per-row cost is estimated as ``dx`` pixels times
    ``p_outindex_num + 2 * z + 26 * yr`` layers at 4 bytes each.

    Returns
    -------
    (y_slice_size, num_block)
        Rows per block (at least 1, at most ``dy``) and the number of blocks
        needed to cover ``dy`` rows. A non-positive budget yields one block
        of ``dy`` rows.
    """
    layer_count = p_outindex_num + z * 2 + (13 * 2) * yr
    bytes_per_value = 4  # float32
    row_cost = dx * layer_count * bytes_per_value
    rows_in_budget = (max_memory_gb * (2 ** 30)) / row_cost

    if rows_in_budget > 0:
        rows = int(min(math.floor(rows_in_budget), dy))
    else:
        rows = dy
    # Always make progress, even when a single row exceeds the budget.
    rows = max(1, rows)

    return rows, int(math.ceil(dy / rows))
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ __all__ = ["maybe_init_ray"]
4
+
5
+
6
def maybe_init_ray(para_check: int, ray_dir: str | None = None) -> bool:
    """
    Start Ray when more than one CPU is requested.

    Ray is imported lazily, so it is only required when ``para_check > 1``.

    Parameters
    ----------
    para_check : int
        Number of CPUs to use. Values of 1 or less leave Ray untouched.
    ray_dir : str or None, optional
        Temporary directory for Ray logs/state. If None or empty,
        Ray will use its default temp location.

    Returns
    -------
    bool
        True if ``ray.init`` was called, False otherwise.
    """
    if not (para_check > 1):
        return False

    import ray

    init_kwargs = {"num_cpus": para_check}
    if ray_dir:  # skip None and empty strings
        init_kwargs["_temp_dir"] = ray_dir

    ray.init(**init_kwargs)
    return True
@@ -0,0 +1,169 @@
1
+ from __future__ import annotations
2
+ import math, os, datetime
3
+ from typing import List, Tuple
4
+
5
+ import numpy as np
6
+ import rasterio
7
+
8
+ import timesat # external dependency
9
+
10
+ from .config import load_config, build_param_array
11
+ from .readers import read_file_lists, open_image_data
12
+ from .fsutils import create_output_folders, memory_plan
13
+ from .writers import prepare_profiles, write_vpp_layers, write_st_layers
14
+ from .parallel import maybe_init_ray
15
+
16
# Names of the seasonality parameters; one output file is planned per name,
# per year and per season.
VPP_NAMES = [
    "SOSD", "SOSV", "LSLOPE", "EOSD", "EOSV", "RSLOPE", "LENGTH",
    "MINV", "MAXD", "MAXV", "AMPL", "TPROD", "SPROD",
]


def _build_output_filenames(st_folder: str, vpp_folder: str, p_outindex, yrstart: int, yrend: int):
    """Build the output GeoTIFF paths.

    Returns ``(outyfitfn, outvppfn, outnsfn)``:

    * ``outyfitfn`` - one path in *st_folder* per entry of *p_outindex*,
      where entry ``k`` is treated as a 1-based day count from 1 January of
      *yrstart* and formatted as YYYYMMDD.
    * ``outvppfn`` - paths in *vpp_folder*, ordered by year, then season
      (1 and 2), then ``VPP_NAMES``.
    * ``outnsfn`` - path of the seasons-per-year file in *vpp_folder*.
    """
    first_day = datetime.date(yrstart, 1, 1)
    one_day = datetime.timedelta(days=1)

    def _st_path(day_index):
        yfitdate = first_day + datetime.timedelta(days=int(day_index)) - one_day
        return os.path.join(st_folder, f"TIMESAT_{yfitdate.strftime('%Y%m%d')}.tif")

    outyfitfn = [_st_path(i_tv) for i_tv in p_outindex]

    outvppfn = [
        os.path.join(vpp_folder, f"TIMESAT_{name}_{i_yr}_season_{i_seas+1}.tif")
        for i_yr in range(yrstart, yrend + 1)
        for i_seas in range(2)
        for name in VPP_NAMES
    ]

    outnsfn = os.path.join(vpp_folder, 'TIMESAT_nsperyear.tif')
    return outyfitfn, outvppfn, outnsfn
32
+
33
+
34
def run(jsfile: str) -> None:
    """Run the TIMESAT pipeline described by the JSON settings file *jsfile*.

    Steps, in order:

    1. Load the configuration; return immediately if ``outputfolder`` is
       empty.
    2. Build the fixed-size per-class parameter arrays handed to
       ``timesat.tsf2py``.
    3. Start Ray when ``para_check > 1`` and read the image / QA / date lists.
    4. Derive the output day indices (every ``p_st_timestep`` days, counted
       from 1 January of the first year) and the processing window
       ``imwindow = (x offset, y offset, width, height)``; an all-zero window
       means the full image.
    5. Unless the window is a single pixel, create the output folders and
       pre-create every output GeoTIFF.
    6. Process the window in row blocks sized by ``memory_plan`` and, for
       raster runs, write the fitted series and the seasonality parameters
       of each block.

    Parameters
    ----------
    jsfile : str
        Path to the JSON configuration file.
    """
    print(jsfile)
    cfg = load_config(jsfile)
    s = cfg.settings

    if s.outputfolder == '':
        print('Nothing to do...')
        return

    # Per-class parameter arrays are identical for every block: build once.
    landuse_arr = build_param_array(s, 'landuse', 'uint8')
    p_fitmethod_arr = build_param_array(s, 'p_fitmethod', 'uint8')
    p_smooth_arr = build_param_array(s, 'p_smooth', 'double')
    p_nenvi_arr = build_param_array(s, 'p_nenvi', 'uint8')
    p_wfactnum_arr = build_param_array(s, 'p_wfactnum', 'double')
    p_startmethod_arr = build_param_array(s, 'p_startmethod', 'uint8')
    p_startcutoff_arr = build_param_array(s, 'p_startcutoff', 'double', shape=(2,), fortran_2d=True)
    p_low_percentile_arr = build_param_array(s, 'p_low_percentile', 'double')
    p_fillbase_arr = build_param_array(s, 'p_fillbase', 'uint8')
    p_seasonmethod_arr = build_param_array(s, 'p_seasonmethod', 'uint8')
    p_seapar_arr = build_param_array(s, 'p_seapar', 'double')

    print(landuse_arr)

    ray_inited = maybe_init_ray(s.para_check, s.ray_dir)

    timevector, flist, qlist, yr, yrstart, yrend = read_file_lists(s.tv_list, s.image_file_list, s.quality_file_list)

    z = len(flist)
    print(f'num of images: {z}')
    print('First image: ' + os.path.basename(flist[0]))
    print('Last image: ' + os.path.basename(flist[-1]))
    print(yrstart)

    # 1-based day indices counted from 1 January of the first year.
    # NOTE(review): np.arange excludes its stop value, so the last day of the
    # final year is never part of the index. Left unchanged - confirm whether
    # that is intended before altering the number of output layers.
    p_outindex = np.arange(
        (datetime.datetime(yrstart, 1, 1) - datetime.datetime(yrstart, 1, 1)).days + 1,
        (datetime.datetime(yrstart + yr - 1, 12, 31) - datetime.datetime(yrstart, 1, 1)).days + 1
    )[:: int(s.p_st_timestep)]
    p_outindex_num = len(p_outindex)

    with rasterio.open(flist[0], 'r') as temp:
        img_profile = temp.profile

    if sum(s.imwindow) == 0:
        dx, dy = img_profile['width'], img_profile['height']
    else:
        dx, dy = int(s.imwindow[2]), int(s.imwindow[3])

    # A 1 x 1 window is a single-pixel run and produces no rasters.
    imgprocessing = not (s.imwindow[2] + s.imwindow[3] == 2)

    if imgprocessing:
        st_folder, vpp_folder = create_output_folders(s.outputfolder)
        outyfitfn, outvppfn, outnsfn = _build_output_filenames(st_folder, vpp_folder, p_outindex, yrstart, yrend)
        img_profile_st, img_profile_vpp, img_profile_ns = prepare_profiles(img_profile, s.p_nodata, s.scale, s.offset)
        # pre-create files
        for path in outvppfn:
            with rasterio.open(path, 'w', **img_profile_vpp):
                pass
        for path in outyfitfn:
            with rasterio.open(path, 'w', **img_profile_st):
                pass

    # compute memory blocks
    y_slice_size, num_block = memory_plan(dx, dy, z, p_outindex_num, yr, s.max_memory_gb)
    # The last block holds the remainder rows (or a full slice if dy divides evenly).
    y_slice_end = dy % y_slice_size if (dy % y_slice_size) > 0 else y_slice_size
    print('y_slice_size = ' + str(y_slice_size))

    for iblock in range(num_block):
        print(f'Processing block: {iblock + 1}/{num_block}  starttime: {datetime.datetime.now()}')
        x = dx
        y = int(y_slice_size) if iblock != num_block - 1 else int(y_slice_end)
        x_map = int(s.imwindow[0])
        y_map = int(iblock * y_slice_size + s.imwindow[1])

        vi, qa, lc = open_image_data(
            x_map, y_map, x, y, flist, qlist if qlist else '', s.lc_file,
            img_profile['dtype'], s.p_a, s.para_check, s.p_band_id
        )

        print('--- start TIMESAT processing ---  starttime: ' + str(datetime.datetime.now()))

        if s.scale != 1 or s.offset != 0:
            vi = vi * s.scale + s.offset

        if s.para_check > 1 and ray_inited:
            import ray

            # One task per image row: each call processes a (1, x, z) slice.
            @ray.remote
            def runtimesat(vi_temp, qa_temp, lc_temp):
                vpp_para, vppqa, nseason_para, yfit_para, yfitqa, seasonfit, tseq = timesat.tsf2py(
                    yr, vi_temp, qa_temp, timevector, lc_temp, s.p_nclasses, landuse_arr, p_outindex,
                    s.p_ignoreday, s.p_ylu, s.p_printflag, p_fitmethod_arr, p_smooth_arr,
                    s.p_nodata, s.p_davailwin, s.p_outlier,
                    p_nenvi_arr, p_wfactnum_arr, p_startmethod_arr, p_startcutoff_arr,
                    p_low_percentile_arr, p_fillbase_arr, s.p_hrvppformat,
                    p_seasonmethod_arr, p_seapar_arr,
                    1, x, len(flist), p_outindex_num
                )
                # Drop the leading length-1 row axis before returning.
                vpp_para = vpp_para[0, :, :]
                yfit_para = yfit_para[0, :, :]
                nseason_para = nseason_para[0, :]
                return vpp_para, yfit_para, nseason_para

            futures = [
                runtimesat.remote(
                    np.expand_dims(vi[i, :, :], axis=0),
                    np.expand_dims(qa[i, :, :], axis=0),
                    np.expand_dims(lc[i, :], axis=0)
                ) for i in range(y)
            ]
            results = ray.get(futures)
            vpp = np.stack([r[0] for r in results], axis=0)
            yfit = np.stack([r[1] for r in results], axis=0)
            nseason = np.stack([r[2] for r in results], axis=0)
        else:
            vpp, vppqa, nseason, yfit, yfitqa, seasonfit, tseq = timesat.tsf2py(
                yr, vi, qa, timevector, lc, s.p_nclasses, landuse_arr, p_outindex,
                s.p_ignoreday, s.p_ylu, s.p_printflag, p_fitmethod_arr, p_smooth_arr,
                s.p_nodata, s.p_davailwin, s.p_outlier,
                p_nenvi_arr, p_wfactnum_arr, p_startmethod_arr, p_startcutoff_arr,
                p_low_percentile_arr, p_fillbase_arr, s.p_hrvppformat,
                p_seasonmethod_arr, p_seapar_arr,
                y, x, len(flist), p_outindex_num)

        # Move the layer axis to the front: (layer, y, x).
        vpp = np.moveaxis(vpp, -1, 0)
        # Keep the source dtype only when the values were left unscaled. The
        # identity transform is scale == 1 / offset == 0, matching the test
        # used above when scaling the input (the previous scale == 0 check
        # could never describe an unscaled run).
        # NOTE(review): confirm prepare_profiles picks the ST dtype by the
        # same rule.
        if s.scale == 1 and s.offset == 0:
            yfit = np.moveaxis(yfit, -1, 0).astype(img_profile['dtype'])
        else:
            yfit = np.moveaxis(yfit, -1, 0).astype('float32')

        if imgprocessing:
            print('--- start writing geotif ---  starttime: ' + str(datetime.datetime.now()))
            window = (x_map, y_map, x, y)
            write_vpp_layers(outvppfn, vpp, window, img_profile_vpp)
            write_st_layers(outyfitfn, yfit, window, img_profile_st)
        else:
            # Output paths and profiles only exist for raster runs; writing
            # here would fail on undefined names.
            print('Single-pixel window: no GeoTIFF output written.')

        print(f'Block: {iblock + 1}/{num_block}  finishedtime: {datetime.datetime.now()}')
timesat_cli/qa.py ADDED
@@ -0,0 +1,25 @@
1
+ import numpy as np
2
+
3
+ __all__ = ["assign_qa_weight"]
4
+
5
+
6
def assign_qa_weight(p_a, qa: np.ndarray) -> np.ndarray:
    """Map QA values to weights using rules in p_a.

    Parameters
    ----------
    p_a : array-like
        Either rows of ``(qa_value, weight)`` or rows of
        ``(min_value, max_value, weight)`` with an inclusive range. A single
        rule may be given as a flat sequence. Later rules override earlier
        ones where they overlap.
    qa : np.ndarray
        QA values of any shape.

    Returns
    -------
    np.ndarray
        Float array shaped like *qa*; values matched by no rule get weight 0.
        An empty *qa* is returned unchanged, and an empty *p_a* gives all
        zeros.

    Raises
    ------
    ValueError
        If the rules do not have 2 or 3 columns.
    """
    rules = np.asarray(p_a)
    if qa.size == 0:
        return qa
    qa_out = np.zeros_like(qa, dtype=float)
    if rules.size == 0:
        return qa_out

    # Accept a single flat rule such as [1, 0.5] as a one-row table.
    if rules.ndim == 1:
        rules = rules[np.newaxis, :]
    if rules.ndim != 2 or rules.shape[1] not in (2, 3):
        raise ValueError("p_a must have either 2 or 3 columns.")

    if rules.shape[1] == 2:
        for qa_value, weight in rules:
            qa_out[qa == qa_value] = weight
    else:
        for min_val, max_val, weight in rules:
            qa_out[(qa >= min_val) & (qa <= max_val)] = weight
    return qa_out
timesat_cli/readers.py ADDED
@@ -0,0 +1,154 @@
1
+ from __future__ import annotations
2
+ import os, re, math, datetime
3
+ from typing import List, Tuple
4
+ import numpy as np
5
+ import rasterio
6
+ from rasterio.windows import Window
7
+ from .qa import assign_qa_weight
8
+
9
+ try:
10
+ import ray
11
+ except Exception: # optional
12
+ ray = None
13
+
14
+ __all__ = ["read_file_lists", "open_image_data"]
15
+
16
+
17
def _parse_dates_from_name(name: str) -> Tuple[int, int, int]:
    """Extract ``(year, month, day)`` from the first date found in *name*.

    A ``YYYY-MM-DD`` date is looked for first; if none is present, the first
    run of eight digits is read as ``YYYYMMDD``. The values are not checked
    for being a valid calendar date.

    Raises
    ------
    ValueError
        If neither pattern occurs in *name*.
    """
    # (pattern, offset of the month, offset of the day) inside the match.
    layouts = (
        (r"\d{4}-\d{2}-\d{2}", 5, 8),
        (r"\d{4}\d{2}\d{2}", 4, 6),
    )
    failure = None
    for pattern, month_at, day_at in layouts:
        try:
            stamp = re.findall(pattern, name)[0]
        except Exception as err:
            failure = err
            continue
        return (
            int(stamp[:4]),
            int(stamp[month_at:month_at + 2]),
            int(stamp[day_at:day_at + 2]),
        )
    raise ValueError(f"No date found in filename: {name}") from failure
37
+
38
+
39
def _read_time_vector(tlist: str, filepaths: List[str]):
    """Return (timevector, yr, yrstart, yrend) in YYYYDOY format.

    Parameters
    ----------
    tlist : str
        Path to a text file with one date per line (YYYYMMDD or YYYYDOY), or
        ``''`` to take the dates from the file names in *filepaths*. Blank
        lines and surrounding whitespace in the file are ignored.
    filepaths : list[str]
        Image paths; entry ``i`` of the date list belongs to path ``i``.

    Returns
    -------
    timevector : np.ndarray
        uint32 vector of YYYYDOY values, one per path.
    yr, yrstart, yrend : int
        Number of years spanned, first year and last year.

    Raises
    ------
    ValueError
        If there are no paths, the date list length differs from the number
        of paths, or a date cannot be parsed.
    """
    n_files = len(filepaths)
    if n_files == 0:
        raise ValueError("No input files: cannot build a time vector.")

    # Zero-initialised so a logic error can never leak uninitialised memory.
    timevector = np.zeros(n_files, dtype='uint32')

    if tlist == '':
        for i, path in enumerate(filepaths):
            y, m, d = _parse_dates_from_name(os.path.basename(path))
            doy = (datetime.date(y, m, d) - datetime.date(y, 1, 1)).days + 1
            timevector[i] = y * 1000 + doy
    else:
        with open(tlist, 'r') as f:
            entries = [line.strip() for line in f.read().splitlines()]
        entries = [entry for entry in entries if entry]
        if len(entries) != n_files:
            raise ValueError(
                f"Date list has {len(entries)} entries but there are {n_files} files"
            )
        for idx, val in enumerate(entries):
            n = len(val)
            if n == 8:  # YYYYMMDD
                parsed = datetime.datetime.strptime(val, "%Y%m%d")
                timevector[idx] = parsed.year * 1000 + parsed.timetuple().tm_yday
            elif n == 7:  # YYYYDOY
                parsed = datetime.datetime.strptime(val, "%Y%j")
                # strptime rolls an out-of-range day (e.g. 2021366) into the
                # next year instead of failing, so check the year explicitly.
                if parsed.year != int(val[:4]):
                    raise ValueError(f"Unrecognized date format: {val}")
                timevector[idx] = int(val)
            else:
                raise ValueError(f"Unrecognized date format: {val}")

    yrstart = int(timevector.min() // 1000)
    yrend = int(timevector.max() // 1000)
    yr = yrend - yrstart + 1
    return timevector, yr, yrstart, yrend
66
+
67
+
68
def _unique_by_timevector(flist: List[str], qlist: List[str], timevector):
    """Drop entries with duplicate dates, keeping the first file of each date.

    Returns the sorted unique dates together with the matching image paths
    and QA paths. The QA list comes back as ``[]`` when *qlist* is empty.
    """
    tv_unique, first_seen = np.unique(timevector, return_index=True)

    kept_images = [flist[pos] for pos in first_seen]
    if qlist:
        kept_quality = [qlist[pos] for pos in first_seen]
    else:
        kept_quality = []

    return tv_unique, kept_images, kept_quality
73
+
74
+
75
def read_file_lists(tlist: str, data_list: str, qa_list: str) -> Tuple[np.ndarray, List[str], List[str], int, int, int]:
    """Read the image list, the optional QA list and the acquisition dates.

    Parameters
    ----------
    tlist : str
        Path of the date list, or ``''`` to take dates from the file names.
    data_list : str
        Text file with one image path per line.
    qa_list : str
        Text file with one QA path per line, or ``''`` when there is no QA.

    Returns
    -------
    (timevector, flist, qlist, yr, yrstart, yrend)
        Dates and paths with duplicate dates removed; ``qlist`` is ``[]``
        when no QA list was given. ``yr``, ``yrstart`` and ``yrend`` are the
        values returned by ``_read_time_vector``.

    Raises
    ------
    ValueError
        If a QA list is given and its length differs from the image list.
    """
    with open(data_list, 'r') as handle:
        flist = handle.read().splitlines()

    qlist: List[str] | str = ''
    if qa_list != '':
        with open(qa_list, 'r') as handle:
            qlist = handle.read().splitlines()
        if len(flist) != len(qlist):
            raise ValueError("No. of Data and QA are not consistent")

    timevector, yr, yrstart, yrend = _read_time_vector(tlist, flist)
    timevector, flist, qlist = _unique_by_timevector(flist, qlist, timevector)

    if not isinstance(qlist, list):
        qlist = []
    return timevector, flist, qlist, yr, yrstart, yrend
88
+
89
+
90
def open_image_data(
    x_map: int,
    y_map: int,
    x: int,
    y: int,
    yflist: List[str],
    wflist: List[str] | str,
    lcfile: str,
    data_type: str,
    p_a,
    para_check: int,
    layer: int,
):
    """Read VI, QA, and LC blocks as arrays.

    Parameters
    ----------
    x_map, y_map : int
        Column and row offset of the window in the source rasters.
    x, y : int
        Window width and height.
    yflist : list[str]
        Image paths; band ``layer`` of each becomes one slice of ``vi``.
    wflist : list[str] or str
        QA paths (band 1 is read), or ``''`` / ``[]`` when there is no QA.
    lcfile : str
        Land-cover raster path (band 1 is read), or ``''`` for none.
    data_type : str
        dtype used for the VI stack and the raw QA stack.
    p_a
        QA-to-weight rules passed to ``assign_qa_weight``.
    para_check : int
        Files are read through Ray when this is > 1 and Ray is importable.
    layer : int
        Band index of the VI rasters.

    Returns
    -------
    (vi, qa, lc)
        ``vi`` and ``qa`` are shaped ``(y, x, len(files))``. Without QA files
        ``qa`` is all ones; otherwise it holds the weights produced by
        ``assign_qa_weight``.
    """
    use_ray = para_check > 1 and ray is not None
    z = len(yflist)

    def _read_stack(paths, band):
        """Read *band* of every path into one (y, x, n) array of ``data_type``."""
        if use_ray:
            @ray.remote
            def _read_remote(path):
                with rasterio.open(path, 'r') as src:
                    block = src.read(band, window=Window(x_map, y_map, x, y))
                # Return a fresh array rather than filling a buffer shared
                # through the closure.
                return block.astype(data_type)

            futures = [_read_remote.remote(path) for path in paths]
            return np.stack(ray.get(futures), axis=2)

        stack = np.ndarray((y, x, len(paths)), order='F', dtype=data_type)
        for i, path in enumerate(paths):
            with rasterio.open(path, 'r') as src:
                stack[:, :, i] = src.read(band, window=Window(x_map, y_map, x, y))
        return stack

    # VI stack
    vi = _read_stack(yflist, layer)

    # QA stack
    if wflist == '' or wflist == []:
        # No QA rasters: every observation gets weight 1.
        qa = np.ndarray((y, x, z), order='F', dtype=data_type)
        qa.fill(1)
    else:
        # QA rasters are read from band 1 in both the serial and the Ray
        # path (the Ray path previously used the VI band index).
        qa = assign_qa_weight(p_a, _read_stack(wflist, 1))

    # LC
    if lcfile == '':
        # NOTE(review): this default is (y, x, z) while a land-cover file
        # yields (y, x) - confirm which shape the caller expects.
        lc = np.ndarray((y, x, z), order='F', dtype=np.uint8)
        lc.fill(1)
    else:
        with rasterio.open(lcfile, 'r') as src:
            lc = src.read(1, window=Window(x_map, y_map, x, y))

    return vi, qa, lc