PyPI - terrakio-core - Versions diffs - 0.3.9__py3-none-any.whl → 0.4.2__py3-none-any.whl - Mend

terrakio-core 0.3.9 (py3-none-any.whl) → 0.4.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of terrakio-core might be problematic. Click here for more details.

Files changed (12) [hide / show]

terrakio_core/__init__.py +1 -1
terrakio_core/async_client.py +21 -2
terrakio_core/client.py +101 -5
terrakio_core/convenience_functions/convenience_functions.py +280 -29
terrakio_core/endpoints/mass_stats.py +71 -16
terrakio_core/endpoints/model_management.py +424 -217
terrakio_core/endpoints/user_management.py +5 -5
terrakio_core/sync_client.py +106 -185
{terrakio_core-0.3.9.dist-info → terrakio_core-0.4.2.dist-info}/METADATA +1 -1
{terrakio_core-0.3.9.dist-info → terrakio_core-0.4.2.dist-info}/RECORD +12 -12
{terrakio_core-0.3.9.dist-info → terrakio_core-0.4.2.dist-info}/WHEEL +0 -0
{terrakio_core-0.3.9.dist-info → terrakio_core-0.4.2.dist-info}/top_level.txt +0 -0

terrakio_core/endpoints/model_management.py CHANGED Viewed

@@ -3,10 +3,11 @@ import json
 import time
 import textwrap
 import logging
-from typing import Dict, Any, Union, Tuple
+from typing import Dict, Any, Union, Tuple, Optional
 from io import BytesIO
 import numpy as np
 from google.cloud import storage
+import ast
 from ..helper.decorators import require_token, require_api_key, require_auth
 TORCH_AVAILABLE = False
 SKL2ONNX_AVAILABLE = False
@@ -30,6 +31,7 @@ except ImportError:
 from io import BytesIO
 from typing import Tuple
 class ModelManagement:
     def __init__(self, client):
         self._client = client
@@ -347,9 +349,10 @@ class ModelManagement:
         except Exception as e:
             raise ValueError(f"Failed to convert scikit-learn model {model_name} to ONNX: {str(e)}")
     @require_api_key
-    async def upload_and_deploy_cnn_model(self, model, model_name: str, dataset: str, product: str, input_expression: str, dates_iso8601: list, input_shape: Tuple[int, ...] = None):
+    async def upload_and_deploy_cnn_model(self, model, model_name: str, dataset: str, product: str, input_expression: str, dates_iso8601: list, input_shape: Tuple[int, ...] = None, processing_script_path: Optional[str] = None):
         """
         Upload a CNN model to the bucket and deploy it.
@@ -361,6 +364,7 @@ class ModelManagement:
             input_expression: Input expression for the dataset
             dates_iso8601: List of dates in ISO8601 format
             input_shape: Shape of input data for ONNX conversion (required for PyTorch models)
+            processing_script_path: Path to the processing script, if not provided, no processing will be done
         Raises:
             APIError: If the API request fails
@@ -369,7 +373,8 @@ class ModelManagement:
         """
         await self.upload_model(model=model, model_name=model_name, input_shape=input_shape)
         # so the uploading process is kinda similar, but the deployment step is kinda different
-        await self.deploy_cnn_model(dataset=dataset, product=product, model_name=model_name, input_expression=input_expression, model_training_job_name=model_name, dates_iso8601=dates_iso8601)
+        # we should pass the processing script path to the deploy cnn model function
+        await self.deploy_cnn_model(dataset=dataset, product=product, model_name=model_name, input_expression=input_expression, model_training_job_name=model_name, dates_iso8601=dates_iso8601, processing_script_path=processing_script_path)
     @require_api_key
     async def upload_and_deploy_model(self, model, model_name: str, dataset: str, product: str, input_expression: str, dates_iso8601: list, input_shape: Tuple[int, ...] = None):
@@ -475,6 +480,88 @@ class ModelManagement:
             padding=0
         )
+    def _parse_processing_script(self, script_path: str) -> Tuple[Optional[str], Optional[str]]:
+        """
+        Parse a Python file and extract preprocessing and postprocessing function bodies.
+        Args:
+            script_path: Path to the Python file containing processing functions
+        Returns:
+            Tuple of (preprocessing_code, postprocessing_code) where each can be None
+        """
+        try:
+            with open(script_path, 'r', encoding='utf-8') as f:
+                script_content = f.read()
+        except FileNotFoundError:
+            raise FileNotFoundError(f"Processing script not found: {script_path}")
+        except Exception as e:
+            raise ValueError(f"Error reading processing script: {e}")
+        # Handle empty file
+        if not script_content.strip():
+            self._client.logger.info(f"Processing script {script_path} is empty")
+            return None, None
+        try:
+            # Parse the Python file
+            tree = ast.parse(script_content)
+        except SyntaxError as e:
+            raise ValueError(f"Syntax error in processing script: {e}")
+        preprocessing_code = None
+        postprocessing_code = None
+        # Find function definitions
+        function_names = []
+        for node in ast.walk(tree):
+            if isinstance(node, ast.FunctionDef):
+                function_names.append(node.name)
+                if node.name == 'preprocessing':
+                    preprocessing_code = self._extract_function_body(script_content, node)
+                elif node.name == 'postprocessing':
+                    postprocessing_code = self._extract_function_body(script_content, node)
+        # Log what was found for debugging
+        if not function_names:
+            self._client.logger.warning(f"No functions found in processing script: {script_path}")
+        else:
+            found_functions = [name for name in function_names if name in ['preprocessing', 'postprocessing']]
+            if found_functions:
+                self._client.logger.info(f"Found processing functions: {found_functions}")
+            else:
+                self._client.logger.warning(f"No 'preprocessing' or 'postprocessing' functions found in {script_path}. "
+                                          f"Available functions: {function_names}")
+        return preprocessing_code, postprocessing_code
+    def _extract_function_body(self, script_content: str, func_node: ast.FunctionDef) -> str:
+        """Extract the body of a function from the script content."""
+        lines = script_content.split('\n')
+        # AST line numbers are 1-indexed, convert to 0-indexed
+        start_line = func_node.lineno - 1  # This is the 'def' line (0-indexed)
+        end_line = func_node.end_lineno - 1 if hasattr(func_node, 'end_lineno') else len(lines) - 1
+        # Extract ONLY the body lines (skip the def line entirely)
+        body_lines = []
+        for i in range(start_line + 1, end_line + 1):  # +1 to skip the 'def' line
+            if i < len(lines):
+                body_lines.append(lines[i])
+        if not body_lines:
+            return ""
+        # Join and dedent to remove function-level indentation
+        body_text = '\n'.join(body_lines)
+        cleaned_body = textwrap.dedent(body_text).strip()
+        # Handle empty function body
+        if not cleaned_body or cleaned_body in ['pass', 'return', 'return None']:
+            return ""
+        return cleaned_body
     @require_api_key
     async def deploy_cnn_model(
         self,
@@ -483,7 +570,8 @@ class ModelManagement:
         model_name: str,
         input_expression: str,
         model_training_job_name: str,
-        dates_iso8601: list
+        dates_iso8601: list,
+        processing_script_path: Optional[str] = None
     ) -> Dict[str, Any]:
         """
         Deploy a CNN model by generating inference script and creating dataset.
@@ -495,7 +583,7 @@ class ModelManagement:
             input_expression: Input expression for the dataset
             model_training_job_name: Name of the training job
             dates_iso8601: List of dates in ISO8601 format
+            processing_script_path: Path to the processing script, if not provided, no processing will be done
         Returns:
             dict: Response from the deployment process
@@ -506,8 +594,31 @@ class ModelManagement:
         user_info = await self._client.auth.get_user_info()
         uid = user_info["uid"]
+        preprocessing_code, postprocessing_code = None, None
+        if processing_script_path:
+            # if there is a function that is being passed in
+            try:
+                preprocessing_code, postprocessing_code = self._parse_processing_script(processing_script_path)
+                if preprocessing_code:
+                    self._client.logger.info(f"Using custom preprocessing from: {processing_script_path}")
+                if postprocessing_code:
+                    self._client.logger.info(f"Using custom postprocessing from: {processing_script_path}")
+                if not preprocessing_code and not postprocessing_code:
+                    self._client.logger.warning(f"No preprocessing or postprocessing functions found in {processing_script_path}")
+                    self._client.logger.info("Deployment will continue without custom processing")
+            except Exception as e:
+                raise ValueError(f"Failed to load processing script: {str(e)}")
+        # so we already have the preprocessing code and the post processing code, I need to pass them to the generate cnn script function
         # Generate and upload script
-        script_content = self.generate_cnn_script(model_name, product, model_training_job_name, uid)
+        # Build preprocessing section with CONSISTENT 8-space indentation
+        preprocessing_section = ""
+        if preprocessing_code and preprocessing_code.strip():
+            # First dedent the preprocessing code to remove any existing indentation
+            clean_preprocessing = preprocessing_code
+            # Then add consistent 8-space indentation to match the template
+            preprocessing_section = f"""{textwrap.indent(clean_preprocessing, '')}"""  # 8 spaces
+        print(preprocessing_section)
+        script_content = self.generate_cnn_script(model_name, product, model_training_job_name, uid, preprocessing_code, postprocessing_code)
         script_name = f"{product}.py"
         self._upload_script_to_bucket(script_content, script_name, model_training_job_name, uid)
         # Create dataset
@@ -536,109 +647,109 @@ class ModelManagement:
             str: Generated Python script content
         """
         return textwrap.dedent(f'''
-            import logging
-            from io import BytesIO
+        import logging
+        from io import BytesIO
-            import numpy as np
-            import pandas as pd
-            import xarray as xr
-            from google.cloud import storage
-            from onnxruntime import InferenceSession
+        import numpy as np
+        import pandas as pd
+        import xarray as xr
+        from google.cloud import storage
+        from onnxruntime import InferenceSession
-            logging.basicConfig(
-                level=logging.INFO
-            )
+        logging.basicConfig(
+            level=logging.INFO
+        )
-            def get_model():
-                logging.info("Loading model for {model_name}...")
+        def get_model():
+            logging.info("Loading model for {model_name}...")
-                client = storage.Client()
-                bucket = client.get_bucket('terrakio-mass-requests')
-                blob = bucket.blob('{uid}/{model_training_job_name}/models/{model_name}.onnx')
+            client = storage.Client()
+            bucket = client.get_bucket('terrakio-mass-requests')
+            blob = bucket.blob('{uid}/{model_training_job_name}/models/{model_name}.onnx')
-                model = BytesIO()
-                blob.download_to_file(model)
-                model.seek(0)
+            model = BytesIO()
+            blob.download_to_file(model)
+            model.seek(0)
-                session = InferenceSession(model.read(), providers=["CPUExecutionProvider"])
-                return session
+            session = InferenceSession(model.read(), providers=["CPUExecutionProvider"])
+            return session
-            def {product}(*bands, model):
-                logging.info("start preparing data")
-                data_arrays = list(bands)
-                reference_array = data_arrays[0]
-                original_shape = reference_array.shape
-                logging.info(f"Original shape: {{original_shape}}")
-                if 'time' in reference_array.dims:
-                    time_coords = reference_array.coords['time']
-                    if len(time_coords) == 1:
-                        output_timestamp = time_coords[0]
-                    else:
-                        years = [pd.to_datetime(t).year for t in time_coords.values]
-                        unique_years = set(years)
-                        if len(unique_years) == 1:
-                            year = list(unique_years)[0]
-                            output_timestamp = pd.Timestamp(f"{{year}}-01-01")
-                        else:
-                            latest_year = max(unique_years)
-                            output_timestamp = pd.Timestamp(f"{{latest_year}}-01-01")
+        def {product}(*bands, model):
+            logging.info("start preparing data")
+            data_arrays = list(bands)
+            reference_array = data_arrays[0]
+            original_shape = reference_array.shape
+            logging.info(f"Original shape: {{original_shape}}")
+            if 'time' in reference_array.dims:
+                time_coords = reference_array.coords['time']
+                if len(time_coords) == 1:
+                    output_timestamp = time_coords[0]
                 else:
-                    output_timestamp = pd.Timestamp("1970-01-01")
-                averaged_bands = []
-                for data_array in data_arrays:
-                    if 'time' in data_array.dims:
-                        averaged_band = np.mean(data_array.values, axis=0)
-                        logging.info(f"Averaged band from {{data_array.shape}} to {{averaged_band.shape}}")
+                    years = [pd.to_datetime(t).year for t in time_coords.values]
+                    unique_years = set(years)
+                    if len(unique_years) == 1:
+                        year = list(unique_years)[0]
+                        output_timestamp = pd.Timestamp(f"{{year}}-01-01")
                     else:
-                        averaged_band = data_array.values
-                        logging.info(f"No time dimension, shape: {{averaged_band.shape}}")
+                        latest_year = max(unique_years)
+                        output_timestamp = pd.Timestamp(f"{{latest_year}}-01-01")
+            else:
+                output_timestamp = pd.Timestamp("1970-01-01")
-                    flattened_band = averaged_band.reshape(-1, 1)
-                    averaged_bands.append(flattened_band)
+            averaged_bands = []
+            for data_array in data_arrays:
+                if 'time' in data_array.dims:
+                    averaged_band = np.mean(data_array.values, axis=0)
+                    logging.info(f"Averaged band from {{data_array.shape}} to {{averaged_band.shape}}")
+                else:
+                    averaged_band = data_array.values
+                    logging.info(f"No time dimension, shape: {{averaged_band.shape}}")
-                input_data = np.hstack(averaged_bands)
+                flattened_band = averaged_band.reshape(-1, 1)
+                averaged_bands.append(flattened_band)
-                logging.info(f"Final input shape: {{input_data.shape}}")
+            input_data = np.hstack(averaged_bands)
-                output = model.run(None, {{"float_input": input_data.astype(np.float32)}})[0]
+            logging.info(f"Final input shape: {{input_data.shape}}")
-                logging.info(f"Model output shape: {{output.shape}}")
+            output = model.run(None, {{"float_input": input_data.astype(np.float32)}})[0]
-                if len(original_shape) >= 3:
-                    spatial_shape = original_shape[1:]
-                else:
-                    spatial_shape = original_shape
+            logging.info(f"Model output shape: {{output.shape}}")
-                output_reshaped = output.reshape(spatial_shape)
+            if len(original_shape) >= 3:
+                spatial_shape = original_shape[1:]
+            else:
+                spatial_shape = original_shape
-                output_with_time = np.expand_dims(output_reshaped, axis=0)
+            output_reshaped = output.reshape(spatial_shape)
-                if 'time' in reference_array.dims:
-                    spatial_dims = [dim for dim in reference_array.dims if dim != 'time']
-                    spatial_coords = {{dim: reference_array.coords[dim] for dim in spatial_dims if dim in reference_array.coords}}
-                else:
-                    spatial_dims = list(reference_array.dims)
-                    spatial_coords = dict(reference_array.coords)
-                result = xr.DataArray(
-                    data=output_with_time.astype(np.float32),
-                    dims=['time'] + list(spatial_dims),
-                    coords={{
-                        'time': [output_timestamp.values],
-                        'y': spatial_coords['y'].values,
-                        'x': spatial_coords['x'].values
-                    }}
-                )
-                return result
+            output_with_time = np.expand_dims(output_reshaped, axis=0)
+            if 'time' in reference_array.dims:
+                spatial_dims = [dim for dim in reference_array.dims if dim != 'time']
+                spatial_coords = {{dim: reference_array.coords[dim] for dim in spatial_dims if dim in reference_array.coords}}
+            else:
+                spatial_dims = list(reference_array.dims)
+                spatial_coords = dict(reference_array.coords)
+            result = xr.DataArray(
+                data=output_with_time.astype(np.float32),
+                dims=['time'] + list(spatial_dims),
+                coords={{
+                    'time': [output_timestamp.values],
+                    'y': spatial_coords['y'].values,
+                    'x': spatial_coords['x'].values
+                }}
+            )
+            return result
             ''').strip()
     @require_api_key
-    def generate_cnn_script(self, model_name: str, product: str, model_training_job_name: str, uid: str) -> str:
+    def generate_cnn_script(self, model_name: str, product: str, model_training_job_name: str, uid: str, preprocessing_code: Optional[str] = None, postprocessing_code: Optional[str] = None) -> str:
         """
         Generate Python inference script for CNN model with time-stacked bands.
@@ -647,137 +758,233 @@ class ModelManagement:
             product: Product name
             model_training_job_name: Training job name
             uid: User ID
+            preprocessing_code: Preprocessing code
+            postprocessing_code: Postprocessing code
         Returns:
             str: Generated Python script content
         """
-        return textwrap.dedent(f'''
-            import logging
-            from io import BytesIO
-            import numpy as np
-            import pandas as pd
-            import xarray as xr
-            from google.cloud import storage
-            from onnxruntime import InferenceSession
-            logging.basicConfig(
-                level=logging.INFO
-            )
-            def get_model():
-                logging.info("Loading CNN model for {model_name}...")
-                client = storage.Client()
-                bucket = client.get_bucket('terrakio-mass-requests')
-                blob = bucket.blob('{uid}/{model_training_job_name}/models/{model_name}.onnx')
-                model = BytesIO()
-                blob.download_to_file(model)
-                model.seek(0)
-                session = InferenceSession(model.read(), providers=["CPUExecutionProvider"])
-                return session
-            def {product}(*bands, model):
-                logging.info("Start preparing CNN data with time-stacked bands")
-                data_arrays = list(bands)
-                if not data_arrays:
-                    raise ValueError("No bands provided")
-                reference_array = data_arrays[0]
-                original_shape = reference_array.shape
-                logging.info(f"Original shape: {{original_shape}}")
-                # Get time coordinates - all bands should have the same time dimension
-                if 'time' not in reference_array.dims:
-                    raise ValueError("Time dimension is required for CNN processing")
-                time_coords = reference_array.coords['time']
-                num_timestamps = len(time_coords)
-                logging.info(f"Number of timestamps: {{num_timestamps}}")
-                # Get spatial dimensions
-                spatial_dims = [dim for dim in reference_array.dims if dim != 'time']
-                height = reference_array.sizes[spatial_dims[0]]  # assuming first spatial dim is height
-                width = reference_array.sizes[spatial_dims[1]]   # assuming second spatial dim is width
-                logging.info(f"Spatial dimensions: {{height}} x {{width}}")
-                # Stack bands across time dimension
-                # Result will be: (num_bands * num_timestamps, height, width)
-                stacked_channels = []
-                for band_idx, data_array in enumerate(data_arrays):
-                    logging.info(f"Processing band {{band_idx + 1}}/{{len(data_arrays)}}")
-                    # Ensure consistent time coordinates across bands
-                    if not np.array_equal(data_array.coords['time'].values, time_coords.values):
-                        logging.warning(f"Band {{band_idx}} has different time coordinates, aligning...")
-                        data_array = data_array.sel(time=time_coords, method='nearest')
-                    # Extract values and ensure proper ordering (time, height, width)
-                    band_values = data_array.values
-                    if band_values.ndim == 3:
-                        # Reorder dimensions if needed to ensure (time, height, width)
-                        time_dim_idx = data_array.dims.index('time')
-                        if time_dim_idx != 0:
-                            axes_order = [time_dim_idx] + [i for i in range(len(data_array.dims)) if i != time_dim_idx]
-                            band_values = np.transpose(band_values, axes_order)
-                    # Add each timestamp of this band to the channel stack
-                    for t in range(num_timestamps):
-                        stacked_channels.append(band_values[t])
-                # Stack all channels: (num_bands * num_timestamps, height, width)
-                input_channels = np.stack(stacked_channels, axis=0)
-                total_channels = len(data_arrays) * num_timestamps
-                logging.info(f"Stacked channels shape: {{input_channels.shape}}")
-                logging.info(f"Total channels: {{total_channels}} ({{len(data_arrays)}} bands × {{num_timestamps}} timestamps)")
-                # Add batch dimension: (1, num_channels, height, width)
-                input_data = np.expand_dims(input_channels, axis=0).astype(np.float32)
-                logging.info(f"Final input shape for CNN: {{input_data.shape}}")
-                # Run inference
-                output = model.run(None, {{"float_input": input_data}})[0]
-                logging.info(f"Model output shape: {{output.shape}}")
-                # Process output back to xarray format
-                # Assuming output is (1, height, width) or (1, 1, height, width)
-                if output.ndim == 4 and output.shape[1] == 1:
-                    # Remove channel dimension if it's 1
-                    output_2d = output[0, 0]
-                elif output.ndim == 3:
-                    # Remove batch dimension
-                    output_2d = output[0]
-                else:
-                    # Handle other cases
-                    output_2d = np.squeeze(output)
-                    if output_2d.ndim != 2:
-                        raise ValueError(f"Unexpected output shape after processing: {{output_2d.shape}}")
-                # Determine output timestamp (use the latest timestamp)
-                output_timestamp = time_coords[-1]
-                # Get spatial coordinates from reference array
-                spatial_coords = {{dim: reference_array.coords[dim] for dim in spatial_dims}}
-                # Create output DataArray
-                result = xr.DataArray(
-                    data=np.expand_dims(output_2d.astype(np.float32), axis=0),
-                    dims=['time'] + spatial_dims,
-                    coords={{
-                        'time': [output_timestamp.values],
-                        spatial_dims[0]: spatial_coords[spatial_dims[0]].values,
-                        spatial_dims[1]: spatial_coords[spatial_dims[1]].values
-                    }}
-                )
-                logging.info(f"Final result shape: {{result.shape}}")
-                return result
-            ''').strip()
+        import textwrap
+        # Build preprocessing section with CONSISTENT 4-space indentation
+        preprocessing_section = ""
+        if preprocessing_code and preprocessing_code.strip():
+            clean_preprocessing = textwrap.dedent(preprocessing_code)
+            preprocessing_section = textwrap.indent(clean_preprocessing, '    ')
+        # Build postprocessing section with CONSISTENT 4-space indentation
+        postprocessing_section = ""
+        if postprocessing_code and postprocessing_code.strip():
+            clean_postprocessing = textwrap.dedent(postprocessing_code)
+            postprocessing_section = textwrap.indent(clean_postprocessing, '    ')
+        # Build the template WITHOUT dedenting the whole thing, so indentation is preserved
+        script_lines = [
+            "import logging",
+            "from io import BytesIO",
+            "import numpy as np",
+            "import pandas as pd",
+            "import xarray as xr",
+            "from google.cloud import storage",
+            "from onnxruntime import InferenceSession",
+            "from typing import Tuple",
+            "",
+            "logging.basicConfig(",
+            "    level=logging.INFO",
+            ")",
+            "",
+        ]
+        # Add preprocessing function definition BEFORE the main function
+        if preprocessing_section:
+            script_lines.extend([
+                "def preprocessing(array: Tuple[xr.DataArray, ...]) -> Tuple[xr.DataArray, ...]:",
+                preprocessing_section,
+                "",
+            ])
+        # Add postprocessing function definition BEFORE the main function
+        if postprocessing_section:
+            script_lines.extend([
+                "def postprocessing(array: xr.DataArray) -> xr.DataArray:",
+                postprocessing_section,
+                "",
+            ])
+        # Add the get_model function
+        script_lines.extend([
+            "def get_model():",
+            f"    logging.info(\"Loading CNN model for {model_name}...\")",
+            "",
+            "    client = storage.Client()",
+            "    bucket = client.get_bucket('terrakio-mass-requests')",
+            f"    blob = bucket.blob('{uid}/{model_training_job_name}/models/{model_name}.onnx')",
+            "",
+            "    model = BytesIO()",
+            "    blob.download_to_file(model)",
+            "    model.seek(0)",
+            "",
+            "    session = InferenceSession(model.read(), providers=[\"CPUExecutionProvider\"])",
+            "    return session",
+            "",
+            f"def {product}(*bands, model):",
+            "    logging.info(\"Start preparing CNN data with time-stacked bands\")",
+            "    data_arrays = list(bands)",
+            "    ",
+            "    if not data_arrays:",
+            "        raise ValueError(\"No bands provided\")",
+            "    ",
+        ])
+        # Add preprocessing call if preprocessing exists
+        if preprocessing_section:
+            script_lines.extend([
+                "    # Apply preprocessing",
+                "    data_arrays = preprocessing(tuple(data_arrays))",
+                "    data_arrays = list(data_arrays)  # Convert back to list for processing",
+                "    ",
+            ])
+        # Continue with the rest of the processing logic
+        script_lines.extend([
+            "    reference_array = data_arrays[0]",
+            "    original_shape = reference_array.shape",
+            "    logging.info(f\"Original shape: {original_shape}\")",
+            "    ",
+            "    # Get time coordinates - all bands should have the same time dimension",
+            "    if 'time' not in reference_array.dims:",
+            "        raise ValueError(\"Time dimension is required for CNN processing\")",
+            "    ",
+            "    time_coords = reference_array.coords['time']",
+            "    num_timestamps = len(time_coords)",
+            "    logging.info(f\"Number of timestamps: {num_timestamps}\")",
+            "    ",
+            "    # Get spatial dimensions",
+            "    spatial_dims = [dim for dim in reference_array.dims if dim != 'time']",
+            "    height = reference_array.sizes[spatial_dims[0]]  # assuming first spatial dim is height",
+            "    width = reference_array.sizes[spatial_dims[1]]   # assuming second spatial dim is width",
+            "    logging.info(f\"Spatial dimensions: {height} x {width}\")",
+            "    ",
+            "    # Stack bands across time dimension",
+            "    # Result will be: (num_bands * num_timestamps, height, width)",
+            "    stacked_channels = []",
+            "    ",
+            "    for band_idx, data_array in enumerate(data_arrays):",
+            "        logging.info(f\"Processing band {band_idx + 1}/{len(data_arrays)}\")",
+            "        ",
+            "        # Ensure consistent time coordinates across bands",
+            "        if not np.array_equal(data_array.coords['time'].values, time_coords.values):",
+            "            logging.warning(f\"Band {band_idx} has different time coordinates, aligning...\")",
+            "            data_array = data_array.sel(time=time_coords, method='nearest')",
+            "        ",
+            "        # Extract values and ensure proper ordering (time, height, width)",
+            "        band_values = data_array.values",
+            "        if band_values.ndim == 3:",
+            "            # Reorder dimensions if needed to ensure (time, height, width)",
+            "            time_dim_idx = data_array.dims.index('time')",
+            "            if time_dim_idx != 0:",
+            "                axes_order = [time_dim_idx] + [i for i in range(len(data_array.dims)) if i != time_dim_idx]",
+            "                band_values = np.transpose(band_values, axes_order)",
+            "        ",
+            "        # Add each timestamp of this band to the channel stack",
+            "        for t in range(num_timestamps):",
+            "            stacked_channels.append(band_values[t])",
+            "    ",
+            "    # Stack all channels: (num_bands * num_timestamps, height, width)",
+            "    input_channels = np.stack(stacked_channels, axis=0)",
+            "    total_channels = len(data_arrays) * num_timestamps",
+            "    logging.info(f\"Stacked channels shape: {input_channels.shape}\")",
+            "    logging.info(f\"Total channels: {total_channels} ({len(data_arrays)} bands × {num_timestamps} timestamps)\")",
+            "    ",
+            "    # Add batch dimension: (1, num_channels, height, width)",
+            "    input_data = np.expand_dims(input_channels, axis=0).astype(np.float32)",
+            "    logging.info(f\"Final input shape for CNN: {input_data.shape}\")",
+            "    ",
+            "    # Run inference",
+            "    output = model.run(None, {\"float_input\": input_data})[0]",
+            "    logging.info(f\"Model output shape: {output.shape}\")",
+            "    ",
+            "    # UPDATED: Handle multi-class CNN output properly",
+            "    if output.ndim == 4:",
+            "        if output.shape[1] == 1:",
+            "            # Single class output (regression or binary classification)",
+            "            output_2d = output[0, 0]",
+            "            logging.info(\"Single channel output detected\")",
+            "        else:",
+            "            # Multi-class output - convert logits/probabilities to class predictions",
+            "            output_classes = np.argmax(output, axis=1)  # Shape: (1, height, width)",
+            "            output_2d = output_classes[0]  # Shape: (height, width)",
+            "            ",
+            "            # Apply class merging: merge class 6 into class 3",
+            "            output_2d = np.where(output_2d == 6, 3, output_2d)",
+            "            ",
+            "            logging.info(f\"Multi-class output processed. Original classes: {output.shape[1]}\")",
+            "            logging.info(f\"Unique classes in output: {np.unique(output_2d)}\")",
+            "            logging.info(f\"Class distribution: {np.bincount(output_2d.flatten())}\")",
+            "    elif output.ndim == 3:",
+            "        # Remove batch dimension",
+            "        output_2d = output[0]",
+            "        logging.info(\"3D output detected, removed batch dimension\")",
+            "    else:",
+            "        # Handle other cases",
+            "        output_2d = np.squeeze(output)",
+            "        if output_2d.ndim != 2:",
+            "            logging.error(f\"Cannot process output shape: {output.shape}\")",
+            "            logging.error(f\"After squeeze: {output_2d.shape}\")",
+            "            raise ValueError(f\"Unexpected output shape after processing: {output_2d.shape}\")",
+            "        logging.info(\"Applied squeeze to output\")",
+            "    ",
+            "    # Ensure output is 2D",
+            "    if output_2d.ndim != 2:",
+            "        raise ValueError(f\"Final output must be 2D, got shape: {output_2d.shape}\")",
+            "    ",
+            "    # Determine output timestamp (use the latest timestamp)",
+            "    output_timestamp = time_coords[-1]",
+            "    ",
+            "    # Get spatial coordinates from reference array",
+            "    spatial_coords = {dim: reference_array.coords[dim] for dim in spatial_dims}",
+            "    ",
+            "    # Create output DataArray with appropriate data type",
+            "    # Use int32 for classification, float32 for regression",
+            "    is_multiclass = output.ndim == 4 and output.shape[1] > 1",
+            "    if is_multiclass:",
+            "        # Multi-class classification - use integer type",
+            "        output_dtype = np.int32",
+            "        output_type = 'classification'",
+            "    else:",
+            "        # Single output - use float type",
+            "        output_dtype = np.float32",
+            "        output_type = 'regression'",
+            "    ",
+            "    result = xr.DataArray(",
+            "        data=np.expand_dims(output_2d.astype(output_dtype), axis=0),",
+            "        dims=['time'] + spatial_dims,",
+            "        coords={",
+            "            'time': [output_timestamp.values],",
+            "            spatial_dims[0]: spatial_coords[spatial_dims[0]].values,",
+            "            spatial_dims[1]: spatial_coords[spatial_dims[1]].values",
+            "        },",
+            "        attrs={",
+            "            'description': 'CNN model prediction',",
+            "        }",
+            "    )",
+            "    ",
+            "    logging.info(f\"Final result shape: {result.shape}\")",
+            "    logging.info(f\"Final result data type: {result.dtype}\")",
+            "    logging.info(f\"Final result value range: {result.values.min()} to {result.values.max()}\")",
+        ])
+        # Add postprocessing call if postprocessing exists
+        if postprocessing_section:
+            script_lines.extend([
+                "    # Apply postprocessing",
+                "    result = postprocessing(result)",
+                "    ",
+            ])
+        # Single return statement at the end
+        script_lines.append("    return result")
+        return "\n".join(script_lines)
     @require_api_key
     def _upload_script_to_bucket(self, script_content: str, script_name: str, model_training_job_name: str, uid: str):

terrakio-core 0.3.9__py3-none-any.whl → 0.4.2__py3-none-any.whl

Potentially problematic release.

terrakio-core 0.3.9__py3-none-any.whl → 0.4.2__py3-none-any.whl