tracdap-runtime 0.5.28__py3-none-any.whl → 0.5.30__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
tracdap/rt/_impl/data.py CHANGED
@@ -74,11 +74,7 @@ class DataView:
74
74
 
75
75
 
76
76
  class _DataInternal:
77
-
78
- @staticmethod
79
- def float_dtype_check():
80
- if "Float64Dtype" not in pd.__dict__:
81
- raise _ex.EStartup("TRAC D.A.P. requires Pandas >= 1.2")
77
+ pass
82
78
 
83
79
 
84
80
  class DataMapping:
@@ -111,8 +107,40 @@ class DataMapping:
111
107
  }
112
108
 
113
109
  # Check the Pandas dtypes for handling floats are available before setting up the type mapping
114
- __PANDAS_FLOAT_DTYPE_CHECK = _DataInternal.float_dtype_check()
115
- __PANDAS_DATETIME_TYPE = pd.to_datetime([]).dtype
110
+ __PANDAS_VERSION_ELEMENTS = pd.__version__.split(".")
111
+ __PANDAS_MAJOR_VERSION = int(__PANDAS_VERSION_ELEMENTS[0])
112
+ __PANDAS_MINOR_VERSION = int(__PANDAS_VERSION_ELEMENTS[1])
113
+
114
+ if __PANDAS_MAJOR_VERSION == 2:
115
+
116
+ __PANDAS_DATE_TYPE = pd.to_datetime([dt.date(2000, 1, 1)]).as_unit(__TRAC_TIMESTAMP_UNIT).dtype
117
+ __PANDAS_DATETIME_TYPE = pd.to_datetime([dt.datetime(2000, 1, 1, 0, 0, 0)]).as_unit(__TRAC_TIMESTAMP_UNIT).dtype
118
+
119
+ @classmethod
120
+ def __pandas_datetime_type(cls, tz, unit):
121
+ if tz is None and unit is None:
122
+ return cls.__PANDAS_DATETIME_TYPE
123
+ _unit = unit if unit is not None else cls.__TRAC_TIMESTAMP_UNIT
124
+ if tz is None:
125
+ return pd.to_datetime([dt.datetime(2000, 1, 1, 0, 0, 0)]).as_unit(_unit).dtype
126
+ else:
127
+ return pd.DatetimeTZDtype(tz=tz, unit=_unit)
128
+
129
+ # Minimum supported version for Pandas is 1.2, when pd.Float64Dtype was introduced
130
+ elif __PANDAS_MAJOR_VERSION == 1 and __PANDAS_MINOR_VERSION >= 2:
131
+
132
+ __PANDAS_DATE_TYPE = pd.to_datetime([dt.date(2000, 1, 1)]).dtype
133
+ __PANDAS_DATETIME_TYPE = pd.to_datetime([dt.datetime(2000, 1, 1, 0, 0, 0)]).dtype
134
+
135
+ @classmethod
136
+ def __pandas_datetime_type(cls, tz, unit): # noqa
137
+ if tz is None:
138
+ return cls.__PANDAS_DATETIME_TYPE
139
+ else:
140
+ return pd.DatetimeTZDtype(tz=tz)
141
+
142
+ else:
143
+ raise _ex.EStartup(f"Pandas version not supported: [{pd.__version__}]")
116
144
 
117
145
  # Only partial mapping is possible, decimal and temporal dtypes cannot be mapped this way
118
146
  __ARROW_TO_PANDAS_TYPE_MAPPING = {
@@ -224,8 +252,12 @@ class DataMapping:
224
252
  cls.__TRAC_DECIMAL_SCALE)
225
253
 
226
254
  @classmethod
227
- def pandas_datetime_type(cls):
228
- return cls.__PANDAS_DATETIME_TYPE
255
+ def pandas_date_type(cls):
256
+ return cls.__PANDAS_DATE_TYPE
257
+
258
+ @classmethod
259
+ def pandas_datetime_type(cls, tz=None, unit=None):
260
+ return cls.__pandas_datetime_type(tz, unit)
229
261
 
230
262
  @classmethod
231
263
  def view_to_pandas(
@@ -297,7 +329,8 @@ class DataMapping:
297
329
  else:
298
330
  DataConformance.check_duplicate_fields(table.schema.names, False)
299
331
 
300
- return table.to_pandas(
332
+ # Use Arrow's built-in function to convert to Pandas
333
+ df_from_arrow = table.to_pandas(
301
334
 
302
335
  # Mapping for arrow -> pandas types for core types
303
336
  types_mapper=cls.__ARROW_TO_PANDAS_TYPE_MAPPING.get,
@@ -313,6 +346,33 @@ class DataMapping:
313
346
  # This is a significant performance win for very wide datasets
314
347
  split_blocks=True) # noqa
315
348
 
349
+ # Arrow 12 doesn't support the new precision handling for datetime values supported in Pandas 2
350
+ # However Arrow 13 dropped support for Python 3.7, which is a requirement for the TRAC 0.5.x series
351
+ # So to backport Pandas 2 support, special handling is needed for datetime fields when using Pandas 2
352
+ # This is not needed from TRAC 0.6 onward, which upgrades to Arrow 13 and drops Python 3.7 support
353
+ # Also it is not needed if the temporal objects flag is set, since it only affects NumPy datetime64
354
+
355
+ if cls.__PANDAS_MAJOR_VERSION == 2 and not temporal_objects_flag:
356
+ # Use table.schema, it is always present and has been normalized if a separate schema was supplied
357
+ return cls._fix_pandas_2_datetime_precision(df_from_arrow, table.schema)
358
+ else:
359
+ return df_from_arrow
360
+
361
+ @classmethod
362
+ def _fix_pandas_2_datetime_precision(cls, df: pd.DataFrame, schema: pa.Schema) -> pd.DataFrame:
363
+
364
+ for field in schema:
365
+ if pa.types.is_date(field.type):
366
+ dtype = cls.__PANDAS_DATE_TYPE
367
+ if df[field.name].dtype != dtype:
368
+ df[field.name] = df[field.name].astype(dtype)
369
+ if pa.types.is_timestamp(field.type):
370
+ dtype = cls.__pandas_datetime_type(field.type.tz, field.type.unit)
371
+ if df[field.name].dtype != dtype:
372
+ df[field.name] = df[field.name].astype(dtype)
373
+
374
+ return df
375
+
316
376
  @classmethod
317
377
  def pandas_to_arrow(cls, df: pd.DataFrame, schema: tp.Optional[pa.Schema] = None) -> pa.Table:
318
378
 
@@ -463,7 +523,7 @@ class DataConformance:
463
523
 
464
524
  table_column: pa.Array = table.column(table_index)
465
525
 
466
- pandas_type = pandas_types[table_index] \
526
+ pandas_type = pandas_types.iloc[table_index] \
467
527
  if pandas_types is not None \
468
528
  else None
469
529
 
@@ -691,16 +751,20 @@ class DataConformance:
691
751
  @classmethod
692
752
  def _coerce_date(cls, vector: pa.Array, field: pa.Field, pandas_type=None) -> pa.Array:
693
753
 
694
- # Allow casting date32 -> date64, both range and precision are greater so there is no data loss
754
+ # The bit-width restriction could be removed here
755
+ # For date types there is never loss of precision and pa.cast will raise an error on overflow
756
+ # Impact to client code is unlikely, still this change should happen with a TRAC minor version update
695
757
  if pa.types.is_date(vector.type):
696
758
  if field.type.bit_width >= vector.type.bit_width:
697
759
  return pc.cast(vector, field.type)
698
760
 
699
- # Special handling for Pandas/NumPy date values
700
- # These are encoded as np.datetime64[ns] in Pandas -> pa.timestamp64[ns] in Arrow
701
- # Only allow this conversion if the vector is coming from Pandas with datetime type
702
- if pandas_type == DataMapping.pandas_datetime_type():
703
- if pa.types.is_timestamp(vector.type) and vector.type.unit == "ns":
761
+ # Special handling for date values coming from Pandas/NumPy
762
+ # Only allow these conversions if the vector is supplied with Pandas type info
763
+ # For Pandas 1.x, dates are always encoded as np.datetime64[ns]
764
+ # For Pandas 2.x dates are still np.datetime64 but can be in s, ms, us or ns
765
+ # This conversion will not apply to dates held in Pandas using the Python date object types
766
+ if pandas_type is not None:
767
+ if pa.types.is_timestamp(vector.type) and pd.api.types.is_datetime64_any_dtype(pandas_type):
704
768
  return pc.cast(vector, field.type)
705
769
 
706
770
  error_message = cls._format_error(cls.__E_WRONG_DATA_TYPE, vector, field)
tracdap/rt/_version.py CHANGED
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.5.28"
15
+ __version__ = "0.5.30"
@@ -1,10 +1,5 @@
1
1
  # Code generated by TRAC
2
2
 
3
- from .result import TagUpdateList
4
- from .result import JobResult
5
-
6
- from .job import JobConfig
7
-
8
3
  from .common import _ConfigFile
9
4
  from .common import PluginConfig
10
5
  from .common import PlatformInfo
@@ -22,9 +17,6 @@ from .platform import InstanceConfig
22
17
  from .platform import ServiceMap
23
18
  from .platform import ServiceConfig
24
19
 
25
- from .runtime import RuntimeConfig
26
- from .runtime import SparkSettings
27
-
28
20
  from .gateway import GwProtocol
29
21
  from .gateway import GwRestMapping
30
22
  from .gateway import GatewayConfig
@@ -33,3 +25,11 @@ from .gateway import GwMatch
33
25
  from .gateway import GwTarget
34
26
  from .gateway import GwServiceMap
35
27
  from .gateway import GwService
28
+
29
+ from .result import TagUpdateList
30
+ from .result import JobResult
31
+
32
+ from .runtime import RuntimeConfig
33
+ from .runtime import SparkSettings
34
+
35
+ from .job import JobConfig
@@ -13,13 +13,17 @@ from .type import Value
13
13
  from .type import ArrayValue
14
14
  from .type import MapValue
15
15
 
16
- from .tag_update import TagOperation
17
- from .tag_update import TagUpdate
18
-
19
16
  from .object_id import ObjectType
20
17
  from .object_id import TagHeader
21
18
  from .object_id import TagSelector
22
19
 
20
+ from .search import SearchOperator
21
+ from .search import LogicalOperator
22
+ from .search import SearchTerm
23
+ from .search import LogicalExpression
24
+ from .search import SearchExpression
25
+ from .search import SearchParameters
26
+
23
27
  from .data import SchemaType
24
28
  from .data import PartType
25
29
  from .data import FieldSchema
@@ -28,18 +32,23 @@ from .data import SchemaDefinition
28
32
  from .data import PartKey
29
33
  from .data import DataDefinition
30
34
 
35
+ from .file import FileDefinition
36
+
37
+ from .stoarge import CopyStatus
38
+ from .stoarge import IncarnationStatus
39
+ from .stoarge import StorageCopy
40
+ from .stoarge import StorageIncarnation
41
+ from .stoarge import StorageItem
42
+ from .stoarge import StorageDefinition
43
+
44
+ from .tag_update import TagOperation
45
+ from .tag_update import TagUpdate
46
+
31
47
  from .model import ModelParameter
32
48
  from .model import ModelInputSchema
33
49
  from .model import ModelOutputSchema
34
50
  from .model import ModelDefinition
35
51
 
36
- from .search import SearchOperator
37
- from .search import LogicalOperator
38
- from .search import SearchTerm
39
- from .search import LogicalExpression
40
- from .search import SearchExpression
41
- from .search import SearchParameters
42
-
43
52
  from .flow import FlowNodeType
44
53
  from .flow import FlowNode
45
54
  from .flow import FlowSocket
@@ -53,17 +62,8 @@ from .job import RunModelJob
53
62
  from .job import RunFlowJob
54
63
  from .job import ImportModelJob
55
64
 
56
- from .file import FileDefinition
57
-
58
65
  from .custom import CustomDefinition
59
66
 
60
- from .stoarge import CopyStatus
61
- from .stoarge import IncarnationStatus
62
- from .stoarge import StorageCopy
63
- from .stoarge import StorageIncarnation
64
- from .stoarge import StorageItem
65
- from .stoarge import StorageDefinition
66
-
67
67
  from .object import ObjectDefinition
68
68
 
69
69
  from .tag import Tag
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tracdap-runtime
3
- Version: 0.5.28
3
+ Version: 0.5.30
4
4
  Summary: Runtime package for building models on the TRAC Data & Analytics Platform
5
5
  Home-page: https://tracdap.finos.org/
6
6
  Author: Martin Traverse
@@ -16,16 +16,17 @@ Classifier: Operating System :: OS Independent
16
16
  Requires-Python: <3.12,>=3.7
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: protobuf (==4.21.10)
20
- Requires-Dist: pyarrow (==10.0.1)
21
- Requires-Dist: pyyaml (==6.0.0)
22
- Requires-Dist: dulwich (==0.21.2)
23
- Requires-Dist: requests (==2.28.1)
24
- Requires-Dist: pandas (<1.6.0,>=1.2.0)
19
+ Requires-Dist: protobuf ==4.23.2
20
+ Requires-Dist: pyarrow ==12.0.1
21
+ Requires-Dist: pyyaml ==6.0.0
22
+ Requires-Dist: dulwich ==0.21.2
23
+ Requires-Dist: requests ==2.31.0
24
+ Requires-Dist: pandas <2.2.0,>=1.2.0
25
+ Requires-Dist: numpy <2.0.0
25
26
  Provides-Extra: aws
26
- Requires-Dist: boto3 (==1.26.22) ; extra == 'aws'
27
+ Requires-Dist: boto3 ==1.26.22 ; extra == 'aws'
27
28
  Provides-Extra: spark
28
- Requires-Dist: pyspark (<3.4.0,>=2.4.0) ; extra == 'spark'
29
+ Requires-Dist: pyspark <3.4.0,>=2.4.0 ; extra == 'spark'
29
30
 
30
31
  # TRAC Model Runtime for Python
31
32
 
@@ -46,7 +47,7 @@ Documentation for the TRAC platform is available on our website at
46
47
  The TRAC runtime for Python has these requirements:
47
48
 
48
49
  * Python: 3.7 up to 3.11.x
49
- * Pandas: 1.2 up to 1.5.x
50
+ * Pandas: 1.2 up to 2.1.x
50
51
  * PySpark 2.4.x, or 3.0 up to 3.3.x
51
52
 
52
53
  Not every combination of versions will work, e.g. PySpark 3 requires Python 3.8.
@@ -1,5 +1,5 @@
1
1
  tracdap/rt/__init__.py,sha256=rz9ERpKMlnR4LFZNGLtdNE26B_Y2V168bdd8hRmasKk,643
2
- tracdap/rt/_version.py,sha256=WibObUtJ9RYv5ay-pWo3yOkMVZvwdPKOOw1e_OoUKbQ,632
2
+ tracdap/rt/_version.py,sha256=krI77g3AnXMgUfWuNKj9uat-Gv5TDR8WCP6P9Sb8y4E,632
3
3
  tracdap/rt/exceptions.py,sha256=7nLYOOquROJwMBHJAoPHUbmrwp9jkT7LuR6lCsc7Am8,7730
4
4
  tracdap/rt/_exec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  tracdap/rt/_exec/actors.py,sha256=faZ7hCxsONLbjQKrFRZv-wL_aXaKjsgbLOHDI0uUXDc,32242
@@ -12,7 +12,7 @@ tracdap/rt/_exec/graph_builder.py,sha256=5vDjgLuzt8HrZp0c-U5CQ8dm_cwebEgeYfgR2ht
12
12
  tracdap/rt/_exec/runtime.py,sha256=MCfpEBwbh1vs5Ir4vOOK_R2z6XVaK-teAm2Iz3pAmLo,13708
13
13
  tracdap/rt/_impl/__init__.py,sha256=eBZMpgFx9uHJxMA-yMWHCQMvShTKxJRAizdz4vy5eWg,609
14
14
  tracdap/rt/_impl/config_parser.py,sha256=udnYRafOcP3aQSVXSfjCc70gOEbh7MxJpR1v0wqnBUw,15263
15
- tracdap/rt/_impl/data.py,sha256=GXe1CSe30G8S4TBOz76oEQVTxrSgUr5AjrnhZVULWYI,30498
15
+ tracdap/rt/_impl/data.py,sha256=I88rzp__hmrae288-oRd8Ozo5Y9BOGODBEWYD3ShWyQ,33763
16
16
  tracdap/rt/_impl/guard_rails.py,sha256=eZbhmBvAY-6SP-Y1CM33bMOQu4xLmAjDVctSR-yocZk,10724
17
17
  tracdap/rt/_impl/models.py,sha256=BWTL7y1fcSWCOxBn7f4NLpzcNrPABzP9hC7bWTVT2wc,9020
18
18
  tracdap/rt/_impl/repos.py,sha256=bdpUkBbNOIQOKJQGOhCDdXUMz2bY91YNIYlKRKTtM4Y,2063
@@ -37,7 +37,7 @@ tracdap/rt/api/__init__.py,sha256=rOiUwK6sav9QTXohpZAFXJ9MgsL0KBfUyKA7dYspfGQ,11
37
37
  tracdap/rt/api/hook.py,sha256=3gc90J2mzp27koDysHvl8HwiSePw0hjg1BED3VhZVR4,4033
38
38
  tracdap/rt/api/model_api.py,sha256=S2Dz7cre5pFIK3jw8Qlv6D90bv6KSULdbPkMBurEkAM,15402
39
39
  tracdap/rt/api/static_api.py,sha256=6wwsF6nQG56NPocAs8VUUN5zL1fVFLDDOvcqW-dbhvo,20725
40
- tracdap/rt/config/__init__.py,sha256=7O8x6B3InoUk_Ujwy3c6pk50QUAf3hra0yX4zQCMKBs,991
40
+ tracdap/rt/config/__init__.py,sha256=doDFsgsVexgXHsGYtigdIwf_0UfyqZuD3QZOqQGY3GI,991
41
41
  tracdap/rt/config/common.py,sha256=p5K6W6PVfTB9Ag3VXsM0HAhk4lW-_owaFmmELsoPkSI,1026
42
42
  tracdap/rt/config/common_pb2.py,sha256=2x6--pFrp1pwJkZNwCiBik6kTJ32CdzFD7DjZDmnrBY,3732
43
43
  tracdap/rt/config/gateway.py,sha256=Uz1ihOTHGLU0GBBIHAaZCV0ufyyHQJtQnAHeqD1dyOM,1551
@@ -60,7 +60,7 @@ tracdap/rt/launch/__init__.py,sha256=Zz_4f_ODsmweCxRmG2Dq1Slpb927jSugYclfF_Wgfws
60
60
  tracdap/rt/launch/__main__.py,sha256=9UVYYSsqvvMVOqjjBBeLNdzV_6IeIa_97KWOMXIpXY4,654
61
61
  tracdap/rt/launch/cli.py,sha256=giC30Dffz9eYd-7fDQqTZ65OXDDf0OPmKb6NV_meePE,2196
62
62
  tracdap/rt/launch/launch.py,sha256=ap6PWdIS4eg43aCK2A-bdAoXS75ZUIT3Kt-6Uc-4eNw,4497
63
- tracdap/rt/metadata/__init__.py,sha256=jYz1JNLb3WtIqUjUUjHbt-dhE0Jl_AvsxZzFy-tut-Y,1779
63
+ tracdap/rt/metadata/__init__.py,sha256=hm_6z9Dr8lCyOLzcqo69xtWTg0KkiCKH0I4Mnb1Umlc,1779
64
64
  tracdap/rt/metadata/common.py,sha256=HIOphjbX_y7gRpERThb0Hrgiij1qaSc9XmvcWiuE3nI,1453
65
65
  tracdap/rt/metadata/common_pb2.py,sha256=l3PJBc6jOoPD8o6KFJMoDYHMMQ5S3HyuTH2lFTfGPF4,1710
66
66
  tracdap/rt/metadata/custom.py,sha256=bNG2Fy8uYz1qE8idgN1h4EHkecV95BG0ruEl7XUWvRU,295
@@ -89,8 +89,8 @@ tracdap/rt/metadata/tag_update.py,sha256=gSNG1Nv4mIhcmBcuMa5P6pmuj_R4DwHc9R7riSw
89
89
  tracdap/rt/metadata/tag_update_pb2.py,sha256=Fcp59cwjzlPWnjoOxIBxVLA6frEVHhqwg-_NntqLu-Y,1816
90
90
  tracdap/rt/metadata/type.py,sha256=a11dBdO8Cp8fkXfS9Xom1IDqz3BQZenzvGY20V855QE,9402
91
91
  tracdap/rt/metadata/type_pb2.py,sha256=RooCizEbIXEA_UJ0i1qd2mtBY0SjcA_aJWuJ8qNhNV8,3920
92
- tracdap_runtime-0.5.28.dist-info/LICENSE,sha256=Q5Gh9SdMNa_F2ehQRShh7dJBz6qW_EQFtWzLukOWFWY,11365
93
- tracdap_runtime-0.5.28.dist-info/METADATA,sha256=oerdnj0vHdvY3ownivvq8oAPOqdxojfyHS2RAYQmW0g,4129
94
- tracdap_runtime-0.5.28.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
95
- tracdap_runtime-0.5.28.dist-info/top_level.txt,sha256=Uv0JfaE1Lp4JnCzqW8lqXNJAEcsAFpAUGOghJolVNdM,8
96
- tracdap_runtime-0.5.28.dist-info/RECORD,,
92
+ tracdap_runtime-0.5.30.dist-info/LICENSE,sha256=Q5Gh9SdMNa_F2ehQRShh7dJBz6qW_EQFtWzLukOWFWY,11365
93
+ tracdap_runtime-0.5.30.dist-info/METADATA,sha256=z5roQQrBYaWoOIAVxt8luqd1KqZ81AnHUiuquhhb7VY,4140
94
+ tracdap_runtime-0.5.30.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
95
+ tracdap_runtime-0.5.30.dist-info/top_level.txt,sha256=Uv0JfaE1Lp4JnCzqW8lqXNJAEcsAFpAUGOghJolVNdM,8
96
+ tracdap_runtime-0.5.30.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.40.0)
2
+ Generator: setuptools (70.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5