teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (151)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +193 -1
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +25 -18
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  8. teradataml/analytics/sqle/__init__.py +20 -2
  9. teradataml/analytics/utils.py +15 -1
  10. teradataml/analytics/valib.py +18 -4
  11. teradataml/automl/__init__.py +341 -112
  12. teradataml/automl/autodataprep/__init__.py +471 -0
  13. teradataml/automl/data_preparation.py +84 -42
  14. teradataml/automl/data_transformation.py +69 -33
  15. teradataml/automl/feature_engineering.py +76 -9
  16. teradataml/automl/feature_exploration.py +639 -25
  17. teradataml/automl/model_training.py +35 -14
  18. teradataml/clients/auth_client.py +2 -2
  19. teradataml/common/__init__.py +1 -2
  20. teradataml/common/constants.py +122 -63
  21. teradataml/common/messagecodes.py +14 -3
  22. teradataml/common/messages.py +8 -4
  23. teradataml/common/sqlbundle.py +40 -10
  24. teradataml/common/utils.py +366 -74
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +348 -86
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/apriori_example.json +22 -0
  29. teradataml/data/byom_example.json +11 -0
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  37. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  38. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  39. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  40. teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
  41. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  42. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  43. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  45. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  46. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  47. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  48. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  49. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  51. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  52. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  53. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  54. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  55. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  56. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  57. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  58. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  59. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  60. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  61. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  62. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  63. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  64. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  65. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  66. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  67. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  68. teradataml/data/hnsw_alter_data.csv +5 -0
  69. teradataml/data/hnsw_data.csv +10 -0
  70. teradataml/data/jsons/byom/h2opredict.json +1 -1
  71. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  72. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  73. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  74. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  75. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  76. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  77. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  78. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  79. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  80. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  81. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  82. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  83. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  84. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  85. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  86. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  87. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  88. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  89. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  90. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  91. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  92. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  93. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
  94. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
  95. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
  96. teradataml/data/ner_dict.csv +8 -0
  97. teradataml/data/ner_input_eng.csv +7 -0
  98. teradataml/data/ner_rule.csv +5 -0
  99. teradataml/data/pos_input.csv +40 -0
  100. teradataml/data/tdnerextractor_example.json +14 -0
  101. teradataml/data/teradataml_example.json +21 -0
  102. teradataml/data/textmorph_example.json +5 -0
  103. teradataml/data/to_num_data.csv +4 -0
  104. teradataml/data/tochar_data.csv +5 -0
  105. teradataml/data/trans_dense.csv +16 -0
  106. teradataml/data/trans_sparse.csv +55 -0
  107. teradataml/data/vectordistance_example.json +1 -1
  108. teradataml/dataframe/copy_to.py +45 -29
  109. teradataml/dataframe/data_transfer.py +72 -46
  110. teradataml/dataframe/dataframe.py +642 -166
  111. teradataml/dataframe/dataframe_utils.py +167 -22
  112. teradataml/dataframe/functions.py +135 -20
  113. teradataml/dataframe/setop.py +11 -6
  114. teradataml/dataframe/sql.py +330 -78
  115. teradataml/dbutils/dbutils.py +556 -140
  116. teradataml/dbutils/filemgr.py +14 -10
  117. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  118. teradataml/lib/aed_0_1.dll +0 -0
  119. teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
  120. teradataml/opensource/_class.py +141 -17
  121. teradataml/opensource/{constants.py → _constants.py} +7 -3
  122. teradataml/opensource/_lightgbm.py +52 -53
  123. teradataml/opensource/_sklearn.py +1008 -0
  124. teradataml/opensource/_wrapper_utils.py +5 -5
  125. teradataml/options/__init__.py +47 -15
  126. teradataml/options/configure.py +103 -26
  127. teradataml/options/display.py +13 -2
  128. teradataml/plot/axis.py +47 -8
  129. teradataml/plot/figure.py +33 -0
  130. teradataml/plot/plot.py +63 -13
  131. teradataml/scriptmgmt/UserEnv.py +307 -40
  132. teradataml/scriptmgmt/lls_utils.py +428 -145
  133. teradataml/store/__init__.py +2 -3
  134. teradataml/store/feature_store/feature_store.py +102 -7
  135. teradataml/table_operators/Apply.py +48 -19
  136. teradataml/table_operators/Script.py +23 -2
  137. teradataml/table_operators/TableOperator.py +3 -1
  138. teradataml/table_operators/table_operator_util.py +58 -9
  139. teradataml/utils/dtypes.py +49 -1
  140. teradataml/utils/internal_buffer.py +38 -0
  141. teradataml/utils/validators.py +377 -62
  142. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
  143. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
  144. teradataml/data/SQL_Fundamentals.pdf +0 -0
  145. teradataml/libaed_0_1.dylib +0 -0
  146. teradataml/libaed_0_1.so +0 -0
  147. teradataml/opensource/sklearn/__init__.py +0 -0
  148. teradataml/store/vector_store/__init__.py +0 -1586
  149. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
  150. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
  151. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0

teradataml/store/__init__.py
@@ -5,9 +5,8 @@ TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
 Primary Owner: pradeep.garre@teradata.com
 Secondary Owner: aanchal.kavedia@teradata.com
 
-This file imports components from Feature Store and Vector Store.
+This file imports components from Feature Store.
 """
 
 from teradataml.store.feature_store.feature_store import FeatureStore
-from teradataml.store.feature_store.models import *
-from teradataml.store.vector_store import *
+from teradataml.store.feature_store.models import *

teradataml/store/feature_store/feature_store.py
@@ -66,8 +66,7 @@ class FeatureStore:
         self.__df_container = {}
 
         # Store the table names here. Then use this where ever required.
-        self.__table_names = {name: UtilFuncs._get_qualified_table_name(self.__repo, table_name)
-                              for name, table_name in EFS_TABLES.items()}
+        self.__table_names = EFS_TABLES
 
         # Declare getter's for getting the corresponding DataFrame's.
         self.__get_features_df = lambda : self.__get_obj_df("feature")
@@ -1560,6 +1559,7 @@ class FeatureStore:
         table_name = self.__table_names["{}_staging".format(type_)]
 
         res = _delete_data(table_name=table_name,
+                           schema_name=self.__repo,
                            delete_conditions=(Col("name") == name)
                            )
 
@@ -1606,11 +1606,13 @@ class FeatureStore:
 
         # remove it from xref table first.
         _delete_data(table_name=ent_table_xref,
+                     schema_name=self.__repo,
                      delete_conditions=(Col("entity_name") == name)
                      )
 
         # remove from entity table.
         res = _delete_data(table_name=ent_table,
+                           schema_name=self.__repo,
                            delete_conditions=(Col("name") == name)
                            )
 
@@ -1758,6 +1760,96 @@ class FeatureStore:
         """
         return self.__remove_obj(name=feature, type_="feature")
 
+    def delete(self):
+        """
+        DESCRIPTION:
+            Removes the FeatureStore and its components from repository.
+            Notes:
+                * The function removes all the associated database objects along with data.
+                  Be cautious while using this function.
+                * The function tries to remove the underlying Database also once
+                  all the Feature Store objects are removed.
+                * The user must have permission on the database used by this Feature Store
+                    * to drop triggers.
+                    * to drop the tables.
+                    * to drop the Database.
+                * If the user lacks any of the mentioned permissions, Teradata recommends
+                  to not use this function.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Setup FeatureStore for repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore("vfs_v1")
+            >>> fs.setup()
+            True
+            >>> # Delete FeatureStore.
+            >>> fs.delete()
+            True
+            >>>
+        """
+        confirmation = input("The function removes Feature Store and drops the "
+                             "corresponding repo also. Are you sure you want to proceed? (Y/N): ")
+
+        if confirmation in ["Y", "y"]:
+            return self.__drop_feature_store_objects(self.__repo)
+
+        return False
+
+    @staticmethod
+    def __drop_feature_store_objects(repo_name):
+        """
+        DESCRIPTION:
+            Removes the FeatureStore and it's components from repository.
+
+        PARAMETERS:
+            repo_name:
+                Required Argument.
+                Specifies the name of the repository.
+                Types: str
+
+        RETURNS:
+            bool
+        """
+        # Drop all the tables and staging tables.
+        tables_ = [
+            EFS_GROUP_FEATURES_SPEC["table_name"],
+            EFS_FEATURE_GROUP_SPEC["table_name"],
+            EFS_FEATURES_SPEC['table_name'],
+            EFS_ENTITY_XREF_SPEC['table_name'],
+            EFS_ENTITY_SPEC["table_name"],
+            EFS_DATA_SOURCE_SPEC["table_name"]
+        ]
+
+        tables_stg_ = [
+            EFS_FEATURES_STAGING_SPEC['table_name'],
+            EFS_ENTITY_STAGING_SPEC["table_name"],
+            EFS_ENTITY_XREF_STAGING_SPEC["table_name"],
+            EFS_DATA_SOURCE_STAGING_SPEC["table_name"],
+            EFS_FEATURE_GROUP_STAGING_SPEC["table_name"],
+            EFS_GROUP_FEATURES_STAGING_SPEC["table_name"]
+        ]
+
+        # Drop all the triggers first. So that tables can be dropped.
+        triggers = ["{}_trg".format(table) for table in tables_]
+        for trigger in triggers:
+            execute_sql("drop trigger {}.{}".format(repo_name, trigger))
+
+        for table in (tables_ + [EFS_VERSION_SPEC["table_name"]] + tables_stg_):
+            db_drop_table(table, schema_name=repo_name)
+
+        execute_sql("DROP DATABASE {}".format(repo_name))
+
+        return True
+
     def delete_feature(self, feature):
         """
         DESCRIPTION:
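
Note: the drop order inside the new __drop_feature_store_objects is deliberate. Triggers reference the tables, and Teradata's DROP DATABASE only succeeds once the database holds no objects, so triggers go first, tables second, the database last. A minimal sketch of that ordering with hypothetical object names (the real method derives names from the EFS_*_SPEC constants shown above):

    # Order-of-operations sketch; names are placeholders, and print() stands
    # in for teradataml's execute_sql(stmt) / db_drop_table() calls.
    statements = [
        "DROP TRIGGER vfs_v1.features_trg",   # 1. triggers first, they block table drops
        "DROP TABLE vfs_v1.features",         # 2. then every table, including staging
        "DROP DATABASE vfs_v1",               # 3. database last, once it is empty
    ]
    for stmt in statements:
        print(stmt)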
@@ -2134,10 +2226,12 @@ class FeatureStore:
 
         # Remove data for FeatureGroup.
         _delete_data(table_name=self.__table_names["group_features_staging"],
+                     schema_name=self.__repo,
                      delete_conditions=(Col("group_name") == fg_name)
                      )
 
         res = _delete_data(table_name=self.__table_names["feature_group_staging"],
+                           schema_name=self.__repo,
                            delete_conditions=(Col("name") == fg_name)
                            )
 
@@ -2175,25 +2269,26 @@ class FeatureStore:
             fs.__get_features_df()
         """
         if obj_type not in self.__df_container:
+            from teradataml.dataframe.dataframe import in_schema
 
             # For feature or feature_staging, join it with xref table
             # so group name appears while listing features.
             map_ = {"feature": "group_features", "feature_staging": "group_features_staging"}
             if obj_type in map_:
-                features = DataFrame(self.__table_names[obj_type])
-                features_xref = DataFrame(self.__table_names[map_[obj_type]]).select(
+                features = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
+                features_xref = DataFrame(in_schema(self.__repo, self.__table_names[map_[obj_type]])).select(
                     ["feature_name", "group_name"])
                 df = features.join(features_xref, on="name==feature_name", how='left')
                 self.__df_container[obj_type] = df.select(features.columns+["group_name"])
             # For entity, join with xref table.
             elif obj_type == "entity" or obj_type == "entity_staging":
-                ent_df = DataFrame(self.__table_names[obj_type])
-                xref_df = DataFrame(self.__table_names["{}_xref".format(obj_type)]).select(
+                ent_df = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
+                xref_df = DataFrame(in_schema(self.__repo, self.__table_names["{}_xref".format(obj_type)])).select(
                     ['entity_name', 'entity_column'])
                 df = ent_df.join(xref_df, on="name==entity_name", how="inner")
                 self.__df_container[obj_type] = df.select(ent_df.columns+["entity_column"])
             else:
-                self.__df_container[obj_type] = DataFrame(self.__table_names[obj_type])
+                self.__df_container[obj_type] = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
 
         return self.__df_container[obj_type]
 
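Note: these FeatureStore hunks replace pre-qualified table names with in_schema() lookups at DataFrame construction time, so the repo schema is applied exactly where a table is read. A hedged sketch of the same call pattern; connection details and the table name are placeholders:

    from teradataml import DataFrame, create_context
    from teradataml.dataframe.dataframe import in_schema

    # Placeholder credentials for any reachable Vantage system.
    create_context(host="<host>", username="<user>", password="<password>")

    # in_schema(schema, table) qualifies the table at read time, which is how
    # FeatureStore now targets its repo instead of storing qualified names.
    features = DataFrame(in_schema("vfs_v1", "features"))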

teradataml/table_operators/Apply.py
@@ -51,7 +51,8 @@ class Apply(TableOperator):
                  data_order_column=None,
                  is_local_order=False,
                  sort_ascending=True,
-                 nulls_first=True):
+                 nulls_first=True,
+                 **kwargs):
        """
        DESCRIPTION:
            The fastpath Apply table operator executes a user-installed script or
@@ -316,14 +317,6 @@ class Apply(TableOperator):
                          is_local_order,
                          sort_ascending,
                          nulls_first)
-
-        # Set the variable specific to this child class.
-        self.apply_command = apply_command
-        self.env_name = env_name if env_name is not None else get_user_env()
-        self.style = style
-        self.returns = returns
-        self._skip_argument_validation = False
-
         # Create AnalyticsWrapperUtils instance which contains validation functions.
         # This is required for is_default_or_not check.
         # Rest all validation is done using _Validators
@@ -332,20 +325,56 @@ class Apply(TableOperator):
         # Perform argument validation for arguments specific to this class.
         self.__arg_info_matrix = []
 
-        self.__arg_info_matrix.append(["style", self.style, True, (str), True, ['CSV']])
-        self.__arg_info_matrix.append(["env_name", self.env_name, False, (str, UserEnv), True])
-        self.__arg_info_matrix.append(["apply_command", self.apply_command, False, (str), True])
-        self.__arg_info_matrix.append(["returns", self.returns, True, (dict), True])
-
+        self.__arg_info_matrix.append(["style", style, True, (str), True, ['CSV']])
+        self.__arg_info_matrix.append(["env_name", env_name, False, (str, UserEnv), True])
+        self.__arg_info_matrix.append(["apply_command", apply_command, False, (str), True])
+        self.__arg_info_matrix.append(["returns", returns, True, (dict), True])
+        self._skip_argument_validation = False
         # Perform the function argument validations.
         self.__apply__validate()
 
-        self.env = self.env_name if isinstance(self.env_name, UserEnv) else get_env(self.env_name)
+        # If user do not pass environment, get the default environment.
+        if env_name is None:
+            env_name = get_user_env()
+        self._open_af_env = env_name
+
+        # Set the variable specific to this child class.
+        self.apply_command = apply_command
+        self.env_name = env_name if isinstance(env_name, str) else env_name.env_name
+        self.style = style
+        self.returns = returns
+
+        # Internal variable to check if validation is required for Python and python package versions mismatch.
+        _validation_required = kwargs.pop('_validate_version', False)
+        # Interval variable to store the function name for which validation is required.
+        _func_name = kwargs.pop('_func_name', None)
+        # Internal variable to store the list of packages required for the function.
+        _packages = kwargs.pop('_packages', None)
+
+        # Check if validation for Python and python package versions mismatch is required.
+        if _validation_required:
+            # Check if the Python interpreter major versions are consistent between Vantage and local.
+            UtilFuncs._check_python_version_diff(self.env_name)
+            # Check if the package versions are consistent between Vantage and local.
+            UtilFuncs._check_package_version_diff(_func_name, _packages, self.env_name)
+
+
+    @property
+    def env(self):
+        """
+        DESCRIPTION:
+            Getter to get environment.
+
+        RETURNS:
+            bool
+
+        RAISES:
+            None
+        """
+        if isinstance(self._open_af_env, str):
+            self._open_af_env = get_env(self._open_af_env)
 
-        # User can specify object of UserEnv class. Or if environment is already created just pass
-        # remote user environment name as string.
-        if isinstance(self.env_name, UserEnv):
-            self.env_name = self.env_name.env_name
+        return self._open_af_env
 
     @property
     def skip_argument_validation(self):
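
Note: the new env property makes environment resolution lazy. Apply keeps whatever the caller passed (a name string or a UserEnv object) and only calls get_env() on first access. A self-contained sketch of the pattern with stand-in names:

    class _LazyEnv:
        def __init__(self, env_or_name):
            # A str name, or an already-resolved environment object.
            self._open_af_env = env_or_name

        @property
        def env(self):
            # Resolve a string name to an object on first access only,
            # then cache the result for subsequent reads.
            if isinstance(self._open_af_env, str):
                self._open_af_env = self._resolve(self._open_af_env)
            return self._open_af_env

        @staticmethod
        def _resolve(name):
            return {"env_name": name}  # stand-in for teradataml's get_env(name)

    holder = _LazyEnv("openml_env")
    print(holder.env)  # {'env_name': 'openml_env'}, resolved and cached here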

teradataml/table_operators/Script.py
@@ -67,7 +67,8 @@ class Script(TableOperator):
                  data_order_column=None,
                  is_local_order=False,
                  sort_ascending=True,
-                 nulls_first=True):
+                 nulls_first=True,
+                 **kwargs):
        """
        DESCRIPTION:
            The Script table operator function executes a user-installed script or
@@ -354,6 +355,24 @@ class Script(TableOperator):
         # Perform the function validations
         self.__validate()
 
+        # Add the prefix OPENBLAS_NUM_THREADS to the script command.
+        self.script_command = f"{TableOperatorConstants.OPENBLAS_NUM_THREADS.value} {self.script_command}"
+
+        # Internal variable to check if validation is required for Python and python package versions mismatch.
+        _validation_required = kwargs.pop('_validate_version', False)
+        # Interval variable to store the function name for which validation is required.
+        _func_name = kwargs.pop('_func_name', None)
+        # Internal variable to store the list of packages required for the function.
+        _packages = kwargs.pop('_packages', None)
+
+        # Check if validation for Python and python package versions mismatch is required.
+        if _validation_required:
+            # Check if the Python interpreter major versions are consistent between Vantage and local.
+            UtilFuncs._check_python_version_diff()
+            # Check if the package versions are consistent between Vantage and local.
+            UtilFuncs._check_package_version_diff(_func_name, _packages)
+
+
     @property
     def skip_argument_validation(self):
         """
@@ -1701,7 +1720,9 @@
                                                        gc_on_quit=True, quote=False,
                                                        table_type=table_type)
         try:
-            if output_style == OutputStyle.OUTPUT_TABLE.value:
+            if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+                UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query, volatile=True)
+            elif output_style == OutputStyle.OUTPUT_TABLE.value:
                 UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query)
             else:
                 UtilFuncs._create_view(tblop_stdout_temp_tablename, self._tblop_query)

teradataml/table_operators/TableOperator.py
@@ -458,7 +458,9 @@ class TableOperator:
                                                        )
 
         try:
-            if output_style == OutputStyle.OUTPUT_TABLE.value:
+            if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+                UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query, volatile=True)
+            elif output_style == OutputStyle.OUTPUT_TABLE.value:
                 UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query)
             else:
                 UtilFuncs._create_view(tblop_stdout_temp_tablename, self._tblop_query)
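
Note: both Script and TableOperator now consult configure.temp_object_type before materializing stdout results, creating a volatile table when it is set to the volatile-table constant. A hedged sketch of the user-facing switch; the accepted string value below is an assumption based on the option name, so consult the option's docstring in this release:

    from teradataml import configure

    # Assumed value: route teradataml's intermediate ("temp") objects to
    # volatile tables, the setting the new branches above test for.
    configure.temp_object_type = "VT"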

teradataml/table_operators/table_operator_util.py
@@ -249,6 +249,9 @@ class _TableOperatorUtils:
         self.data_partition_column = data_partition_column
         self.data_hash_column = data_hash_column
         self.__env = None
+        self.debug = kwargs.pop('debug', False)
+        self._validate_version = False
+        self.packages = []
         # Add all entries from kwargs as class attributes.
         self.__dict__.update(kwargs)
 
@@ -464,7 +467,10 @@ class _TableOperatorUtils:
             # Remove local copy of file to free up the disk space immediately.
             # Garbage collection will take care of it as a failsafe.
             # We may end up here after the script was created, but even before executing it.
-            GarbageCollector._delete_local_file(self.script_path)
+            if not self.debug:
+                GarbageCollector._delete_local_file(self.script_path)
+            elif not self.__dict__.get('is_printed', False):
+                self._print_script_path()
 
     def __get_script_name(self):
         """
@@ -486,7 +492,7 @@ class _TableOperatorUtils:
             script_entry, script_alias, script_name, script_base_name = self.__get_script_name()
         """
         script_entry = UtilFuncs._generate_temp_script_name(prefix="" if self.operation is None else self.operation,
-                                                            extension="py")
+                                                            extension="py", gc_on_quit=(self.debug^True))
         # script_alias is the file ID.
         script_alias = UtilFuncs._teradata_unquote_arg(UtilFuncs._extract_table_name(script_entry), quote='"')
 
@@ -530,6 +536,12 @@ class _TableOperatorUtils:
         # User can provide the installation location of previous version in case they are not using the latest.
         # This fix is done for ELE-5958 - https://teradata-pe.atlassian.net/browse/ELE-5958
 
+        # If operation is map_row or map_partition, then it should validate the python and
+        # 'dill' package version mismatch.
+        if self.operation in ["map_row", "map_partition"]:
+            self._validate_version = True
+            self.packages = ['dill']
+
         from teradataml.table_operators.Script import Script
         table_op_obj = Script(data=self.data,
                               script_name=self.script_name,
@@ -546,7 +558,10 @@
                               nulls_first=self.nulls_first,
                               charset=self.charset,
                               data_partition_column=self.data_partition_column,
-                              data_hash_column=self.data_hash_column
+                              data_hash_column=self.data_hash_column,
+                              _validate_version = self._validate_version,
+                              _func_name = self.operation,
+                              _packages = self.packages
                               )
         table_op_obj.check_reserved_keyword = check_reserved_keyword
 
@@ -588,6 +603,12 @@ class _TableOperatorUtils:
         EXAMPLES:
             return_obj = self.__execute_apply_table_operator()
         """
+
+        # If operation is apply, then it should validate the python and 'dill' package version mismatch.
+        if self.operation == "apply":
+            self._validate_version = True
+            self.packages = ['dill']
+
         # First create Apply Table operator object so that validations are done on inputs.
         from teradataml.table_operators.Apply import Apply
         apply_op_obj = Apply(data=self.data,
@@ -604,7 +625,10 @@
                              nulls_first=self.nulls_first,
                              data_partition_column=self.data_partition_column,
                              data_hash_column=self.data_hash_column,
-                             style=self.style
+                             style=self.style,
+                             _validate_version=self._validate_version,
+                             _func_name = self.operation,
+                             _packages = self.packages
                              )
 
         # APPLY operator requires installation and deletion of script file.
@@ -670,8 +694,33 @@
                               TableOperatorConstants.MAP_PARTITION_OP.value,
                               TableOperatorConstants.APPLY_OP.value,
                               TableOperatorConstants.UDF_OP.value]:
-            GarbageCollector._delete_object_entry(
-                object_to_delete=self.script_entry,
-                object_type=TeradataConstants.TERADATA_SCRIPT,
-                remove_entry_from_gc_list=True
-            )
+            if self.debug:
+                self._print_script_path()
+                self.is_printed = True
+            else:
+                GarbageCollector._delete_object_entry(
+                    object_to_delete=self.script_entry,
+                    object_type=TeradataConstants.TERADATA_SCRIPT,
+                    remove_entry_from_gc_list=True
+                )
+    def _print_script_path(self):
+        """
+        DESCRIPTION:
+            Internal function to print the path of the script file.
+
+        PARAMETERS:
+            None.
+
+        RAISES:
+            None.
+
+        RETURNS:
+            None.
+        """
+        mssg1 = f"Path for the script {self.script_path}"
+        mssg2 = "The user should delete the script file since it is not being garbage collected."
+        mssg_len = max(len(mssg1), len(mssg2))
+        print("-" * mssg_len)
+        print(mssg1)
+        print(mssg2)
+        print("-" * mssg_len)

teradataml/utils/dtypes.py
@@ -8,6 +8,7 @@ from teradatasqlalchemy import (INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_
                                 INTERVAL_HOUR, INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
                                 INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND, INTERVAL_SECOND)
 from teradatasqlalchemy import (GEOMETRY, MBB, MBR)
+from teradatasqlalchemy import VECTOR
 from teradataml.common.td_coltype_code_to_tdtype import HELP_COL_TYPE_TO_TDTYPE
 from teradataml.common.constants import TeradataTypes, PythonTypes
 from datetime import datetime, time, date
@@ -148,7 +149,7 @@ _GET_DATATYPES = {
                  INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR, INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
                  INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND, INTERVAL_MONTH, INTERVAL_SECOND, INTERVAL_YEAR,
                  INTERVAL_YEAR_TO_MONTH, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP, TIME, TIMESTAMP, VARBYTE,
-                 VARCHAR, GEOMETRY, MBB, MBR],
+                 VARCHAR, GEOMETRY, MBB, MBR, VECTOR],
    'NON_NUM_DATE_INTERVAL': [BLOB, BYTE, CHAR, CLOB, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP, TIME, TIMESTAMP,
                              VARBYTE, VARCHAR, GEOMETRY, MBB, MBR],
    'NON_NUM_INTERVAL': [BLOB, BYTE, CHAR, CLOB, DATE, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP, TIME, TIMESTAMP,
@@ -293,10 +294,57 @@ class _DtypesMappers:
         VARCHAR: lambda x: "{0},{1}".format(x.__class__.__name__, x.length)
     }
 
+    # Holds mapping between string representation of teradatasqlalchemy type
+    # and actual teradatasqlalchemy type.
+    DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER = {
+        "CHAR": CHAR,
+        "VARCHAR": VARCHAR,
+
+        "BYTEINT": BYTEINT,
+        "SMALLINT": SMALLINT,
+        "INTEGER": INTEGER,
+        "BIGINT": BIGINT,
+
+        "REAL": FLOAT,
+        "FLOAT": FLOAT,
+        "DOUBLE": FLOAT,
+        "DECIMAL": DECIMAL,
+        "NUMBER": NUMBER,
+
+        "DATE": DATE,
+        "TIME": TIME,
+        "TIMESTAMP": TIMESTAMP,
+        "TIMESTAMP_WTZ": TIMESTAMP,
+
+        "BYTE": BYTE,
+        "VARBYTE": VARBYTE,
+        "BLOB": BLOB,
+        # TODO: Add CLOB type when support is added from OTF.
+
+        # TODO: Check these types when corresponding data type support
+        # is available from OTF support or not.
+        "INTERVAL_YEAR": INTERVAL_YEAR,
+        "INTERVAL_YTM": INTERVAL_YEAR_TO_MONTH,
+        "INTERVAL_MONTH": INTERVAL_MONTH,
+        "INTERVAL_DAY": INTERVAL_DAY,
+
+        "INTERVAL_DTH": INTERVAL_DAY_TO_HOUR,
+        "INTERVAL_DTM": INTERVAL_DAY_TO_MINUTE,
+        "INTERVAL_DTS": INTERVAL_DAY_TO_SECOND,
+        "INTERVAL_HOUR": INTERVAL_HOUR,
+        "INTERVAL_HTM": INTERVAL_HOUR_TO_MINUTE,
+        "INTERVAL_HTS": INTERVAL_HOUR_TO_SECOND,
+        "INTERVAL_MINUTE": INTERVAL_MINUTE,
+        "INTERVAL_MTS": INTERVAL_MINUTE_TO_SECOND,
+        "INTERVAL_SECOND": INTERVAL_SECOND
+    }
+
+
 class _SuppArgTypes:
     VAL_ARG_DATATYPE = (str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
                         TIMESTAMP, VARCHAR)
 
+
 class _Dtypes:
 
     @staticmethod
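
Note: the new DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER translates type-name strings coming back from OTF/datalake metadata into teradatasqlalchemy classes. A hypothetical lookup, using a two-entry stand-in for the real dictionary (constructor arguments vary by type):

    from teradatasqlalchemy import INTEGER, VARCHAR

    # Two-entry stand-in for DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER.
    mapper = {"INTEGER": INTEGER, "VARCHAR": VARCHAR}

    # Resolve the metadata string to a class, then instantiate it.
    col_type = mapper["VARCHAR"](length=255)
    print(type(col_type).__name__)  # VARCHAR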

teradataml/utils/internal_buffer.py
@@ -82,3 +82,41 @@ class _InternalBuffer:
         """
         if key in cls.__data:
             return cls.__data.get(key)
+
+    @classmethod
+    def remove_key(cls, key):
+        """
+        DESCRIPTION:
+            Remove a particular key from the internal buffer.
+
+        RETURNS:
+            None
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Remove key "vs_session_id" from _InternalBuffer.
+            >>> _InternalBuffer.remove_key("vs_session_id")
+        """
+        del cls.__data[key]
+
+    @classmethod
+    def remove_keys(cls, keys):
+        """
+        DESCRIPTION:
+            Removes specified keys from the internal buffer.
+
+        RETURNS:
+            None
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Remove keys "list_base_envs" and "default_base_env" from _InternalBuffer.
+            >>> _InternalBuffer.remove_keys(['list_base_envs', 'default_base_env'])
+        """
+        for key in keys:
+            if cls.__data.get(key) is not None:
+                del cls.__data[key]
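
Note: _InternalBuffer is a class-level key/value store, and the two new methods differ in how they treat missing keys: remove_key deletes unconditionally (a plain del, which raises KeyError if the key is absent), while remove_keys skips keys that are not present. A minimal self-contained model of those semantics:

    class _Buffer:
        __data = {}

        @classmethod
        def add(cls, **kwargs):
            cls.__data.update(kwargs)

        @classmethod
        def remove_key(cls, key):
            del cls.__data[key]          # raises KeyError if the key is absent

        @classmethod
        def remove_keys(cls, keys):
            for key in keys:
                if cls.__data.get(key) is not None:
                    del cls.__data[key]  # silently skips absent keys

    _Buffer.add(vs_session_id="abc", default_base_env="py39")
    _Buffer.remove_keys(["default_base_env", "not_present"])
    _Buffer.remove_key("vs_session_id")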