teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.
Files changed (88)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +196 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +79 -4
  6. teradataml/analytics/json_parser/metadata.py +12 -3
  7. teradataml/analytics/json_parser/utils.py +7 -2
  8. teradataml/analytics/sqle/__init__.py +1 -0
  9. teradataml/analytics/table_operator/__init__.py +1 -1
  10. teradataml/analytics/uaf/__init__.py +1 -1
  11. teradataml/analytics/utils.py +4 -0
  12. teradataml/automl/data_preparation.py +3 -2
  13. teradataml/automl/feature_engineering.py +15 -7
  14. teradataml/automl/model_training.py +39 -33
  15. teradataml/common/__init__.py +2 -1
  16. teradataml/common/constants.py +35 -0
  17. teradataml/common/garbagecollector.py +2 -1
  18. teradataml/common/messagecodes.py +8 -2
  19. teradataml/common/messages.py +3 -1
  20. teradataml/common/sqlbundle.py +25 -3
  21. teradataml/common/utils.py +134 -9
  22. teradataml/context/context.py +20 -10
  23. teradataml/data/SQL_Fundamentals.pdf +0 -0
  24. teradataml/data/dataframe_example.json +18 -2
  25. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  26. teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
  27. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  29. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  30. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  31. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  32. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  33. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  34. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  35. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  36. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  37. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  38. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  39. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  40. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  41. teradataml/data/medical_readings.csv +101 -0
  42. teradataml/data/patient_profile.csv +101 -0
  43. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  44. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  45. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  46. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  47. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  48. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  49. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  50. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  51. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  52. teradataml/data/target_udt_data.csv +8 -0
  53. teradataml/data/templates/open_source_ml.json +3 -2
  54. teradataml/data/vectordistance_example.json +4 -0
  55. teradataml/dataframe/dataframe.py +543 -175
  56. teradataml/dataframe/functions.py +553 -25
  57. teradataml/dataframe/sql.py +184 -15
  58. teradataml/dbutils/dbutils.py +556 -18
  59. teradataml/dbutils/filemgr.py +48 -1
  60. teradataml/lib/aed_0_1.dll +0 -0
  61. teradataml/opensource/__init__.py +1 -1
  62. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  63. teradataml/opensource/_lightgbm.py +950 -0
  64. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  65. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  66. teradataml/opensource/sklearn/__init__.py +0 -1
  67. teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
  68. teradataml/options/__init__.py +7 -23
  69. teradataml/options/configure.py +29 -3
  70. teradataml/scriptmgmt/UserEnv.py +3 -3
  71. teradataml/scriptmgmt/lls_utils.py +74 -21
  72. teradataml/store/__init__.py +13 -0
  73. teradataml/store/feature_store/__init__.py +0 -0
  74. teradataml/store/feature_store/constants.py +291 -0
  75. teradataml/store/feature_store/feature_store.py +2223 -0
  76. teradataml/store/feature_store/models.py +1505 -0
  77. teradataml/store/vector_store/__init__.py +1586 -0
  78. teradataml/table_operators/query_generator.py +3 -0
  79. teradataml/table_operators/table_operator_query_generator.py +3 -1
  80. teradataml/table_operators/table_operator_util.py +37 -38
  81. teradataml/table_operators/templates/dataframe_register.template +69 -0
  82. teradataml/utils/dtypes.py +4 -2
  83. teradataml/utils/validators.py +33 -1
  84. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
  85. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
  86. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  87. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  88. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
teradataml/README.md CHANGED
@@ -16,6 +16,187 @@ Copyright 2024, Teradata. All Rights Reserved.
  * [License](#license)
 
  ## Release Notes:
+
+ #### teradataml 20.00.00.03
+
+ * teradataml no longer supports setting the `auth_token` using `set_config_params()`. Users should use `set_auth_token()` to set the token.
+
+ * ##### New Features/Functionality
+   * ###### teradataml: DataFrame
+     * New Function
+       * `alias()` - Creates a DataFrame with an alias name.
+     * New Properties
+       * `db_object_name` - Get the underlying database object name on which the DataFrame is created.
+
+   * ###### teradataml: GeoDataFrame
+     * New Function
+       * `alias()` - Creates a GeoDataFrame with an alias name.
+
+   * ###### teradataml: DataFrameColumn a.k.a. ColumnExpression
+     * _Arithmetic Functions_
+       * `DataFrameColumn.isnan()` - Function evaluates expression to determine if the floating-point
+         argument is a NaN (Not-a-Number) value.
+       * `DataFrameColumn.isinf()` - Function evaluates expression to determine if the floating-point
+         argument is an infinite number.
+       * `DataFrameColumn.isfinite()` - Function evaluates expression to determine if it is a finite
+         floating-point value.
+
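A minimal usage sketch for the three new functions, assuming a DataFrame over a table with a float column `val` (table and column names are illustrative):

```
from teradataml import DataFrame

df = DataFrame("sensor_readings")   # assumed table with a float column "val"

# Each function returns a ColumnExpression that can be assigned as a new column.
flags = df.assign(val_is_nan=df.val.isnan(),
                  val_is_inf=df.val.isinf(),
                  val_is_finite=df.val.isfinite())
```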
+ * ###### FeatureStore - handles feature management within the Vantage environment (usage sketch after this list)
+   * FeatureStore Components
+     * Feature - Represents a feature which is used in ML Modeling.
+     * Entity - Represents the columns which serve as the uniqueness for the data used in ML Modeling.
+     * DataSource - Represents the source of Data.
+     * FeatureGroup - Collection of Feature, Entity and DataSource.
+       * Methods
+         * `apply()` - Adds Feature, Entity, DataSource to a FeatureGroup.
+         * `from_DataFrame()` - Creates a FeatureGroup from a teradataml DataFrame.
+         * `from_query()` - Creates a FeatureGroup using a SQL query.
+         * `remove()` - Removes Feature, Entity, or DataSource from a FeatureGroup.
+         * `reset_labels()` - Removes the labels assigned to the FeatureGroup that were set using `set_labels()`.
+         * `set_labels()` - Sets the Features as labels for a FeatureGroup.
+       * Properties
+         * `features` - Get the features of a FeatureGroup.
+         * `labels` - Get the labels of a FeatureGroup.
+   * FeatureStore
+     * Methods
+       * `apply()` - Adds Feature, Entity, DataSource, FeatureGroup to FeatureStore.
+       * `archive_data_source()` - Archives a specified DataSource from a FeatureStore.
+       * `archive_entity()` - Archives a specified Entity from a FeatureStore.
+       * `archive_feature()` - Archives a specified Feature from a FeatureStore.
+       * `archive_feature_group()` - Archives a specified FeatureGroup from a FeatureStore. The method also archives the underlying Feature, Entity, and DataSource.
+       * `delete_data_source()` - Deletes an archived DataSource.
+       * `delete_entity()` - Deletes an archived Entity.
+       * `delete_feature()` - Deletes an archived Feature.
+       * `delete_feature_group()` - Deletes an archived FeatureGroup.
+       * `get_data_source()` - Get the DataSources associated with a FeatureStore.
+       * `get_dataset()` - Get the teradataml DataFrame based on Features, Entities and DataSource from a FeatureGroup.
+       * `get_entity()` - Get the Entity associated with a FeatureStore.
+       * `get_feature()` - Get the Feature associated with a FeatureStore.
+       * `get_feature_group()` - Get the FeatureGroup associated with a FeatureStore.
+       * `list_data_sources()` - List DataSources.
+       * `list_entities()` - List Entities.
+       * `list_feature_groups()` - List FeatureGroups.
+       * `list_features()` - List Features.
+       * `list_repos()` - List available repos which are configured for FeatureStore.
+       * `repair()` - Repairs the underlying FeatureStore schema on the database.
+       * `set_features_active()` - Marks the Features as active.
+       * `set_features_inactive()` - Marks the Features as inactive.
+       * `setup()` - Sets up the FeatureStore for a repo.
+     * Properties
+       * `repo` - Property for the FeatureStore repo.
+       * `grant` - Property to grant access on FeatureStore to a user.
+       * `revoke` - Property to revoke access on FeatureStore from a user.
+
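A hedged end-to-end sketch of the workflow these components enable. The repo, table, and column names are illustrative, and the exact signatures (for example, the arguments of `from_DataFrame()`) may differ from this sketch; see the Teradata Package for Python Function Reference for the authoritative API:

```
from teradataml import DataFrame, FeatureStore, FeatureGroup

# Illustrative names; exact signatures are assumptions, not the documented API.
fs = FeatureStore(repo="sales_repo")
fs.setup()                            # create the FeatureStore schema for the repo

df = DataFrame("sales")               # assumed source table
fg = FeatureGroup.from_DataFrame(     # derive Feature/Entity/DataSource from the DataFrame
    name="sales_fg", df=df, entity_columns="txn_id")
fs.apply(fg)                          # register the FeatureGroup in the FeatureStore

dataset = fs.get_dataset("sales_fg")  # teradataml DataFrame built from the stored features
```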
+ * ###### teradataml: Table Operator Functions
+   * `Image2Matrix()` - Converts an image into a matrix.
+
+ * ###### teradataml: SQLE Engine Analytic Functions
+   * New Analytics Database Analytic Functions:
+     * `CFilter()`
+     * `NaiveBayes()`
+     * `TDNaiveBayesPredict()`
+     * `Shap()`
+     * `SMOTE()`
+
+ * ###### teradataml: Unbounded Array Framework (UAF) Functions
+   * New Unbounded Array Framework (UAF) Functions:
+     * `CopyArt()`
+
+ * ###### General functions
+   * Vantage File Management Functions
+     * `list_files()` - Lists the installed files in the Database.
+
+ * ###### OpensourceML: LightGBM
+   * teradataml adds support for the `lightgbm` package through the `OpensourceML` (`OpenML`) feature.
+     The following functionality is added in the current release:
+     * `td_lightgbm` - Interface object to run lightgbm functions and classes through Teradata Vantage.
+       Example usage below:
+       ```
+       from teradataml import td_lightgbm, DataFrame
+
+       df_train = DataFrame("multi_model_classification")
+
+       feature_columns = ["col1", "col2", "col3", "col4"]
+       label_columns = ["label"]
+       part_columns = ["partition_column_1", "partition_column_2"]
+
+       df_x = df_train.select(feature_columns)
+       df_y = df_train.select(label_columns)
+
+       # Dataset creation.
+       # Single model case.
+       obj_s = td_lightgbm.Dataset(df_x, df_y, silent=True, free_raw_data=False)
+
+       # Multi model case.
+       obj_m = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
+       obj_m_v = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
+
+       # Model training.
+       # Single model case.
+       opt = td_lightgbm.train(params={}, train_set=obj_s, num_boost_round=30)
+
+       opt.predict(data=df_x, num_iteration=20, pred_contrib=True)
+
+       # Multi model case.
+       rec = {}  # Dictionary in which record_evaluation() stores the evaluation history.
+       opt = td_lightgbm.train(params={}, train_set=obj_m, num_boost_round=30,
+                               callbacks=[td_lightgbm.record_evaluation(rec)],
+                               valid_sets=[obj_m_v, obj_m_v])
+
+       # Passing `label` argument to get it returned in the output DataFrame.
+       opt.predict(data=df_x, label=df_y, num_iteration=20)
+       ```
+     * Added support for accessing scikit-learn APIs using the exposed interface object `td_lightgbm`.
+
+     Refer to the Teradata Python Package User Guide for more details on this feature: arguments, usage, examples, and supportability in Vantage.
+
+ * ###### teradataml: Functions
+   * `register()` - Registers a user defined function (UDF).
+   * `call_udf()` - Calls a registered user defined function (UDF) and returns a ColumnExpression.
+   * `list_udfs()` - Lists all the UDFs registered using the `register()` function.
+   * `deregister()` - Deregisters a user defined function (UDF).
+
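A hedged sketch of the UDF round trip. The function body, names, and the exact argument shape of `call_udf()` are illustrative; see the Function Reference for the authoritative signatures:

```
from teradataml import DataFrame, register, call_udf

def to_upper(s):
    # Plain Python function to be registered as a UDF.
    return s.upper() if s is not None else s

register("to_upper", to_upper)                 # register the UDF

df = DataFrame("employees")                    # assumed table with a VARCHAR column "name"
# call_udf() returns a ColumnExpression, usable in assign().
df = df.assign(name_upper=call_udf("to_upper", (df.name,)))
```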
+ * ###### teradataml: Options
+   * Configuration Options
+     * `table_operator` - Specifies the name of the table operator.
+
+ * ##### Updates
+   * ###### General functions
+     * `set_auth_token()` - Added the `base_url` parameter, which accepts the CCP URL.
+       `ues_url` will be deprecated in the future; users will need to specify `base_url` instead.
+
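An illustrative call with the new parameter (the URL is a placeholder):

```
from teradataml import set_auth_token

# 'base_url' supersedes the to-be-deprecated 'ues_url'; placeholder URL shown.
set_auth_token(base_url="https://<ccp-host>")
```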
+ * ###### teradataml: DataFrame function
+   * `join()`
+     * Now supports a compound ColumnExpression having more than one binary operator in the `on` argument.
+     * Now supports a ColumnExpression containing FunctionExpression(s) in the `on` argument.
+     * Self-join now expects an aliased DataFrame in the `other` argument (see the sketch below).
+
+ * ###### teradataml: GeoDataFrame function
+   * `join()`
+     * Now supports a compound ColumnExpression having more than one binary operator in the `on` argument.
+     * Now supports a ColumnExpression containing FunctionExpression(s) in the `on` argument.
+     * Self-join now expects an aliased DataFrame in the `other` argument.
+
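A hedged sketch of the new self-join pattern with `alias()`; table and column names are illustrative:

```
from teradataml import DataFrame

df = DataFrame("employees")   # assumed table with columns "id" and "manager_id"
mgr = df.alias("mgr")         # aliased copy, now required for a self-join

# Compound ColumnExpression with more than one binary operator in 'on'.
joined = df.join(other=mgr,
                 on=(df.manager_id == mgr.id) & (df.id != mgr.id),
                 how="inner",
                 lsuffix="emp", rsuffix="mgr")
```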
181
+ * ###### teradataml: Unbounded Array Framework (UAF) Functions
182
+ * `SAX()` - Default value added for `window_size` and `output_frequency`.
183
+ * `DickeyFuller()`
184
+ * Supports TDAnalyticResult as input.
185
+ * Default value added for `max_lags`.
186
+ * Removed parameter `drift_trend_formula`.
187
+ * Updated permitted values for `algorithm`.
188
+
189
+ * ##### teradataml: AutoML
190
+ * `AutoML`, `AutoRegressor` and `AutoClassifier`
191
+ * Now supports DECIMAL datatype as input.
192
+
193
+ * ##### teradataml: SQLE Engine Analytic Functions
194
+ * `TextParser()`
195
+ * Argument name `covert_to_lowercase` changed to `convert_to_lowercase`.
196
+
197
+ * ##### Bug Fixes
198
+ * `db_list_tables()` now returns correct results when '%' is used.
199
+
19
200
  #### teradataml 20.00.00.02
20
201
 
21
202
  * teradataml will no longer be supported with SQLAlchemy < 2.0.
@@ -83,6 +264,10 @@ Copyright 2024, Teradata. All Rights Reserved.
83
264
  * Following arguments will be deprecated in the future:
84
265
  * `ues_url`
85
266
  * `auth_token`
267
+
268
+ * #### teradata DataFrame
269
+ * `to_pandas()` - Function returns the pandas dataframe with Decimal columns types as float instead of object.
270
+ If user want datatype to be object, set argument `coerce_float` to False.
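An illustrative contrast of the two behaviors (the table name is assumed):

```
from teradataml import DataFrame

df = DataFrame("sales")                   # assumed table with DECIMAL columns

pdf = df.to_pandas()                      # DECIMAL columns arrive as float
pdf2 = df.to_pandas(coerce_float=False)   # DECIMAL columns kept as Decimal objects (dtype=object)
```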
 
  * ###### Database Utility
    * `list_td_reserved_keywords()` - Accepts a list of strings as argument.
@@ -102,7 +287,7 @@ Copyright 2024, Teradata. All Rights Reserved.
  * ##### Bug Fixes
    * KNN `predict()` function can now predict on test data which does not contain target column.
    * Metrics functions are supported on the Lake system.
- * The following OpensourceML functions from different sklearn modules are fixed.
+ * The following OpensourceML functions from different sklearn modules in single model case are fixed.
    * `sklearn.ensemble`:
      * ExtraTreesClassifier - `apply()`
      * ExtraTreesRegressor - `apply()`
@@ -115,12 +300,21 @@ Copyright 2024, Teradata. All Rights Reserved.
      * Nystroem - `transform()`, `fit_transform()`
      * PolynomialCountSketch - `transform()`, `fit_transform()`
      * RBFSampler - `transform()`, `fit_transform()`
-   * `sklearn.neighbours`:
+   * `sklearn.neighbors`:
      * KNeighborsTransformer - `transform()`, `fit_transform()`
      * RadiusNeighborsTransformer - `transform()`, `fit_transform()`
    * `sklearn.preprocessing`:
      * KernelCenterer - `transform()`
      * OneHotEncoder - `transform()`, `inverse_transform()`
+ * The following OpensourceML functions from different sklearn modules in multi model case are fixed.
+   * `sklearn.feature_selection`:
+     * SelectFpr - `transform()`, `fit_transform()`, `inverse_transform()`
+     * SelectFdr - `transform()`, `fit_transform()`, `inverse_transform()`
+     * SelectFromModel - `transform()`, `fit_transform()`, `inverse_transform()`
+     * SelectFwe - `transform()`, `fit_transform()`, `inverse_transform()`
+     * RFECV - `transform()`, `fit_transform()`, `inverse_transform()`
+   * `sklearn.clustering`:
+     * Birch - `transform()`, `fit_transform()`
  * OpensourceML returns teradataml objects for model attributes and functions instead of sklearn
    objects so that the user can perform further operations like `score()`, `predict()` etc on top
    of the returned objects.
teradataml/__init__.py CHANGED
@@ -71,3 +71,7 @@ session_queryband.configure_queryband_parameters(app_name="TDML", app_version=__
 
  # Import functions.
  from teradataml.dataframe.functions import *
+
+ # Import FeatureStore and VectorStore
+ from teradataml.store import *
+
teradataml/_version.py CHANGED
@@ -8,4 +8,4 @@
  #
  # ##################################################################
 
- version = "20.00.00.02"
+ version = "20.00.00.03"
teradataml/analytics/analytic_function_executor.py CHANGED
@@ -28,8 +28,8 @@ from teradataml.common.messages import Messages, MessageCodes
  from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
  from teradataml.common.utils import UtilFuncs
  from teradataml.context.context import _get_context_temp_databasename
- from teradataml.dataframe.dataframe import in_schema
- from teradataml.dbutils.dbutils import _create_table, db_drop_table
+ from teradataml.dataframe.dataframe import in_schema, DataFrame
+ from teradataml.dbutils.dbutils import _create_table, db_drop_table, list_td_reserved_keywords
  from teradatasqlalchemy.types import *
  from teradataml.table_operators.table_operator_query_generator import TableOperatorQueryGenerator
  from teradataml.telemetry_utils.queryband import collect_queryband
@@ -343,6 +343,17 @@ class _AnlyticFunctionExecutor:
          self._func_output_args.append(temp_table_name)
          self._function_output_table_map[lang_name] = temp_table_name
 
+     def _get_column_name_from_feature(self, obj):
+         # Extract the associated column name from a Feature.
+         from teradataml.store.feature_store.feature_store import Feature
+         if isinstance(obj, Feature):
+             return obj.column_name
+
+         # Recurse into lists so lists of Features are also resolved.
+         if isinstance(obj, list):
+             return [self._get_column_name_from_feature(col) for col in obj]
+
+         return obj
+
      def _process_other_argument(self, **kwargs):
          """
          DESCRIPTION:
@@ -439,6 +450,9 @@ class _AnlyticFunctionExecutor:
 
          self._validate_analytic_function_argument(arg_name, arg_value, argument)
 
+         # Extract column names if the argument is a Feature.
+         arg_value = self._get_column_name_from_feature(arg_value)
+
          # Perform the checks which are specific to argument(_AnlyFuncArgument) type.
          # Check lower bound and upper bound for number type of arguments.
          if isinstance(arg_value, (int, float)):
@@ -474,6 +488,12 @@
          # does not require special case handler.
          arg_value = self._spl_func_obj._add_square_bracket(arg_value)
 
+         # Handle the special case of Teradata reserved keywords or column names with spaces.
+         # If the argument is a string or list of strings, then add quotes to the string.
+         if arg_name not in ["partition_columns"] and (\
+                 UtilFuncs._contains_space(arg_value) or list_td_reserved_keywords(arg_value)):
+             arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
+
          # SequenceInputBy arguments require special processing.
          if 500 <= argument.get_r_order_number() <= 510:
              quoted_value = UtilFuncs._teradata_collapse_arglist(arg_value, "")
@@ -535,6 +555,17 @@ class _AnlyticFunctionExecutor:
              return repr_string
          self._dyn_cls_data_members["__repr__"] = print_result
 
+         def copy(self, **args):
+             """ Function to copy the ART to another table. """
+             from teradataml import CopyArt
+             params = {
+                 "data": self.result,
+                 "database_name": args.get("database_name", None),
+                 "table_name": args.get("table_name", None),
+                 "map_name": args.get("map_name", None),
+                 "persist": args.get("persist", False)}
+             return CopyArt(**params)
+
          query = self.sqlmr_query
          build_time = None if self.__build_time is None else round(self.__build_time, 2)
 
@@ -544,6 +575,7 @@ class _AnlyticFunctionExecutor:
          # To list attributes using dict()
          self._dyn_cls_data_members["__dict__"] = self._dyn_cls_data_members
          self._dyn_cls_data_members["_mlresults"] = self._mlresults
+         self._dyn_cls_data_members["copy"] = copy
 
          # Dynamic class creation with In-DB function name.
          indb_class = type(self.func_name, (object,), self._dyn_cls_data_members)
@@ -700,6 +732,14 @@ class _AnlyticFunctionExecutor:
              raise TeradataMlException(
                  Messages.get_message(MessageCodes.CANNOT_USE_TOGETHER_WITH, "persist", "volatile"),
                  MessageCodes.CANNOT_USE_TOGETHER_WITH)
+
+         # If the function is VectorDistance and largereference_input is set to True,
+         # then set target_data_partition_column to PartitionKind.DIMENSION and
+         # reference_data_partition_column to PartitionKind.ANY.
+         if self.func_name == "VectorDistance" and \
+                 kwargs.get("largereference_input", False):
+             kwargs['target_data_partition_column'] = PartitionKind.DIMENSION
+             kwargs['reference_data_partition_column'] = PartitionKind.ANY
 
          self._dyn_cls_data_members.update(kwargs)
 
@@ -721,6 +761,11 @@ class _AnlyticFunctionExecutor:
          if self.func_name in ['GLM', 'TDGLMPredict'] and \
                  any(key in kwargs for key in ['data_partition_column', 'data_hash_column', 'local_order_data']):
              skip_output_arg_processing = True
+         elif self.func_name in ['CopyArt']:
+             # The CopyArt function takes care of persisting the result table internally
+             # through the 'permanent_table' argument.
+             persist = False
+             volatile = False
 
          if not skip_output_arg_processing:
              self._process_output_argument(**kwargs)
@@ -2180,6 +2225,31 @@ class _StoredProcedureExecutor(_UAFFunctionExecutor):
          self._func_other_args['database_name'] = UtilFuncs._teradata_quote_arg(schema_name, "\'", False)
          self._func_other_args['table_name'] = UtilFuncs._teradata_quote_arg(table_name, "\'", False)
 
+         # The 'CopyArt' function requires 'SRC_DATABASENMAE' and 'SRC_TABLENAME' as input arguments.
+         # Extract the database and table name from the 'data' argument and add them to the
+         # '_func_other_args' dictionary.
+         if self.func_name == "CopyArt":
+             data = kwargs.get('data', None)
+             argument_info = ["data", data, False, (DataFrame), True]
+             # 'data' is a required argument for the 'CopyArt' function to get the source table name and database name.
+             _Validators._validate_missing_required_arguments([argument_info])
+             # 'data' should be a DataFrame.
+             _Validators._validate_function_arguments([argument_info])
+
+             # Add the 'SRC_DATABASENMAE' and 'SRC_TABLENAME' to the '_func_other_args' dictionary.
+             self._func_other_args["SRC_DATABASENMAE"] = "'{0}'".format(UtilFuncs._extract_db_name(data._table_name))
+             self._func_other_args["SRC_TABLENAME"] = "'{0}'".format(UtilFuncs._extract_table_name(data._table_name))
+
+             # Set permanent_table to 'True' if 'persist' is set to True, else 'False'.
+             kwargs['permanent_table'] = 'True' if kwargs.get('persist', False) else 'False'
+
+             # Set 'map_name' to an empty string if not provided.
+             if kwargs.get('map_name', None) is None:
+                 kwargs['map_name'] = ""
+
+             # CopyArt does not take 'data' as an input argument.
+             kwargs.pop('data')
+
          for argument in self._metadata.arguments:
              sql_name = argument.get_name()
              lang_name = argument.get_lang_name()
@@ -2236,8 +2306,13 @@ class _StoredProcedureExecutor(_UAFFunctionExecutor):
              Internal function to process the function output.
          """
          for lang_name, table_name in self._function_output_table_map.items():
-             out_table_name = UtilFuncs._extract_table_name(table_name)
-             out_db_name = UtilFuncs._extract_db_name(table_name)
+             # For the 'CopyArt' function, the result should be the destination table name and database name provided as input.
+             if self.func_name == "CopyArt":
+                 out_table_name = kwargs.get('table_name')
+                 out_db_name = kwargs.get('database_name')
+             else:
+                 out_table_name = UtilFuncs._extract_table_name(table_name)
+                 out_db_name = UtilFuncs._extract_db_name(table_name)
              df = self._awu._create_data_set_object(
                  df_input=out_table_name, database_name=out_db_name, source_type="table")
              self._dyn_cls_data_members[lang_name] = df
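Together, these changes expose ART copying both as the standalone `CopyArt()` function and as a `copy()` method attached to UAF results. A hedged usage sketch, with table and database names purely illustrative and arguments mirroring the parameters handled above:

```
from teradataml import DataFrame, CopyArt

art = DataFrame("my_art_table")        # assumed existing ART table

# Standalone call; 'persist' maps to the stored procedure's 'permanent_table'.
copied = CopyArt(data=art,
                 database_name="my_db",
                 table_name="my_art_copy",
                 persist=True)
copied_df = copied.result              # DataFrame over "my_db"."my_art_copy"
```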
teradataml/analytics/json_parser/metadata.py CHANGED
@@ -1179,10 +1179,19 @@ class _AnlyFuncMetadata:
          # from teradataml.data.docs.<function_type>.<doc_dir_with_version_info>.<func_name>
          # import <func_name>
          func_module = __import__(("teradataml.data.docs.{}.{}.{}".
-                                    format(function_type, doc_dir, self.func_name)),
-                                   fromlist=[self.func_name])
-         return getattr(func_module, self.func_name).__doc__
+                                   format(function_type, doc_dir, self.func_name)),
+                                  fromlist=[self.func_name])
+         return getattr(func_module, self.func_name).__doc__
      except:
+         # For db_version 20.00, if function type is sqle, then check for docs_17_20 directory.
+         if version_dir == '20.00' and function_type == 'sqle':
+             try:
+                 func_module = __import__(("teradataml.data.docs.{}.{}.{}".
+                                           format(function_type, "docs_17_20", self.func_name)),
+                                          fromlist=[self.func_name])
+                 return getattr(func_module, self.func_name).__doc__
+             except:
+                 pass
      return ("Refer to Teradata Package for Python Function Reference guide for "
              "Documentation. Reference guide can be found at: https://docs.teradata.com ."
              "Refer to the section with Database version: {}".format(self.__database_version))
teradataml/analytics/json_parser/utils.py CHANGED
@@ -54,7 +54,6 @@ def _get_json_data_from_tdml_repo():
      # both versions are matched, then the json store has data available so no need
      # to parse again.
      if configure.database_version != _JsonStore.version:
-
          # Json store version is different from database version. So, json's should
          # be parsed again. Before parsing the json, first clean the json store.
          _JsonStore.clean()
@@ -171,9 +170,15 @@ def __get_json_files_directory():
      if func_info.value["lowest_version"]:
          # Check if current function type is allowed on connected Vantage version or not.
          if func_info.value["func_type"] in func_type_json_version.keys():
+             # If function type is SQLE and db_version is 20.00, then add 17.20 JSON directory.
+             if func_type_json_version[func_info.value["func_type"]] == '20.00' and \
+                     func_info.value["func_type"] == 'sqle':
+                 yield [UtilFuncs._get_data_directory(dir_name="jsons", func_type=func_info,
+                                                      version='17.20'),
+                        func_info.name]
              yield [UtilFuncs._get_data_directory(dir_name="jsons", func_type=func_info,
                                                   version=func_type_json_version[func_info.value["func_type"]]),
-                    func_info.name]
+                    func_info.name]
          else:
              yield [UtilFuncs._get_data_directory(dir_name="jsons", func_type=func_info), func_info.name]
 
teradataml/analytics/sqle/__init__.py CHANGED
@@ -71,6 +71,7 @@ _sqle_functions = ['ANOVA',
                     'Silhouette',
                     'SimpleImputeFit',
                     'SimpleImputeTransform',
+                    'SMOTE',
                     'StrApply',
                     'StringSimilarity',
                     'SVM',
teradataml/analytics/table_operator/__init__.py CHANGED
@@ -1,7 +1,7 @@
  from teradataml.analytics.meta_class import _AnalyticFunction
  from teradataml.analytics.meta_class import _common_init, _common_dir
 
- _nos_functions = ['ReadNOS', 'WriteNOS']
+ _nos_functions = ['ReadNOS', 'WriteNOS', 'Image2Matrix']
 
  for func in _nos_functions:
      globals()[func] = type("{}".format(func), (_AnalyticFunction,),
teradataml/analytics/uaf/__init__.py CHANGED
@@ -73,7 +73,7 @@ for func in _uaf_functions:
                              "__doc__": _AnalyticFunction.__doc__,
                              "__dir__": _common_dir})
 
- _stored_procedure = ['FilterFactory1d']
+ _stored_procedure = ['CopyArt', 'FilterFactory1d']
 
  for func in _stored_procedure:
      globals()[func] = type("{}".format(func), (_AnalyticFunction,),
teradataml/analytics/utils.py CHANGED
@@ -441,6 +441,10 @@ class FuncSpecialCaseHandler():
                            "filter_type": self._single_quote_arg,
                            "window_type": self._single_quote_arg,
                            "filter_description": self._single_quote_arg},
+         "CopyArt": {"database_name": self._single_quote_arg,
+                     "table_name": self._single_quote_arg,
+                     "map_name": self._single_quote_arg,
+                     "permanent_table": self._single_quote_arg},
          "DWT": {"wavelet": self._single_quote_arg},
          "IDWT": {"part": self._single_quote_arg,
                   "wavelet": self._single_quote_arg,
teradataml/automl/data_preparation.py CHANGED
@@ -465,7 +465,7 @@ class _DataPreparation:
          RETURNS:
              int, number of folds to be used for cross-validation.
          """
-         num_of_folds = lambda rows: 1 if rows > 20000 else (3 if 1000 < rows <= 20000 else 10)
+         num_of_folds = lambda rows: 2 if rows > 20000 else (4 if 1000 < rows <= 20000 else 10)
          return num_of_folds(rows)
 
      def _feature_selection_PCA(self):
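The new fold policy in plain terms: very large inputs (more than 20,000 rows) now get 2 folds, mid-sized inputs get 4, and small inputs (1,000 rows or fewer) keep 10. A quick standalone check of the lambda from the hunk above:

```
num_of_folds = lambda rows: 2 if rows > 20000 else (4 if 1000 < rows <= 20000 else 10)

assert num_of_folds(50000) == 2   # large dataset: fewer, cheaper folds
assert num_of_folds(5000) == 4    # mid-sized dataset
assert num_of_folds(800) == 10    # small dataset: more folds for stable estimates
```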
@@ -783,7 +783,8 @@ class _DataPreparation:
          for col in data.columns:
              # Selecting columns that will be scaled
              # Excluding target_col and columns with single value
-             if col not in ['id', self.target_column] and data.drop_duplicate(col).size > 1:
+             if col not in ['id', self.target_column] and \
+                     data.drop_duplicate(col).size > 1:
                  columns_to_scale.append(col)
 
          if feature_selection_mtd == "lasso":
teradataml/automl/feature_engineering.py CHANGED
@@ -40,6 +40,7 @@ from teradataml.common.garbagecollector import GarbageCollector
  from teradataml.dataframe.sql_functions import case
  from teradataml.hyperparameter_tuner.utils import _ProgressBar
  from teradataml.utils.validators import _Validators
+ from teradataml.common.utils import UtilFuncs
 
 
  class _FeatureEngineering:
@@ -273,7 +274,7 @@ class _FeatureEngineering:
                            show_data=True)
          start_time = time.time()
          rows = self.data.shape[0]
-         self.data=self.data.drop_duplicate()
+         self.data=self.data.drop_duplicate(self.data.columns)
          if rows != self.data.shape[0]:
              self._display_msg(msg=f'Updated dataset sample after removing {rows-self.data.shape[0]} duplicate records:',
                                data=self.data,
@@ -565,11 +566,18 @@ class _FeatureEngineering:
 
          # Removing rows with missing target column value
          self.data = self.data.dropna(subset=[self.target_column])
+
+         params = {
+             "data": self.data,
+             "target_columns": self.data.columns,
+             "persist": True,
+             "display_table_name": False
+         }
 
-         obj = ColumnSummary(data=self.data,
-                             target_columns=self.data.columns,
-                             volatile=self.volatile,
-                             persist=self.persist)
+         obj = ColumnSummary(**params)
+
+         # Adding transformed data containing table to garbage collector
+         GarbageCollector._add_to_garbagecollector(obj.result._table_name)
 
          cols_miss_val={}
          # Iterating over each row in the column summary result
@@ -704,7 +712,7 @@ class _FeatureEngineering:
          for key, val in self.imputation_cols.items():
 
              col_stat.append(key)
-             if self.data_types[key] in ['float', 'int']:
+             if self.data_types[key] in ['float', 'int', 'decimal.Decimal']:
                  val = skew_data[f'skew_{key}']
                  # Median imputation method, if abs(skewness value) > 1
                  if abs(val) > 1:
@@ -713,7 +721,7 @@ class _FeatureEngineering:
              else:
                  stat.append('mean')
              # Mode imputation method, if categorical column
-             else:
+             elif self.data_types[key] in ['str']:
                  stat.append('mode')
 
          self._display_msg(msg="Columns with their imputation method:",