teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (88) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +196 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +79 -4
  6. teradataml/analytics/json_parser/metadata.py +12 -3
  7. teradataml/analytics/json_parser/utils.py +7 -2
  8. teradataml/analytics/sqle/__init__.py +1 -0
  9. teradataml/analytics/table_operator/__init__.py +1 -1
  10. teradataml/analytics/uaf/__init__.py +1 -1
  11. teradataml/analytics/utils.py +4 -0
  12. teradataml/automl/data_preparation.py +3 -2
  13. teradataml/automl/feature_engineering.py +15 -7
  14. teradataml/automl/model_training.py +39 -33
  15. teradataml/common/__init__.py +2 -1
  16. teradataml/common/constants.py +35 -0
  17. teradataml/common/garbagecollector.py +2 -1
  18. teradataml/common/messagecodes.py +8 -2
  19. teradataml/common/messages.py +3 -1
  20. teradataml/common/sqlbundle.py +25 -3
  21. teradataml/common/utils.py +134 -9
  22. teradataml/context/context.py +20 -10
  23. teradataml/data/SQL_Fundamentals.pdf +0 -0
  24. teradataml/data/dataframe_example.json +18 -2
  25. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  26. teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
  27. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  29. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  30. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  31. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  32. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  33. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  34. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  35. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  36. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  37. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  38. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  39. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  40. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  41. teradataml/data/medical_readings.csv +101 -0
  42. teradataml/data/patient_profile.csv +101 -0
  43. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  44. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  45. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  46. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  47. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  48. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  49. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  50. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  51. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  52. teradataml/data/target_udt_data.csv +8 -0
  53. teradataml/data/templates/open_source_ml.json +3 -2
  54. teradataml/data/vectordistance_example.json +4 -0
  55. teradataml/dataframe/dataframe.py +543 -175
  56. teradataml/dataframe/functions.py +553 -25
  57. teradataml/dataframe/sql.py +184 -15
  58. teradataml/dbutils/dbutils.py +556 -18
  59. teradataml/dbutils/filemgr.py +48 -1
  60. teradataml/lib/aed_0_1.dll +0 -0
  61. teradataml/opensource/__init__.py +1 -1
  62. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  63. teradataml/opensource/_lightgbm.py +950 -0
  64. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  65. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  66. teradataml/opensource/sklearn/__init__.py +0 -1
  67. teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
  68. teradataml/options/__init__.py +7 -23
  69. teradataml/options/configure.py +29 -3
  70. teradataml/scriptmgmt/UserEnv.py +3 -3
  71. teradataml/scriptmgmt/lls_utils.py +74 -21
  72. teradataml/store/__init__.py +13 -0
  73. teradataml/store/feature_store/__init__.py +0 -0
  74. teradataml/store/feature_store/constants.py +291 -0
  75. teradataml/store/feature_store/feature_store.py +2223 -0
  76. teradataml/store/feature_store/models.py +1505 -0
  77. teradataml/store/vector_store/__init__.py +1586 -0
  78. teradataml/table_operators/query_generator.py +3 -0
  79. teradataml/table_operators/table_operator_query_generator.py +3 -1
  80. teradataml/table_operators/table_operator_util.py +37 -38
  81. teradataml/table_operators/templates/dataframe_register.template +69 -0
  82. teradataml/utils/dtypes.py +4 -2
  83. teradataml/utils/validators.py +33 -1
  84. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
  85. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
  86. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  87. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  88. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
@@ -481,6 +481,9 @@ class QueryGenerator:
481
481
  return configure.read_nos_function_mapping.upper()
482
482
  elif "WriteNOS".lower() == function_name.lower():
483
483
  return configure.write_nos_function_mapping.upper()
484
+ # If Table Operator function is IMAGE2MATRIX, then return alias name as TD_IMAGE2MATRIX.
485
+ elif "IMAGE2MATRIX".lower() == function_name.lower():
486
+ return "TD_IMAGE2MATRIX"
484
487
 
485
488
  engine_name = UtilFuncs._get_engine_name(self._engine)
486
489
 
@@ -231,7 +231,9 @@ class TableOperatorQueryGenerator(QueryGenerator):
231
231
  using_clause = ""
232
232
  # If the function is a NOS function, then USING clause is needed.
233
233
  if self._function_name.lower() in [configure.write_nos_function_mapping.lower(),
234
- configure.read_nos_function_mapping.lower()]:
234
+ configure.read_nos_function_mapping.lower(),
235
+ "td_image2matrix"
236
+ ]:
235
237
  using_clause = "USING"
236
238
  invocation_sql = "{0}\n\t{1}{2}".format(invocation_sql, using_clause, self.__OTHER_ARG_CLAUSE)
237
239
 
@@ -24,6 +24,7 @@ from teradataml.utils.utils import execute_sql
24
24
  from teradataml.utils.validators import _Validators
25
25
  from functools import partial
26
26
  from inspect import isfunction, getsource
27
+ from pathlib import Path
27
28
 
28
29
 
29
30
  class _TableOperatorUtils:
@@ -281,7 +282,8 @@ class _TableOperatorUtils:
281
282
  """
282
283
  # Validate the user defined function.
283
284
 
284
- if self.operation == TableOperatorConstants.UDF_OP.value:
285
+ if self.operation in [TableOperatorConstants.UDF_OP.value,\
286
+ TableOperatorConstants.REGISTER_OP.value]:
285
287
  for udf_function in self.user_function:
286
288
  if not isfunction(udf_function):
287
289
  raise TypeError(Messages.get_message(
@@ -330,20 +332,30 @@ class _TableOperatorUtils:
330
332
  EXAMPLES:
331
333
  self.__create_user_script()
332
334
  """
333
- # Generate script name and alias, and add entry to a Garbage Collector.
334
- # script_entry is the string that is added to Garbage collector.
335
- # It has the format "<databasename>"."<file_id>".
336
- self.script_entry, self.script_alias, self.script_name, self.script_base_name = self.__get_script_name()
337
-
338
- # Get the converters to use with pandas.read_csv, and to correctly
339
- # typecast the numeric data.
340
- python_input_col_types = [UtilFuncs._teradata_type_to_python_type(col.type)
341
- for col in self.data._metaexpr.c]
342
- input_converters = UtilFuncs._get_pandas_converters(python_input_col_types)
343
-
344
- python_output_col_types = [UtilFuncs._teradata_type_to_python_type(type_)
345
- for type_ in list(self.returns.values())]
346
- output_converters = UtilFuncs._get_pandas_converters(python_output_col_types)
335
+ # If operation is register, then generate script name based on the
336
+ # user function name and return type.
337
+ # It has the format "tdml_udf_name_<registered_name>_udf_type_<return_type>_register.py"
338
+ if self.operation == TableOperatorConstants.REGISTER_OP.value:
339
+ registered_name = list(self.returns.keys())[0]
340
+ return_type = self.returns[registered_name]
341
+ self.script_name = "tdml_udf_name_{}_udf_type_{}_register.py".format(registered_name, return_type)
342
+ self.script_base_name = Path(self.script_name).stem
343
+ else:
344
+ # Generate script name and alias, and add entry to a Garbage Collector.
345
+ # script_entry is the string that is added to Garbage collector.
346
+ # It has the format "<databasename>"."<file_id>".
347
+ self.script_entry, self.script_alias, self.script_name, self.script_base_name = self.__get_script_name()
348
+
349
+ if self.operation not in [TableOperatorConstants.UDF_OP.value, TableOperatorConstants.REGISTER_OP.value]:
350
+ # Get the converters to use with pandas.read_csv, and to correctly
351
+ # typecast the numeric data.
352
+ python_input_col_types = [UtilFuncs._teradata_type_to_python_type(col.type)
353
+ for col in self.data._metaexpr.c]
354
+ input_converters = UtilFuncs._get_pandas_converters(python_input_col_types)
355
+
356
+ python_output_col_types = [UtilFuncs._teradata_type_to_python_type(type_)
357
+ for type_ in list(self.returns.values())]
358
+ output_converters = UtilFuncs._get_pandas_converters(python_output_col_types)
347
359
 
348
360
  # Create script in .teradataml directory.
349
361
  script_dir = GarbageCollector._get_temp_dir_name()
@@ -357,35 +369,16 @@ class _TableOperatorUtils:
357
369
  "templates")
358
370
  # Get the template.
359
371
  template = {TableOperatorConstants.APPLY_OP.value: TableOperatorConstants.APPLY_TEMPLATE.value,
360
- TableOperatorConstants.UDF_OP.value: TableOperatorConstants.UDF_TEMPLATE.value}
372
+ TableOperatorConstants.UDF_OP.value: TableOperatorConstants.UDF_TEMPLATE.value,
373
+ TableOperatorConstants.REGISTER_OP.value: TableOperatorConstants.REGISTER_TEMPLATE.value }
361
374
  template_name = template.get(self.operation, TableOperatorConstants.MAP_TEMPLATE.value)
362
375
  # Write to the script based on the template.
363
376
  try:
364
377
  with open(os.path.join(template_dir, template_name), 'r') as input_file:
365
378
  with open(self.script_path, 'w') as output_file:
366
379
  if self.operation == TableOperatorConstants.UDF_OP.value:
367
-
368
- # Function can have udf as decorator. Remove that.
369
- # The below notation
370
- # @udf
371
- # def to_upper(s):
372
- # return s.upper()
373
- # Then source code will be as it is.
374
- # But if below notation is used,
375
- # f = udf(to_upper)
376
- # Then source code will not have udf.
377
- # So, remove first line if it comes with first notation.
378
- # For both notations if in starting function defination have any extra space. Remove that.
379
- # If multiple UDF's are there append them as a single string.
380
380
 
381
- user_function_code = ""
382
- for udf_code in self.user_function:
383
- udf_code = getsource(udf_code)
384
- udf_code = udf_code.lstrip()
385
- if udf_code.startswith("@"):
386
- udf_code = udf_code[udf_code.find("\n")+1: ].lstrip()
387
- user_function_code += udf_code + '\n'
388
-
381
+ user_function_code = UtilFuncs._func_to_string(self.user_function)
389
382
  output_file.write(input_file.read().format(
390
383
  DELIMITER=self.delimiter,
391
384
  QUOTECHAR=self.quotechar,
@@ -396,6 +389,13 @@ class _TableOperatorUtils:
396
389
  COLUMNS_DEFINITIONS=json.dumps(self.columns_definitions),
397
390
  OUTPUT_TYPE_CONVERTERS=json.dumps(self.output_type_converters)
398
391
  ))
392
+ elif self.operation == TableOperatorConstants.REGISTER_OP.value:
393
+ # Get the source code of the user function.
394
+ user_function_code = UtilFuncs._func_to_string(self.user_function)
395
+ output_file.write(input_file.read().format(
396
+ FUNCTION_DEFINITION=user_function_code,
397
+ FUNCTION_NAME = self.user_function[0].__name__
398
+ ))
399
399
  else:
400
400
  # prepare script file from template file for maprow and mappartition.
401
401
  output_file.write(
@@ -494,7 +494,6 @@ class _TableOperatorUtils:
494
494
  script_name = script_alias # alias now contains extension also.
495
495
 
496
496
  # Extract the base name without extension.
497
- from pathlib import Path
498
497
  script_base_name = Path(script_alias).stem
499
498
  return script_entry, script_alias, script_name, script_base_name
500
499
 
@@ -0,0 +1,69 @@
1
+ import json
2
+ import sys, csv
3
+ import datetime
4
+ import urllib.parse
5
+
6
+ td_buffer = {{}}
7
+
8
+
9
+ {FUNCTION_DEFINITION}
10
+
11
+ # Decode the URL encoded string and store it back as dictionary.
12
+ dec = urllib.parse.unquote_plus(sys.argv[1])
13
+ script_data = json.loads(dec)
14
+
15
+ # Information that is required to help with the script usage.
16
+ # The delimiter to use with the input and output text.
17
+ delimiter = script_data["delimiter"]
18
+ # The quotechar to use.
19
+ quotechar = script_data["qoutechar"]
20
+ # The names of columns in the input teradataml DataFrame.
21
+ _input_columns = script_data["input_cols"]
22
+ # The names of columns in the output teradataml DataFrame.
23
+ _output_columns = script_data["output_cols"]
24
+ # The types of columns in the input/output teradataml DataFrame.
25
+ # The mapper of output column name to function arguments
26
+ function_args = script_data["function_args"]
27
+ # The definition for new columns in output.
28
+ columns_definitions = {{_output_columns[-1]: "{FUNCTION_NAME}"}}
29
+ output_type_converters = script_data["output_type_converters"]
30
+ for k,v in output_type_converters.items():
31
+ if v == 'datetime.date' or v == 'datetime.time' or v == 'datetime.datetime':
32
+ output_type_converters[k] = 'str'
33
+ output_type_converters = {{k:getattr(__builtins__, v) for k,v in output_type_converters.items()}}
34
+
35
+
36
+
37
+ # The entry point to the script.
38
+ if __name__ == "__main__":
39
+
40
+ records = csv.reader(sys.stdin.readlines(), delimiter=delimiter, quotechar=quotechar)
41
+ for record in records:
42
+ record = dict(zip(_input_columns, record))
43
+ out_rec = []
44
+ for column in _output_columns:
45
+
46
+ # If it is a new column, get the value from definition.
47
+ if column in columns_definitions:
48
+ f_args = tuple()
49
+ # Convert the argument types first.
50
+ for v in function_args[column]:
51
+ if v in _input_columns:
52
+ c_type_ = output_type_converters.get(v)
53
+ if record[v]:
54
+ # If it is a float, replace the empty character.
55
+ if c_type_.__name__ == 'float':
56
+ arg = output_type_converters.get(v)(record[v].replace(' ', ''))
57
+ else:
58
+ arg = output_type_converters.get(v)(record[v])
59
+ else:
60
+ arg = record[v]
61
+ else:
62
+ arg = v
63
+ f_args = f_args + (arg, )
64
+ func_ = globals()[columns_definitions[column]]
65
+ out_rec.append(output_type_converters[column](func_(*f_args)))
66
+ else:
67
+ out_rec.append(record[column])
68
+
69
+ print("{{}}".format(delimiter).join((str(i) for i in out_rec)))
@@ -641,11 +641,13 @@ class _Dtypes:
641
641
 
642
642
  """
643
643
  from teradataml.dataframe.dataframe import TDSeries, TDMatrix, TDGenSeries, TDAnalyticResult
644
+ from teradataml.store.feature_store.feature_store import Feature
644
645
  _DtypesMappers.JSON_TD_TO_PYTHON_TYPE_MAPPER.update({"SERIES": TDSeries,
645
646
  "MATRIX": TDMatrix,
646
647
  "ART": TDAnalyticResult,
647
- "GENSERIES": TDGenSeries})
648
-
648
+ "GENSERIES": TDGenSeries,
649
+ "COLUMN": (str, Feature),
650
+ "COLUMNS": (str, Feature)})
649
651
 
650
652
  return _DtypesMappers.JSON_TD_TO_PYTHON_TYPE_MAPPER.get(json_td_type.upper())
651
653
 
@@ -1,3 +1,4 @@
1
+ import enum
1
2
  import numbers
2
3
  import os
3
4
  import pandas as pd
@@ -11,6 +12,8 @@ from teradataml.options.configure import configure
11
12
  from teradataml.dataframe.sql_interfaces import ColumnExpression
12
13
  from functools import wraps, reduce
13
14
 
15
+ from teradataml.utils.internal_buffer import _InternalBuffer
16
+
14
17
 
15
18
  def skip_validation():
16
19
  """
@@ -545,7 +548,7 @@ class _Validators:
545
548
  raise TypeError("Third element in argument information matrix should be bool.")
546
549
 
547
550
  if not (isinstance(args[3], tuple) or isinstance(args[3], type) or
548
- isinstance(args[3], (_ListOf, _TupleOf))):
551
+ isinstance(args[3], (_ListOf, _TupleOf)) or isinstance(args[3], enum.EnumMeta)):
549
552
  err_msg = "Fourth element in argument information matrix should be a 'tuple of types' or 'type' type."
550
553
  raise TypeError(err_msg)
551
554
 
@@ -2274,4 +2277,33 @@ class _Validators:
2274
2277
  MessageCodes.INVALID_ARG_VALUE).format(ip_address, "ip_address",
2275
2278
  'of four numbers (each between 0 and 255) separated by periods'))
2276
2279
 
2280
+ return True
2281
+
2282
+
2283
+ @staticmethod
2284
+ @skip_validation()
2285
+ def _check_auth_token(func_name):
2286
+ """
2287
+ DESCRIPTION:
2288
+ Check if the user has set the authentication token.
2289
+
2290
+ PARAMETERS:
2291
+ func_name:
2292
+ Required Argument.
2293
+ Specifies the function name where the authentication token is required.
2294
+ Types: str
2295
+
2296
+ RAISES:
2297
+ TeradataMLException
2298
+
2299
+ RETURNS:
2300
+ None.
2301
+
2302
+ EXAMPLES:
2303
+ >>> _Validators._check_auth_token("udf")
2304
+ """
2305
+ if _InternalBuffer.get("auth_token") is None:
2306
+ raise TeradataMlException(Messages.get_message(MessageCodes.AUTH_TOKEN_REQUIRED,\
2307
+ func_name), MessageCodes.AUTH_TOKEN_REQUIRED)
2308
+
2277
2309
  return True
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: teradataml
3
- Version: 20.0.0.2
3
+ Version: 20.0.0.3
4
4
  Summary: Teradata Vantage Python package for Advanced Analytics
5
5
  Home-page: http://www.teradata.com/
6
6
  Author: Teradata Corporation
@@ -17,8 +17,8 @@ Classifier: Topic :: Database :: Front-Ends
17
17
  Classifier: License :: Other/Proprietary License
18
18
  Requires-Python: >=3.8
19
19
  Description-Content-Type: text/markdown
20
- Requires-Dist: teradatasql (>=17.10.0.11)
21
- Requires-Dist: teradatasqlalchemy (>=20.0.0.2)
20
+ Requires-Dist: teradatasql (>=20.0.0.19)
21
+ Requires-Dist: teradatasqlalchemy (>=20.0.0.3)
22
22
  Requires-Dist: pandas (>=0.22)
23
23
  Requires-Dist: psutil
24
24
  Requires-Dist: requests (>=2.25.1)
@@ -28,6 +28,7 @@ Requires-Dist: imbalanced-learn (>=0.8.0)
28
28
  Requires-Dist: pyjwt (>=2.8.0)
29
29
  Requires-Dist: cryptography (>=42.0.5)
30
30
  Requires-Dist: sqlalchemy (>=2.0)
31
+ Requires-Dist: lightgbm (>=3.3.3)
31
32
 
32
33
  ## Teradata Python package for Advanced Analytics.
33
34
 
@@ -47,6 +48,187 @@ Copyright 2024, Teradata. All Rights Reserved.
47
48
  * [License](#license)
48
49
 
49
50
  ## Release Notes:
51
+
52
+ #### teradataml 20.00.00.03
53
+
54
+ * teradataml no longer supports setting the `auth_token` using `set_config_params()`. Users should use `set_auth_token()` to set the token.
55
+
56
+ * ##### New Features/Functionality
57
+ * ###### teradataml: DataFrame
58
+ * New Function
59
+ * `alias()` - Creates a DataFrame with alias name.
60
+ * New Properties
61
+ * `db_object_name` - Get the underlying database object name, on which DataFrame is created.
62
+
63
+ * ###### teradataml: GeoDataFrame
64
+ * New Function
65
+ * `alias()` - Creates a GeoDataFrame with alias name.
66
+
67
+ * ###### teradataml: DataFrameColumn a.k.a. ColumnExpression
68
+ * _Arithmetic Functions_
69
+ * `DataFrameColumn.isnan()` - Function evaluates expression to determine if the floating-point
70
+ argument is a NaN (Not-a-Number) value.
71
+ * `DataFrameColumn.isinf()` - Function evaluates expression to determine if the floating-point
72
+ argument is an infinite number.
73
+ * `DataFrameColumn.isfinite()` - Function evaluates expression to determine if it is a finite
74
+ floating value.
75
+
76
+ * ###### FeatureStore - handles feature management within the Vantage environment
77
+ * FeatureStore Components
78
+ * Feature - Represents a feature which is used in ML Modeling.
79
+ * Entity - Represents the columns which serves as uniqueness for the data used in ML Modeling.
80
+ * DataSource - Represents the source of Data.
81
+ * FeatureGroup - Collection of Feature, Entity and DataSource.
82
+ * Methods
83
+ * `apply()` - Adds Feature, Entity, DataSource to a FeatureGroup.
84
+ * `from_DataFrame()` - Creates a FeatureGroup from teradataml DataFrame.
85
+ * `from_query()` - Creates a FeatureGroup using a SQL query.
86
+ * `remove()` - Removes Feature, Entity, or DataSource from a FeatureGroup.
87
+ * `reset_labels()` - Removes the labels assigned to the FeatureGroup, that are set using `set_labels()`.
88
+ * `set_labels()` - Sets the Features as labels for a FeatureGroup.
89
+ * Properties
90
+ * `features` - Get the features of a FeatureGroup.
91
+ * `labels` - Get the labels of FeatureGroup.
92
+ * FeatureStore
93
+ * Methods
94
+ * `apply()` - Adds Feature, Entity, DataSource, FeatureGroup to FeatureStore.
95
+ * `archive_data_source()` - Archives a specified DataSource from a FeatureStore.
96
+ * `archive_entity()` - Archives a specified Entity from a FeatureStore.
97
+ * `archive_feature()` - Archives a specified Feature from a FeatureStore.
98
+ * `archive_feature_group()` - Archives a specified FeatureGroup from a FeatureStore. Method archives underlying Feature, Entity, DataSource also.
99
+ * `delete_data_source()` - Deletes an archived DataSource.
100
+ * `delete_entity()` - Deletes an archived Entity.
101
+ * `delete_feature()` - Deletes an archived Feature.
102
+ * `delete_feature_group()` - Deletes an archived FeatureGroup.
103
+ * `get_data_source()` - Get the DataSources associated with FeatureStore.
104
+ * `get_dataset()` - Get the teradataml DataFrame based on Features, Entities and DataSource from FeatureGroup.
105
+ * `get_entity()` - Get the Entity associated with FeatureStore.
106
+ * `get_feature()` - Get the Feature associated with FeatureStore.
107
+ * `get_feature_group()` - Get the FeatureGroup associated with FeatureStore.
108
+ * `list_data_sources()` - List DataSources.
109
+ * `list_entities()` - List Entities.
110
+ * `list_feature_groups()` - List FeatureGroups.
111
+ * `list_features()` - List Features.
112
+ * `list_repos()` - List available repos which are configured for FeatureStore.
113
+ * `repair()` - Repairs the underlying FeatureStore schema on database.
114
+ * `set_features_active()` - Marks the Features as active.
115
+ * `set_features_inactive()` - Marks the Features as inactive.
116
+ * `setup()` - Setup the FeatureStore for a repo.
117
+ * Property
118
+ * `repo` - Property for FeatureStore repo.
119
+ * `grant` - Property to Grant access on FeatureStore to user.
120
+ * `revoke` - Property to Revoke access on FeatureStore from user.
121
+
122
+ * ###### teradataml: Table Operator Functions
123
+ * `Image2Matrix()` - Converts an image into a matrix.
124
+
125
+ * ###### teradataml: SQLE Engine Analytic Functions
126
+ * New Analytics Database Analytic Functions:
127
+ * `CFilter()`
128
+ * `NaiveBayes()`
129
+ * `TDNaiveBayesPredict()`
130
+ * `Shap()`
131
+ * `SMOTE()`
132
+
133
+ * ###### teradataml: Unbounded Array Framework (UAF) Functions
134
+ * New Unbounded Array Framework(UAF) Functions:
135
+ * `CopyArt()`
136
+
137
+ * ###### General functions
138
+ * Vantage File Management Functions
139
+ * `list_files()` - List the installed files in Database.
140
+
141
+ * ###### OpensourceML: LightGBM
142
+ * teradataml adds support for lightGBM package through `OpensourceML` (`OpenML`) feature.
143
+ The following functionality is added in the current release:
144
+ * `td_lightgbm` - Interface object to run lightgbm functions and classes through Teradata Vantage.
145
+ Example usage below:
146
+ ```
147
+ from teradataml import td_lightgbm, DataFrame
148
+
149
+ df_train = DataFrame("multi_model_classification")
150
+
151
+ feature_columns = ["col1", "col2", "col3", "col4"]
152
+ label_columns = ["label"]
153
+ part_columns = ["partition_column_1", "partition_column_2"]
154
+
155
+ df_x = df_train.select(feature_columns)
156
+ df_y = df_train.select(label_columns)
157
+
158
+ # Dataset creation.
159
+ # Single model case.
160
+ obj_s = td_lightgbm.Dataset(df_x, df_y, silent=True, free_raw_data=False)
161
+
162
+ # Multi model case.
163
+ obj_m = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
164
+ obj_m_v = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
165
+
166
+ ## Model training.
167
+ # Single model case.
168
+ opt = td_lightgbm.train(params={}, train_set = obj_s, num_boost_round=30)
169
+
170
+ opt.predict(data=df_x, num_iteration=20, pred_contrib=True)
171
+
172
+ # Multi model case.
173
+ opt = td_lightgbm.train(params={}, train_set = obj_m, num_boost_round=30,
174
+ callbacks=[td_lightgbm.record_evaluation(rec)],
175
+ valid_sets=[obj_m_v, obj_m_v])
176
+
177
+ # Passing `label` argument to get it returned in output DataFrame.
178
+ opt.predict(data=df_x, label=df_y, num_iteration=20)
179
+
180
+ ```
181
+ * Added support for accessing scikit-learn APIs using exposed interface object `td_lightgbm`.
182
+
183
+ Refer Teradata Python Package User Guide for more details of this feature, arguments, usage, examples and supportability in Vantage.
184
+
185
+ * ###### teradataml: Functions
186
+ * `register()` - Registers a user defined function (UDF).
187
+ * `call_udf()` - Calls a registered user defined function (UDF) and returns ColumnExpression.
188
+ * `list_udfs()` - List all the UDFs registered using 'register()' function.
189
+ * `deregister()` - Deregisters a user defined function (UDF).
190
+
191
+ * ###### teradataml: Options
192
+ * Configuration Options
193
+ * `table_operator` - Specifies the name of table operator.
194
+
195
+ * ##### Updates
196
+ * ###### General functions
197
+ * `set_auth_token()` - Added `base_url` parameter which accepts the CCP url.
198
+ 'ues_url' will be deprecated in the future and users
199
+ will need to specify 'base_url' instead.
200
+
201
+ * ###### teradataml: DataFrame function
202
+ * `join()`
203
+ * Now supports compound ColumnExpression having more than one binary operator in `on` argument.
204
+ * Now supports ColumnExpression containing FunctionExpression(s) in `on` argument.
205
+ * self-join now expects aliased DataFrame in `other` argument.
206
+
207
+ * ###### teradataml: GeoDataFrame function
208
+ * `join()`
209
+ * Now supports compound ColumnExpression having more than one binary operator in `on` argument.
210
+ * Now supports ColumnExpression containing FunctionExpression(s) in `on` argument.
211
+ * self-join now expects aliased DataFrame in `other` argument.
212
+
213
+ * ###### teradataml: Unbounded Array Framework (UAF) Functions
214
+ * `SAX()` - Default value added for `window_size` and `output_frequency`.
215
+ * `DickeyFuller()`
216
+ * Supports TDAnalyticResult as input.
217
+ * Default value added for `max_lags`.
218
+ * Removed parameter `drift_trend_formula`.
219
+ * Updated permitted values for `algorithm`.
220
+
221
+ * ##### teradataml: AutoML
222
+ * `AutoML`, `AutoRegressor` and `AutoClassifier`
223
+ * Now supports DECIMAL datatype as input.
224
+
225
+ * ##### teradataml: SQLE Engine Analytic Functions
226
+ * `TextParser()`
227
+ * Argument name `covert_to_lowercase` changed to `convert_to_lowercase`.
228
+
229
+ * ##### Bug Fixes
230
+ * `db_list_tables()` now returns correct results when '%' is used.
231
+
50
232
  #### teradataml 20.00.00.02
51
233
 
52
234
  * teradataml will no longer be supported with SQLAlchemy < 2.0.
@@ -115,6 +297,10 @@ Copyright 2024, Teradata. All Rights Reserved.
115
297
  * `ues_url`
116
298
  * `auth_token`
117
299
 
300
+ * #### teradataml DataFrame
301
+ * `to_pandas()` - Function returns the pandas dataframe with Decimal column types as float instead of object.
302
+ If the user wants the datatype to be object, set argument `coerce_float` to False.
303
+
118
304
  * ###### Database Utility
119
305
  * `list_td_reserved_keywords()` - Accepts a list of strings as argument.
120
306
 
@@ -133,7 +319,7 @@ Copyright 2024, Teradata. All Rights Reserved.
133
319
  * ##### Bug Fixes
134
320
  * KNN `predict()` function can now predict on test data which does not contain target column.
135
321
  * Metrics functions are supported on the Lake system.
136
- * The following OpensourceML functions from different sklearn modules are fixed.
322
+ * The following OpensourceML functions from different sklearn modules in single model case are fixed.
137
323
  * `sklearn.ensemble`:
138
324
  * ExtraTreesClassifier - `apply()`
139
325
  * ExtraTreesRegressor - `apply()`
@@ -146,12 +332,21 @@ Copyright 2024, Teradata. All Rights Reserved.
146
332
  * Nystroem - `transform()`, `fit_transform()`
147
333
  * PolynomialCountSketch - `transform()`, `fit_transform()`
148
334
  * RBFSampler - `transform()`, `fit_transform()`
149
- * `sklearn.neighbours`:
335
+ * `sklearn.neighbors`:
150
336
  * KNeighborsTransformer - `transform()`, `fit_transform()`
151
337
  * RadiusNeighborsTransformer - `transform()`, `fit_transform()`
152
338
  * `sklearn.preprocessing`:
153
339
  * KernelCenterer - `transform()`
154
340
  * OneHotEncoder - `transform()`, `inverse_transform()`
341
+ * The following OpensourceML functions from different sklearn modules in multi model case are fixed.
342
+ * `sklearn.feature_selection`:
343
+ * SelectFpr - `transform()`, `fit_transform()`, `inverse_transform()`
344
+ * SelectFdr - `transform()`, `fit_transform()`, `inverse_transform()`
345
+ * SelectFromModel - `transform()`, `fit_transform()`, `inverse_transform()`
346
+ * SelectFwe - `transform()`, `fit_transform()`, `inverse_transform()`
347
+ * RFECV - `transform()`, `fit_transform()`, `inverse_transform()`
348
+ * `sklearn.clustering`:
349
+ * Birch - `transform()`, `fit_transform()`
155
350
  * OpensourceML returns teradataml objects for model attributes and functions instead of sklearn
156
351
  objects so that the user can perform further operations like `score()`, `predict()` etc on top
157
352
  of the returned objects.