teradataml-20.0.0.2-py3-none-any.whl → teradataml-20.0.0.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic.

Files changed (126)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +315 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +95 -8
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/metadata.py +12 -3
  8. teradataml/analytics/json_parser/utils.py +7 -2
  9. teradataml/analytics/sqle/__init__.py +5 -1
  10. teradataml/analytics/table_operator/__init__.py +1 -1
  11. teradataml/analytics/uaf/__init__.py +1 -1
  12. teradataml/analytics/utils.py +4 -0
  13. teradataml/analytics/valib.py +18 -4
  14. teradataml/automl/__init__.py +51 -6
  15. teradataml/automl/data_preparation.py +59 -35
  16. teradataml/automl/data_transformation.py +58 -33
  17. teradataml/automl/feature_engineering.py +27 -12
  18. teradataml/automl/model_training.py +73 -46
  19. teradataml/common/constants.py +88 -29
  20. teradataml/common/garbagecollector.py +2 -1
  21. teradataml/common/messagecodes.py +19 -3
  22. teradataml/common/messages.py +6 -1
  23. teradataml/common/sqlbundle.py +64 -12
  24. teradataml/common/utils.py +246 -47
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +161 -27
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/byom_example.json +11 -0
  29. teradataml/data/dataframe_example.json +18 -2
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  37. teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
  38. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  39. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  40. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  41. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  42. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  43. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  44. teradataml/data/hnsw_alter_data.csv +5 -0
  45. teradataml/data/hnsw_data.csv +10 -0
  46. teradataml/data/jsons/byom/h2opredict.json +1 -1
  47. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  48. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  49. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  50. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  51. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  52. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  53. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  54. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  55. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  56. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  57. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  58. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  59. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  60. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  61. teradataml/data/medical_readings.csv +101 -0
  62. teradataml/data/patient_profile.csv +101 -0
  63. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  64. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  65. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  66. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  67. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  68. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  69. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  70. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  71. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  72. teradataml/data/target_udt_data.csv +8 -0
  73. teradataml/data/templates/open_source_ml.json +3 -2
  74. teradataml/data/teradataml_example.json +8 -0
  75. teradataml/data/vectordistance_example.json +4 -0
  76. teradataml/dataframe/copy_to.py +8 -3
  77. teradataml/dataframe/data_transfer.py +11 -1
  78. teradataml/dataframe/dataframe.py +1049 -285
  79. teradataml/dataframe/dataframe_utils.py +152 -20
  80. teradataml/dataframe/functions.py +578 -35
  81. teradataml/dataframe/setop.py +11 -6
  82. teradataml/dataframe/sql.py +185 -16
  83. teradataml/dbutils/dbutils.py +1049 -115
  84. teradataml/dbutils/filemgr.py +48 -1
  85. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  86. teradataml/lib/aed_0_1.dll +0 -0
  87. teradataml/opensource/__init__.py +1 -1
  88. teradataml/opensource/_base.py +1466 -0
  89. teradataml/opensource/_class.py +464 -0
  90. teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
  91. teradataml/opensource/_lightgbm.py +949 -0
  92. teradataml/opensource/_sklearn.py +1008 -0
  93. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
  94. teradataml/options/__init__.py +54 -38
  95. teradataml/options/configure.py +131 -27
  96. teradataml/options/display.py +13 -2
  97. teradataml/plot/axis.py +47 -8
  98. teradataml/plot/figure.py +33 -0
  99. teradataml/plot/plot.py +63 -13
  100. teradataml/scriptmgmt/UserEnv.py +5 -5
  101. teradataml/scriptmgmt/lls_utils.py +130 -40
  102. teradataml/store/__init__.py +12 -0
  103. teradataml/store/feature_store/__init__.py +0 -0
  104. teradataml/store/feature_store/constants.py +291 -0
  105. teradataml/store/feature_store/feature_store.py +2318 -0
  106. teradataml/store/feature_store/models.py +1505 -0
  107. teradataml/table_operators/Apply.py +32 -18
  108. teradataml/table_operators/Script.py +3 -1
  109. teradataml/table_operators/TableOperator.py +3 -1
  110. teradataml/table_operators/query_generator.py +3 -0
  111. teradataml/table_operators/table_operator_query_generator.py +3 -1
  112. teradataml/table_operators/table_operator_util.py +37 -38
  113. teradataml/table_operators/templates/dataframe_register.template +69 -0
  114. teradataml/utils/dtypes.py +51 -2
  115. teradataml/utils/internal_buffer.py +18 -0
  116. teradataml/utils/validators.py +99 -8
  117. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
  118. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
  119. teradataml/libaed_0_1.dylib +0 -0
  120. teradataml/libaed_0_1.so +0 -0
  121. teradataml/opensource/sklearn/__init__.py +0 -1
  122. teradataml/opensource/sklearn/_class.py +0 -255
  123. teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
  124. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  125. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  126. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
@@ -149,7 +149,7 @@ def __check_concat_compatibility(df_list, join, sort, ignore_index):
149
149
  # Iterate on all DFs to be applied for set operation.
150
150
  for df in dfs_to_operate_on:
151
151
  # Process each column in the DF of the iteration.
152
- for c in df._metaexpr.t.c:
152
+ for c in df._metaexpr.c:
153
153
  col_name = c.name
154
154
  # Process the column name if it is not already processed.
155
155
  # Processing of set operation is column name based so if the DF in the nth iteration had column 'xyz',
@@ -193,6 +193,8 @@ def __check_concat_compatibility(df_list, join, sort, ignore_index):
193
193
  col_dict[col_name]['col_present'] = col_present_in_dfs
194
194
  # The type to be used for the column is the one of the first DF it is present in.
195
195
  col_dict[col_name]['col_type'] = col_types_in_dfs[0]
196
+ # Column name stored with quotes if required.
197
+ col_dict[col_name]['name'] = c.compile()
196
198
 
197
199
  # If the type of the column in all DFs is not the same, then the operation is not lazy.
198
200
  if not all(ctype == col_dict[col_name]['col_type']
@@ -217,6 +219,8 @@ def __check_concat_compatibility(df_list, join, sort, ignore_index):
217
219
  col_dict[col_name]['col_present'] = col_present_in_dfs
218
220
  # The type to be used for the column is the one of the first DF it is present in.
219
221
  col_dict[col_name]['col_type'] = non_none_type_to_add
222
+ # Column name stored with quotes if required.
223
+ col_dict[col_name]['name'] = c.compile()
220
224
 
221
225
  # If the type of the column in all DFs is not the same, then the operation is not lazy.
222
226
  if not all(True if ctype is None else ctype == non_none_type_to_add
@@ -667,15 +671,16 @@ def concat(df_list, join='OUTER', allow_duplicates=True, sort=False, ignore_inde
667
671
 
668
672
  # Now create the list of columns for each DataFrame to concatenate
669
673
  type_compiler = td_type_compiler(td_dialect)
674
+
670
675
  for col_name, value in master_columns_dict.items():
671
676
  for i in range(len(col_list)):
677
+ # Quoting is already done for column names if column name starts with number or it is reserved keywords.
678
+ # Here checking again if it is teradata keyword or not for quotes.
679
+ column_name = UtilFuncs._process_for_teradata_keyword(value['name'])
672
680
  if not value['col_present'][i]:
673
- col_list[i].append('CAST(NULL as {}) as {}'.format(type_compiler.process(value['col_type']),
674
- UtilFuncs._teradata_quote_arg(col_name, "\"",
675
- False)))
681
+ col_list[i].append('CAST(NULL as {}) as {}'.format(type_compiler.process(value['col_type']), column_name))
676
682
  else:
677
- col_name = UtilFuncs._process_for_teradata_keyword(col_name)
678
- col_list[i].append(col_name)
683
+ col_list[i].append(column_name)
679
684
 
680
685
  input_table_columns = []
681
686
  for i in range(len(col_list)):
@@ -222,6 +222,10 @@ class _MetaExpression(object):
222
222
  def __repr__(self):
223
223
  return repr(self.__t)
224
224
 
225
+ def _get_table_expr(self):
226
+ return self.__t
227
+
228
+
225
229
  class _PandasTableExpression(TableExpression):
226
230
 
227
231
  def _assign(self, drop_columns, **kw):
@@ -261,7 +265,7 @@ class _PandasTableExpression(TableExpression):
261
265
 
262
266
  existing = [(c.name, c) for c in self.c]
263
267
  new = [(label, expression) for label, expression in kw.items() if label not in current]
264
- new = sorted(new, key = lambda x: x[0])
268
+ new = sorted(new, key=lambda x: x[0])
265
269
 
266
270
  for alias, expression in existing + new:
267
271
  if drop_columns and alias not in kw:
@@ -484,6 +488,7 @@ class _SQLTableExpression(_PandasTableExpression):
484
488
  columns = []
485
489
  for c in kw['column_order']:
486
490
  name = c.strip()
491
+ # Get case-insensitive column names from Table object.
487
492
  col = table.c.get(name, table.c.get(name.lower(), table.c.get(name.upper())))
488
493
 
489
494
  if col is None:
@@ -5473,7 +5478,8 @@ class _SQLColumnExpression(_LogicalColumnExpression,
5473
5478
  self._env_name = kw.get("env_name", None)
5474
5479
  self._delimiter = kw.get("delimiter", None)
5475
5480
  self._quotechar = kw.get("quotechar", None)
5476
- self.alias_name = self.compile() if self._udf is None else None
5481
+ self._udf_script = kw.get("udf_script", None)
5482
+ self.alias_name = self.compile() if (self._udf or self._udf_script) is None else None
5477
5483
 
5478
5484
  @property
5479
5485
  def expression(self):
@@ -5653,23 +5659,23 @@ class _SQLColumnExpression(_LogicalColumnExpression,
5653
5659
  """
5654
5660
  Calls the compile method of the underlying sqlalchemy.Column
5655
5661
  """
5656
- if len(kw) == 0:
5657
- kw = dict({'dialect': td_dialect(),
5658
- 'compile_kwargs':
5659
- {
5660
- 'include_table': False,
5661
- 'literal_binds': True
5662
- }
5663
- })
5664
-
5665
- return str(self.expression.compile(*args, **kw))
5662
+ kw_new = dict({'dialect': td_dialect(),
5663
+ 'compile_kwargs':
5664
+ {
5665
+ 'include_table': False,
5666
+ 'literal_binds': True
5667
+ }
5668
+ })
5669
+ if len(kw) != 0:
5670
+ kw_new.update(kw)
5671
+ return str(self.expression.compile(*args, **kw_new))
5666
5672
 
5667
5673
  def compile_label(self, label):
5668
5674
  """
5669
5675
  DESCRIPTION:
5670
5676
  Compiles expression with label, by calling underlying sqlalchemy methods.
5671
5677
 
5672
- PARAMETES:
5678
+ PARAMETERS:
5673
5679
  label:
5674
5680
  Required Argument.
5675
5681
  Specifies the label to be used to alias the compiled expression.
@@ -5699,7 +5705,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
5699
5705
  with the "value". Use this function either to replace or remove
5700
5706
  NA from Column.
5701
5707
 
5702
- PARAMETES:
5708
+ PARAMETERS:
5703
5709
  value:
5704
5710
  Required Argument.
5705
5711
  Specifies the replacement value for null values in the column.
@@ -6186,12 +6192,19 @@ class _SQLColumnExpression(_LogicalColumnExpression,
6186
6192
  # If user has not passed any type, then set it to
6187
6193
  # NullType().
6188
6194
  type = sqlalc.sql.sqltypes.NullType()
6189
-
6195
+ # Boolean flag to treat function as an instance method.
6196
+ function_has_col_caller = column_function
6190
6197
  # Generate the function syntax based on whether the
6191
6198
  # function is column function or not.
6192
6199
  if column_function:
6193
6200
  name = quoted_name("{}.{}".format(col_name, func_name),
6194
6201
  False)
6202
+ # Dynamic function gets called on teradataml._SQLColumnExpression type object.
6203
+ # 'expression' attribute of _SQLColumnExpression object holds
6204
+ # corresponding SQLAlchemy.Expression type object.
6205
+ # SQLAlchemy.Expression type object should be available from FunctionElement.
6206
+ # This 'func_caller' attribute points to that Expression object.
6207
+ func_caller = self.expression
6195
6208
  else:
6196
6209
  name = quoted_name(func_name, False)
6197
6210
 
@@ -10809,4 +10822,160 @@ class _SQLColumnExpression(_LogicalColumnExpression,
10809
10822
  whens = case([((self != 0) & (base != 0) & (base.ln() != 0),
10810
10823
  (self.ln() / base.ln()).cast(FLOAT))])
10811
10824
 
10812
- return whens
10825
+ return whens
10826
+
10827
+ def isnan(self):
10828
+ """
10829
+ DESCRIPTION:
10830
+ Function evaluates a variable or expression to determine if the
10831
+ floating-point argument is a NaN (Not-a-Number) value. When a database
10832
+ table contains a NaN value, the data is undefined and unrepresentable
10833
+ in floating-point arithmetic. For example, division by 0, or the square root
10834
+ of a negative number would return a NaN result.
10835
+
10836
+ RETURNS:
10837
+ ColumnExpression.
10838
+
10839
+ EXAMPLES:
10840
+ # Load the data to run the example.
10841
+ >>> load_example_data("teradataml","titanic")
10842
+
10843
+ # Create a DataFrame on 'titanic' table.
10844
+ >>> titanic = DataFrame.from_table('titanic')
10845
+ >>> df = titanic.select(["passenger", "age", "fare"])
10846
+ >>> print(df)
10847
+ age fare
10848
+ passenger
10849
+ 326 36.0 135.6333
10850
+ 183 9.0 31.3875
10851
+ 652 18.0 23.0000
10852
+ 40 14.0 11.2417
10853
+ 774 NaN 7.2250
10854
+ 366 30.0 7.2500
10855
+ 509 28.0 22.5250
10856
+ 795 25.0 7.8958
10857
+ 61 22.0 7.2292
10858
+ 469 NaN 7.7250
10859
+ >>>
10860
+
10861
+ # Example 1: Find whether 'fare' column contains NaN values or not.
10862
+ >>> nan_df = df.assign(nanornot = df.fare.isnan())
10863
+ >>> print(nan_df)
10864
+ age fare nanornot
10865
+ passenger
10866
+ 326 36.0 135.6333 0
10867
+ 183 9.0 31.3875 0
10868
+ 652 18.0 23.0000 0
10869
+ 40 14.0 11.2417 0
10870
+ 774 NaN 7.2250 0
10871
+ 366 30.0 7.2500 0
10872
+ 509 28.0 22.5250 0
10873
+ 795 25.0 7.8958 0
10874
+ 61 22.0 7.2292 0
10875
+ 469 NaN 7.7250 0
10876
+ >>>
10877
+ """
10878
+ return _SQLColumnExpression(literal_column(f"TD_ISNAN({self.compile()})"), type=INTEGER)
10879
+
10880
+ def isinf(self):
10881
+ """
10882
+ DESCRIPTION:
10883
+ Function evaluates a variable or expression to determine if the
10884
+ floating-point argument is an infinite number. This function determines
10885
+ if a database table contains positive or negative infinite values.
10886
+
10887
+ RETURNS:
10888
+ ColumnExpression.
10889
+
10890
+ EXAMPLES:
10891
+ # Load the data to run the example.
10892
+ >>> load_example_data("teradataml","titanic")
10893
+
10894
+ # Create a DataFrame on 'titanic' table.
10895
+ >>> titanic = DataFrame.from_table('titanic')
10896
+ >>> df = titanic.select(["passenger", "age", "fare"])
10897
+ >>> print(df)
10898
+ age fare
10899
+ passenger
10900
+ 326 36.0 135.6333
10901
+ 183 9.0 31.3875
10902
+ 652 18.0 23.0000
10903
+ 40 14.0 11.2417
10904
+ 774 NaN 7.2250
10905
+ 366 30.0 7.2500
10906
+ 509 28.0 22.5250
10907
+ 795 25.0 7.8958
10908
+ 61 22.0 7.2292
10909
+ 469 NaN 7.7250
10910
+ >>>
10911
+
10912
+ # Example 1: Find whether 'fare' column contains infinity values or not.
10913
+ >>> inf_df = df.assign(infornot = df.fare.isinf())
10914
+ >>> print(inf_df)
10915
+ age fare infornot
10916
+ passenger
10917
+ 326 36.0 135.6333 0
10918
+ 183 9.0 31.3875 0
10919
+ 652 18.0 23.0000 0
10920
+ 40 14.0 11.2417 0
10921
+ 774 NaN 7.2250 0
10922
+ 366 30.0 7.2500 0
10923
+ 509 28.0 22.5250 0
10924
+ 795 25.0 7.8958 0
10925
+ 61 22.0 7.2292 0
10926
+ 469 NaN 7.7250 0
10927
+ >>>
10928
+ """
10929
+ return _SQLColumnExpression(literal_column(f"TD_ISINF({self.compile()})"), type=INTEGER)
10930
+
10931
+ def isfinite(self):
10932
+ """
10933
+ DESCRIPTION:
10934
+ Function evaluates a variable or expression to determine if
10935
+ it is a finite floating value. A finite floating value is not
10936
+ a NaN (Not a Number) value and is not an infinity value.
10937
+
10938
+ RETURNS:
10939
+ ColumnExpression.
10940
+
10941
+ EXAMPLES:
10942
+ # Load the data to run the example.
10943
+ >>> load_example_data("teradataml","titanic")
10944
+
10945
+ # Create a DataFrame on 'titanic' table.
10946
+ >>> titanic = DataFrame.from_table('titanic')
10947
+ >>> df = titanic.select(["passenger", "age", "fare"])
10948
+ >>> print(df)
10949
+ age fare
10950
+ passenger
10951
+ 326 36.0 135.6333
10952
+ 183 9.0 31.3875
10953
+ 652 18.0 23.0000
10954
+ 40 14.0 11.2417
10955
+ 774 NaN 7.2250
10956
+ 366 30.0 7.2500
10957
+ 509 28.0 22.5250
10958
+ 795 25.0 7.8958
10959
+ 61 22.0 7.2292
10960
+ 469 NaN 7.7250
10961
+ >>>
10962
+
10963
+ # Example 1: Find whether 'fare' column contains finite values or not.
10964
+ >>> finite_df = df.assign(finiteornot = df.fare.isfinite())
10965
+ >>> print(finite_df)
10966
+ age fare finiteornot
10967
+ passenger
10968
+ 530 23.0 11.500 1
10969
+ 591 35.0 7.125 1
10970
+ 387 1.0 46.900 1
10971
+ 856 18.0 9.350 1
10972
+ 244 22.0 7.125 1
10973
+ 713 48.0 52.000 1
10974
+ 448 34.0 26.550 1
10975
+ 122 NaN 8.050 1
10976
+ 734 23.0 13.000 1
10977
+ 265 NaN 7.750 1
10978
+ >>>
10979
+
10980
+ """
10981
+ return _SQLColumnExpression(literal_column(f"TD_ISFINITE({self.compile()})"), type=INTEGER)