teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (108) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +71 -0
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +51 -24
  6. teradataml/analytics/json_parser/utils.py +11 -17
  7. teradataml/automl/__init__.py +103 -48
  8. teradataml/automl/data_preparation.py +55 -37
  9. teradataml/automl/data_transformation.py +131 -69
  10. teradataml/automl/feature_engineering.py +117 -185
  11. teradataml/automl/feature_exploration.py +9 -2
  12. teradataml/automl/model_evaluation.py +13 -25
  13. teradataml/automl/model_training.py +214 -75
  14. teradataml/catalog/model_cataloging_utils.py +1 -1
  15. teradataml/clients/auth_client.py +133 -0
  16. teradataml/common/aed_utils.py +3 -2
  17. teradataml/common/constants.py +11 -6
  18. teradataml/common/garbagecollector.py +5 -0
  19. teradataml/common/messagecodes.py +3 -1
  20. teradataml/common/messages.py +2 -1
  21. teradataml/common/utils.py +6 -0
  22. teradataml/context/context.py +49 -29
  23. teradataml/data/advertising.csv +201 -0
  24. teradataml/data/bank_marketing.csv +11163 -0
  25. teradataml/data/bike_sharing.csv +732 -0
  26. teradataml/data/boston2cols.csv +721 -0
  27. teradataml/data/breast_cancer.csv +570 -0
  28. teradataml/data/customer_segmentation_test.csv +2628 -0
  29. teradataml/data/customer_segmentation_train.csv +8069 -0
  30. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
  31. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
  32. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
  33. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
  34. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
  35. teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
  36. teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
  37. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
  38. teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
  39. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
  40. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
  41. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
  42. teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
  43. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
  44. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
  45. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
  46. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
  47. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
  48. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
  49. teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
  50. teradataml/data/glm_example.json +28 -1
  51. teradataml/data/housing_train_segment.csv +201 -0
  52. teradataml/data/insect2Cols.csv +61 -0
  53. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
  54. teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
  55. teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
  56. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
  57. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
  58. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
  59. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
  60. teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
  61. teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
  62. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
  63. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
  64. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
  65. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
  66. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
  67. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
  68. teradataml/data/kmeans_example.json +5 -0
  69. teradataml/data/kmeans_table.csv +10 -0
  70. teradataml/data/onehot_encoder_train.csv +4 -0
  71. teradataml/data/openml_example.json +29 -0
  72. teradataml/data/scale_attributes.csv +3 -0
  73. teradataml/data/scale_example.json +52 -1
  74. teradataml/data/scale_input_part_sparse.csv +31 -0
  75. teradataml/data/scale_input_partitioned.csv +16 -0
  76. teradataml/data/scale_input_sparse.csv +11 -0
  77. teradataml/data/scale_parameters.csv +3 -0
  78. teradataml/data/scripts/deploy_script.py +20 -1
  79. teradataml/data/scripts/sklearn/sklearn_fit.py +23 -27
  80. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +20 -28
  81. teradataml/data/scripts/sklearn/sklearn_function.template +13 -18
  82. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
  83. teradataml/data/scripts/sklearn/sklearn_neighbors.py +18 -27
  84. teradataml/data/scripts/sklearn/sklearn_score.py +20 -29
  85. teradataml/data/scripts/sklearn/sklearn_transform.py +30 -38
  86. teradataml/data/teradataml_example.json +77 -0
  87. teradataml/data/ztest_example.json +16 -0
  88. teradataml/dataframe/copy_to.py +8 -3
  89. teradataml/dataframe/data_transfer.py +120 -61
  90. teradataml/dataframe/dataframe.py +102 -17
  91. teradataml/dataframe/dataframe_utils.py +47 -9
  92. teradataml/dataframe/fastload.py +272 -89
  93. teradataml/dataframe/sql.py +84 -0
  94. teradataml/dbutils/dbutils.py +2 -2
  95. teradataml/lib/aed_0_1.dll +0 -0
  96. teradataml/opensource/sklearn/_sklearn_wrapper.py +102 -55
  97. teradataml/options/__init__.py +13 -4
  98. teradataml/options/configure.py +27 -6
  99. teradataml/scriptmgmt/UserEnv.py +19 -16
  100. teradataml/scriptmgmt/lls_utils.py +117 -14
  101. teradataml/table_operators/Script.py +2 -3
  102. teradataml/table_operators/TableOperator.py +58 -10
  103. teradataml/utils/validators.py +40 -2
  104. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +78 -6
  105. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/RECORD +108 -90
  106. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +0 -0
  107. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
  108. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +0 -0
@@ -19,11 +19,11 @@ from teradataml.common.sqlbundle import SQLBundle
19
19
  from teradataml.common.utils import UtilFuncs
20
20
  from teradataml.common.constants import CopyToConstants
21
21
  from teradataml.context.context import get_context, get_connection, \
22
- _get_context_temp_databasename
22
+ _get_context_temp_databasename, _get_current_databasename
23
23
  from teradataml.dataframe import dataframe as tdmldf
24
24
  from teradataml.dataframe.copy_to import copy_to_sql, _create_table_object, \
25
- _get_pd_df_column_names, _extract_column_info, \
26
- _check_columns_insertion_compatible, _get_index_labels
25
+ _get_pd_df_column_names, _extract_column_info, \
26
+ _check_columns_insertion_compatible, _get_index_labels
27
27
  from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
28
28
  from teradataml.dbutils.dbutils import _create_table, _execute_query_and_generate_pandas_df
29
29
  from teradataml.utils.validators import _Validators
@@ -335,7 +335,7 @@ def fastexport(df, export_to="pandas", index_column=None,
335
335
  if not csv_file:
336
336
  raise TeradataMlException(
337
337
  Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING, "csv_file",
338
- "{0}='{1}'".format("export_to","csv")),
338
+ "{0}='{1}'".format("export_to", "csv")),
339
339
  MessageCodes.DEPENDENT_ARG_MISSING)
340
340
 
341
341
  if not csv_file.lower().endswith(".csv"):
@@ -363,7 +363,7 @@ def fastexport(df, export_to="pandas", index_column=None,
363
363
  raise TeradataMlException(
364
364
  Messages.get_message(MessageCodes.DATA_EXPORT_FAILED, "fastexport",
365
365
  export_to, str(err)),
366
- MessageCodes.DATA_EXPORT_FAILED)
366
+ MessageCodes.DATA_EXPORT_FAILED)
367
367
 
368
368
 
369
369
  @collect_queryband(queryband="rdCsv")
@@ -601,7 +601,7 @@ def read_csv(filepath,
601
601
  Specifies whether to persist the errors/warnings(if any) information in Vantage
602
602
  or not.
603
603
  If "save_errors" is set to False:
604
- 1. Errors or warnings (in any) are not persisted into tables.
604
+ 1. Errors or warnings (if any) are not persisted into tables.
605
605
  2. Errors table genarated by FastloadCSV are not persisted.
606
606
  If "save_errors" is set to True:
607
607
  1. The errors or warnings information is persisted and names of error and
@@ -883,7 +883,7 @@ def read_csv(filepath,
883
883
 
884
884
  # Create SQLAlchemy table object from existing table.
885
885
  existing_table = UtilFuncs._get_sqlalchemy_table(table_name,
886
- schema_name=schema_name)
886
+ schema_name=schema_name)
887
887
 
888
888
  # Check compatibility of CSV columns with existing table columns.
889
889
  if types is not None:
@@ -904,7 +904,7 @@ def read_csv(filepath,
904
904
  primary_index=existing_table_primary_index)
905
905
  else:
906
906
  rc_dict = dt_obj._insert_from_csv_without_fastload(table_name=table_name,
907
- column_names=cols_name)
907
+ column_names=cols_name)
908
908
  # Return the read_csv result.
909
909
  return dt_obj._get_result(rc_dict)
910
910
 
@@ -923,6 +923,7 @@ class _DataTransferUtils():
923
923
  Teradata Vantage to outside world, for example Data Transfer using
924
924
  FastExport Protocol.
925
925
  """
926
+
926
927
  def __init__(self, df, index_column=None, num_rows=99999, all_rows=False,
927
928
  catch_errors_warnings=False, table_name=None,
928
929
  schema_name=None, if_exists='append', index=False,
@@ -934,7 +935,9 @@ class _DataTransferUtils():
934
935
  columns_list=None, sequence_column=None, seq_max=None,
935
936
  use_fastload=True, api_name='fastexport',
936
937
  open_sessions=None, chunksize=CopyToConstants.DBAPI_BATCHSIZE.value,
937
- match_column_order=True):
938
+ match_column_order=True, err_tbl_1_suffix=None,
939
+ err_tbl_2_suffix=None, err_tbl_name=None, warn_tbl_name=None,
940
+ err_staging_db=None):
938
941
  """
939
942
  DESCRIPTION:
940
943
  Constructor for the _DataTransferUtils class. It initialises
@@ -1088,6 +1091,35 @@ class _DataTransferUtils():
1088
1091
  Default Value: 16383
1089
1092
  Types: int
1090
1093
 
1094
+ err_tbl_1_suffix:
1095
+ Optional Argument.
1096
+ Specifies the suffix for error table 1 created by fastload job.
1097
+ Types: String
1098
+
1099
+ err_tbl_2_suffix:
1100
+ Optional Argument.
1101
+ Specifies the suffix for error table 2 created by fastload job.
1102
+ Types: String
1103
+
1104
+ err_tbl_name:
1105
+ Optional Argument.
1106
+ Specifies the name for error table.
1107
+ Types: String
1108
+
1109
+ warn_tbl_name:
1110
+ Optional Argument.
1111
+ Specifies the name for warning table.
1112
+ Types: String
1113
+
1114
+ err_staging_db:
1115
+ Optional Argument.
1116
+ Specifies the name of the database to be used for creating staging
1117
+ table and error tables.
1118
+ Note:
1119
+ Current session user must have CREATE, DELETE and INSERT table
1120
+ rights on err_staging_db database.
1121
+ Types: String
1122
+
1091
1123
  PARAMETERS:
1092
1124
  None.
1093
1125
 
@@ -1139,6 +1171,11 @@ class _DataTransferUtils():
1139
1171
  self.open_sessions = open_sessions
1140
1172
  self.chunksize = chunksize
1141
1173
  self.match_column_order = match_column_order
1174
+ self.err_tbl_1_suffix = err_tbl_1_suffix
1175
+ self.err_tbl_2_suffix = err_tbl_2_suffix
1176
+ self.err_tbl_name = err_tbl_name
1177
+ self.warn_tbl_name = warn_tbl_name
1178
+ self.err_staging_db = err_staging_db
1142
1179
 
1143
1180
  # Validate arguments.
1144
1181
  if self.api_name == 'fastexport':
@@ -1243,6 +1280,7 @@ class _DataTransferUtils():
1243
1280
  dt_obj = _DataTransferUtils()
1244
1281
  dt_obj._validate_csv_sep_quotechar()
1245
1282
  """
1283
+
1246
1284
  # Function to validate char value for length and allowed characters.
1247
1285
  def validate_char_arg_csv(arg_name, arg):
1248
1286
 
@@ -1444,7 +1482,7 @@ class _DataTransferUtils():
1444
1482
  csv_file_name=target_csv)
1445
1483
 
1446
1484
  """
1447
- fastexport_esc_func=""
1485
+ fastexport_esc_func = ""
1448
1486
  open_session_esc_func = ""
1449
1487
  if require is not None:
1450
1488
  if require:
@@ -1492,7 +1530,7 @@ class _DataTransferUtils():
1492
1530
  write_csv_escape_func = DriverEscapeFunctions.WRITE_TO_CSV.value.format(csv_file_name)
1493
1531
  field_sep_esc_func = DriverEscapeFunctions.FIELD_SEP.value.format(field_sep)
1494
1532
  field_quote_esc_func = DriverEscapeFunctions.FIELD_QUOTE.value.format(field_quote)
1495
-
1533
+
1496
1534
  query = "{0}{1}{2}{3}{4}{5}".format(fastexport_esc_func,
1497
1535
  open_session_esc_func,
1498
1536
  field_sep_esc_func,
@@ -1639,7 +1677,7 @@ class _DataTransferUtils():
1639
1677
 
1640
1678
  # End of functions specific to exporting table data in Vantage into pandas DataFrame.
1641
1679
 
1642
- # General functions to get warrnings and errors.
1680
+ # General functions to get warrnings and errors.
1643
1681
  def _get_errors_warnings(self, cur, insert_stmt, escape_function):
1644
1682
  """
1645
1683
  Internal function executes teradatasql provided escape functions
@@ -1672,7 +1710,7 @@ class _DataTransferUtils():
1672
1710
  dt_obj._get_errors_warnings(cur, insert_stmt, escape_function)
1673
1711
  """
1674
1712
  errorwarninglist = self._process_escape_functions(cur,
1675
- escape_function= escape_function,
1713
+ escape_function=escape_function,
1676
1714
  insert_query=insert_stmt)
1677
1715
 
1678
1716
  from teradatasql import vernumber
@@ -1714,11 +1752,10 @@ class _DataTransferUtils():
1714
1752
 
1715
1753
  return pd.DataFrame()
1716
1754
 
1717
- def _create_error_warnings_table(self, pdf, msg_type, logon_seq_number):
1755
+ def _create_error_warnings_table(self, pdf, msg_type, logon_seq_number, table_name=None):
1718
1756
  """
1719
1757
  DESCRIPTION:
1720
- Internal function creates the errors and warnings table in Vantage,
1721
- if save_errors is set to True.
1758
+ Internal function creates the errors and warnings table in Vantage.
1722
1759
 
1723
1760
  PARAMETERS:
1724
1761
  pdf:
@@ -1747,12 +1784,13 @@ class _DataTransferUtils():
1747
1784
  dt_obj = _DataTransferUtils(df, table_name, types)
1748
1785
  dt_obj._create_error_warnings_table(pdf, msg_type, logon_seq_number)
1749
1786
  """
1750
- if self.save_errors:
1751
- err_warn_tablename = "td_fl_{0}_{1}_{2}".format(self.table_name, msg_type, logon_seq_number)
1752
- copy_to_sql(pdf, err_warn_tablename, if_exists='replace')
1753
- return err_warn_tablename
1754
-
1755
- return ''
1787
+ if not table_name:
1788
+ table_name = "td_fl_{0}_{1}_{2}".format(self.table_name, msg_type, logon_seq_number)
1789
+ copy_to_sql(pdf, table_name, schema_name=self.err_staging_db,
1790
+ if_exists='replace')
1791
+ return "{}.{}".format(self.err_staging_db if self.err_staging_db
1792
+ else _get_current_databasename(),
1793
+ table_name)
1756
1794
 
1757
1795
  def _process_escape_functions(self, cur, escape_function, insert_query=None):
1758
1796
  """
@@ -1834,16 +1872,23 @@ class _DataTransferUtils():
1834
1872
  ins_query = dt_obj._form_insert_query()
1835
1873
  """
1836
1874
 
1837
- csv_esc_func = ""
1838
- open_sessions_esc_func = ""
1839
- field_sep_esc_func = ""
1840
- field_quote_esc_func = ""
1875
+ escape_funcs = ""
1841
1876
 
1877
+ # Get the fastload escape function.
1878
+ if self.use_fastload:
1879
+ escape_funcs = escape_funcs + DriverEscapeFunctions.REQUIRE_FASTLOAD.value
1880
+
1881
+ # Get the escape function clause for open_sessions.
1882
+ if self.open_sessions is not None:
1883
+ escape_funcs = escape_funcs + DriverEscapeFunctions.OPEN_SESSIONS.value.format(self.open_sessions)
1884
+
1885
+ # Create the list of values to be inserted.
1842
1886
  if self.api_name == "fastload":
1843
1887
  col_names = _get_pd_df_column_names(self.df)
1844
1888
  insert_values = ", ".join(['?' for i in range(len(col_names) + len(self.df.index.names)
1845
1889
  if self.index is True else len(col_names))])
1846
1890
 
1891
+ # Get escape functions related to read_csv.
1847
1892
  if self.api_name == "read_csv":
1848
1893
  # Get the column names.
1849
1894
  if self.if_exists == 'append' and column_names is not None:
@@ -1852,7 +1897,7 @@ class _DataTransferUtils():
1852
1897
  col_names, _ = _extract_column_info(self.df, self.types)
1853
1898
 
1854
1899
  # Get read_csv escape function.
1855
- csv_esc_func = DriverEscapeFunctions.READ_CSV.value.format(self.df)
1900
+ escape_funcs = escape_funcs + DriverEscapeFunctions.READ_CSV.value.format(self.df)
1856
1901
  insert_values = ", ".join(['?' for i in range(len(col_names))])
1857
1902
 
1858
1903
  # Create escape function for sep.
@@ -1861,7 +1906,7 @@ class _DataTransferUtils():
1861
1906
  field_sep = "''''"
1862
1907
  elif self.sep == "\"":
1863
1908
  field_sep = "\"\"\"\""
1864
- field_sep_esc_func = DriverEscapeFunctions.FIELD_SEP.value.format(field_sep)
1909
+ escape_funcs = escape_funcs + DriverEscapeFunctions.FIELD_SEP.value.format(field_sep)
1865
1910
 
1866
1911
  # Create escape function for quotechar.
1867
1912
  field_quote = "'{0}'".format(self.quotechar)
@@ -1869,27 +1914,28 @@ class _DataTransferUtils():
1869
1914
  field_quote = "''''"
1870
1915
  elif self.quotechar == "\"":
1871
1916
  field_quote = "\"\"\"\""
1872
- field_quote_esc_func = DriverEscapeFunctions.FIELD_QUOTE.value.format(field_quote)
1917
+ escape_funcs = escape_funcs + DriverEscapeFunctions.FIELD_QUOTE.value.format(field_quote)
1873
1918
 
1874
1919
  # Create base insert query.
1875
1920
  base_insert_query = "INSERT INTO {0} VALUES ({1});".format(table, insert_values)
1876
1921
 
1877
- # Get the fastload escape function.
1878
- fastload_esc_func = DriverEscapeFunctions.REQUIRE_FASTLOAD.value \
1879
- if self.use_fastload else ""
1922
+ # Get the escape function clauses for error table and DB related escape functions.
1923
+ # TODO: This condition will be optimized with ELE-6743.
1924
+ if self.api_name == "fastload" and self.save_errors and not self.err_tbl_name:
1925
+ escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_TBL_MNG_FLAG.value.format("off")
1880
1926
 
1881
- # Get the escape function clause for open_sessions
1882
- if self.open_sessions is not None:
1883
- open_sessions_esc_func = DriverEscapeFunctions.OPEN_SESSIONS.value.format(self.open_sessions)
1927
+ if self.err_tbl_1_suffix:
1928
+ escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_TBL_1.value.format(self.err_tbl_1_suffix)
1884
1929
 
1885
- query = "{0}{1}{2}{3}{4}{5}".format(fastload_esc_func,
1886
- open_sessions_esc_func,
1887
- field_sep_esc_func,
1888
- field_quote_esc_func,
1889
- csv_esc_func,
1890
- base_insert_query)
1891
- return query
1930
+ if self.err_tbl_2_suffix:
1931
+ escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_TBL_2.value.format(self.err_tbl_2_suffix)
1932
+
1933
+ if self.err_staging_db:
1934
+ escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_STAGING_DB.value.format(self.err_staging_db)
1892
1935
 
1936
+ # Generate final insert query by appending all escape functions.
1937
+ query = "{0}{1}".format(escape_funcs, base_insert_query)
1938
+ return query
1893
1939
 
1894
1940
  def _table_exists(self, con):
1895
1941
  """
@@ -1914,8 +1960,7 @@ class _DataTransferUtils():
1914
1960
  """
1915
1961
  return con.dialect.has_table(get_connection(), self.table_name, self.schema_name)
1916
1962
 
1917
-
1918
- def _get_fully_qualified_table_name(self, table_name=None):
1963
+ def _get_fully_qualified_table_name(self, table_name=None, schema_name=None):
1919
1964
  """
1920
1965
  DESCRIPTION:
1921
1966
  Function returns schema qualified table name
@@ -1929,6 +1974,11 @@ class _DataTransferUtils():
1929
1974
  Specifies the table name.
1930
1975
  Types: str
1931
1976
 
1977
+ schema_name:
1978
+ Optional Argument.
1979
+ Specifies the schema name.
1980
+ Types: str
1981
+
1932
1982
  RETURNS:
1933
1983
  str.
1934
1984
 
@@ -1942,12 +1992,14 @@ class _DataTransferUtils():
1942
1992
  table_name = table_name if table_name else self.table_name
1943
1993
 
1944
1994
  table = '"{}"'.format(table_name)
1945
- if self.schema_name is not None:
1995
+ if schema_name is not None:
1996
+ table = '"{}"."{}"'.format(schema_name, table_name)
1997
+ elif self.schema_name is not None:
1946
1998
  table = '"{}"."{}"'.format(self.schema_name, table_name)
1947
1999
 
1948
2000
  return table
1949
2001
 
1950
- def _create_table(self, con, table_name=None):
2002
+ def _create_table(self, con, table_name=None, schema_name=None):
1951
2003
  """
1952
2004
  DESCRIPTION:
1953
2005
  Internal function creates table in the Vantage.
@@ -1963,6 +2015,11 @@ class _DataTransferUtils():
1963
2015
  Specifies the table name.
1964
2016
  Types: str
1965
2017
 
2018
+ schema_name:
2019
+ Optional Argument.
2020
+ Specifies the schema name where table needs to be created.
2021
+ Types: str
2022
+
1966
2023
  RETURNS:
1967
2024
  None.
1968
2025
 
@@ -1974,9 +2031,9 @@ class _DataTransferUtils():
1974
2031
  dt_obj._create_table(con)
1975
2032
  """
1976
2033
  table_name = table_name if table_name else self.table_name
1977
-
2034
+ schema_name = schema_name if schema_name else self.schema_name
1978
2035
  table = _create_table_object(df=self.df, table_name=table_name, types=self.types, con=con,
1979
- schema_name=self.schema_name, primary_index=self.primary_index,
2036
+ schema_name=schema_name, primary_index=self.primary_index,
1980
2037
  temporary=self.temporary, set_table=self.set_table, index=self.index,
1981
2038
  index_label=self.index_label)
1982
2039
 
@@ -2032,7 +2089,7 @@ class _DataTransferUtils():
2032
2089
 
2033
2090
  # Turn off autocommit before the Fastload insertion.
2034
2091
  self._process_escape_functions(cur, escape_function= \
2035
- DriverEscapeFunctions.AUTOCOMMIT_OFF)
2092
+ DriverEscapeFunctions.AUTOCOMMIT_OFF)
2036
2093
 
2037
2094
  # Initialize dict template for saving error/warning information.
2038
2095
  err_dict = {}
@@ -2052,7 +2109,7 @@ class _DataTransferUtils():
2052
2109
 
2053
2110
  # Get logon sequence number to be used for error/warning table names
2054
2111
  logon_seq_number = self._process_escape_functions(cur, escape_function= \
2055
- DriverEscapeFunctions.LOGON_SEQ_NUM,
2112
+ DriverEscapeFunctions.LOGON_SEQ_NUM,
2056
2113
  insert_query=ins)
2057
2114
 
2058
2115
  # Commit the rows
@@ -2064,16 +2121,16 @@ class _DataTransferUtils():
2064
2121
  if len(warn) != 0:
2065
2122
  warn_dict['error_message'].extend(warn)
2066
2123
 
2067
- # Get error and warning informations for error and warning tables, persist
2124
+ # Get error and warning information for error and warning tables, persist
2068
2125
  # error and warning tables to Vantage if user has specified save_error as True
2069
2126
  # else show it as pandas dataframe on console.
2070
2127
  pd_err_df = self._get_pandas_df_from_errors_warnings(err_dict)
2071
- if not pd_err_df.empty:
2128
+ if not pd_err_df.empty and self.save_errors:
2072
2129
  msg_type = "err"
2073
2130
  error_tablename = self._create_error_warnings_table(pd_err_df, msg_type, logon_seq_number[0][0])
2074
2131
 
2075
2132
  pd_warn_df = self._get_pandas_df_from_errors_warnings(warn_dict)
2076
- if not pd_warn_df.empty:
2133
+ if not pd_warn_df.empty and self.save_errors:
2077
2134
  msg_type = "warn"
2078
2135
  warn_tablename = self._create_error_warnings_table(pd_warn_df, msg_type, logon_seq_number[0][0])
2079
2136
 
@@ -2100,7 +2157,7 @@ class _DataTransferUtils():
2100
2157
  finally:
2101
2158
  # Turn on autocommit.
2102
2159
  self._process_escape_functions(cur, escape_function= \
2103
- DriverEscapeFunctions.AUTOCOMMIT_ON)
2160
+ DriverEscapeFunctions.AUTOCOMMIT_ON)
2104
2161
  cur.close()
2105
2162
 
2106
2163
  def _get_result(self, result_dict=None):
@@ -2262,7 +2319,7 @@ class _DataTransferUtils():
2262
2319
  awu_matrix.append(['quotechar', self.quotechar, True, (str)])
2263
2320
  awu_matrix.append(['catch_errors_warnings', self.catch_errors_warnings, False, (bool)])
2264
2321
  awu_matrix.append(['use_fastload', self.use_fastload, False, (bool)])
2265
- awu_matrix.append(['open_sessions',self.open_sessions, True, (int), False])
2322
+ awu_matrix.append(['open_sessions', self.open_sessions, True, (int), False])
2266
2323
  awu_matrix.append(['chunksize', self.chunksize, False, (int)])
2267
2324
  awu_matrix.append(['match_column_order', self.match_column_order, True, (bool)])
2268
2325
  if isinstance(self.df, pd.DataFrame):
@@ -2307,8 +2364,8 @@ class _DataTransferUtils():
2307
2364
 
2308
2365
  if (is_multi_index and ((isinstance(self.index_label, str) and index_levels != 1) or
2309
2366
  (is_index_list and index_levels != len(self.index_label)))) or \
2310
- (not is_multi_index and is_index_list and
2311
- (is_index_list and num_index > 1)):
2367
+ (not is_multi_index and is_index_list and
2368
+ (is_index_list and num_index > 1)):
2312
2369
  valid_arg_msg = 'String or list of Strings with the number of ' \
2313
2370
  'Strings matching the number of levels' \
2314
2371
  ' in the index'
@@ -2522,16 +2579,18 @@ class _DataTransferUtils():
2522
2579
 
2523
2580
  # Load the data from CSV to staging table.
2524
2581
  rc_dict = self._insert_from_csv_with_fastload(table_name=stag_table_name,
2525
- column_names=column_names)
2582
+ column_names=column_names)
2526
2583
 
2527
2584
  # Insert all rows from staging table to already existing table.
2528
2585
  df_utils._insert_all_from_table(self.table_name,
2529
- self._get_fully_qualified_table_name(stag_table_name),
2586
+ stag_table_name,
2530
2587
  column_names,
2531
- self.schema_name)
2588
+ to_schema_name=self.schema_name,
2589
+ from_schema_name=self.schema_name)
2532
2590
 
2533
2591
  return rc_dict
2534
2592
  finally:
2593
+ # Drop the staging table.
2535
2594
  if stage_table_created:
2536
2595
  UtilFuncs._drop_table(self._get_fully_qualified_table_name(stag_table_name))
2537
2596
 
@@ -2733,7 +2792,7 @@ class _DataTransferUtils():
2733
2792
  # Get open_sessions argument.
2734
2793
  open_sessions = kwargs.pop("open_sessions", None)
2735
2794
  if not require_fastexport and open_sessions is not None:
2736
- raise TeradataMlException("'{0}' can only be used when '{1}' is set to True."\
2795
+ raise TeradataMlException("'{0}' can only be used when '{1}' is set to True." \
2737
2796
  .format("open_sessions", "fastexport or require"),
2738
2797
  MessageCodes.DEPENDENT_ARGUMENT)
2739
2798
 
@@ -24,6 +24,7 @@ import teradataml.context.context as tdmlctx
24
24
  from collections import OrderedDict, namedtuple
25
25
  from sqlalchemy.sql import ClauseElement
26
26
  from teradataml import execute_sql
27
+ from teradataml import GarbageCollector
27
28
  from teradataml.dataframe.sql import _MetaExpression
28
29
  from teradataml.dataframe.sql_interfaces import ColumnExpression
29
30
  from teradataml.dataframe.sql_functions import case
@@ -5017,7 +5018,7 @@ class DataFrame():
5017
5018
  'median', 'var'
5018
5019
 
5019
5020
  Acceptable formats for function(s) are
5020
- string, dictionary or list of strings/functions.
5021
+ string, dictionary, list of strings/functions/ColumnExpression or ColumnExpression.
5021
5022
 
5022
5023
  Accepted combinations are:
5023
5024
  1. String function name
@@ -5025,12 +5026,57 @@ class DataFrame():
5025
5026
  3. Dictionary containing column name as key and
5026
5027
  aggregate function name (string or list of
5027
5028
  strings) as value
5029
+ 4. ColumnExpression built using the aggregate functions.
5030
+ 5. List of ColumnExpression built using the aggregate functions.
5031
+
5032
+ Note:
5033
+ * The name of the output columns are generated based on aggregate functions and column names.
5034
+ For Example,
5035
+ 1. "func" passed as a string.
5036
+ >>> df.agg('mean')
5037
+ Assume that the column names of the dataframe are employee_no, first_name, marks, dob, joined_date.
5038
+ After the above operation, the output column names are:
5039
+ mean_employee_no, mean_marks, mean_dob, mean_joined_date
5040
+
5041
+ 2. "func" passed as a list of string functions.
5042
+ >>> df.agg(['min', 'sum'])
5043
+ Assume that the column names of the dataframe are employee_no, first_name, marks, dob, joined_date.
5044
+ After the above operation, the output column names are:
5045
+ min_employee_no, sum_employee_no, min_first_name, min_marks, sum_marks, min_dob, min_joined_date
5046
+
5047
+ 3. "func" passed as a dictionary containing column name as key and aggregate function name as value.
5048
+ >>> df.agg({'employee_no' : ['min', 'sum', 'var'], 'first_name' : ['min']})
5049
+ Output column names after the above operation are:
5050
+ min_employee_no, sum_employee_no, var_employee_no, min_first_name
5051
+
5052
+ 4. "func" passed as a ColumnExpression built using the aggregate functions.
5053
+ >>> df.agg(df.first_name.count())
5054
+ Output column name after the above operation is:
5055
+ count(first_name)
5056
+
5057
+ 5. "func" passed as a list of ColumnExpression built using the aggregate functions.
5058
+ >>> df.agg([df.employee_no.min(), df.first_name.count()])
5059
+ Output column names after the above operation are:
5060
+ min(employee_no), count(first_name)
5061
+
5062
+ * On ColumnExpression or list of ColumnExpression alias() can be used to
5063
+ return the output columns with aliased name.
5064
+ For Example,
5065
+ >>> df.agg(df.first_name.count().alias("total_names"))
5066
+ Output column name after the above operation is:
5067
+ total_names
5068
+
5069
+ >>> df.agg([df.joined_date.min().alias("min_date"), df.first_name.count().alias("total_names")])
5070
+ Output column names after the above operation are:
5071
+ min_date, total_names
5072
+
5028
5073
 
5029
5074
  RETURNS:
5030
5075
  teradataml DataFrame object with operations
5031
5076
  mentioned in parameter 'func' performed on specified
5032
5077
  columns.
5033
5078
 
5079
+
5034
5080
  RAISES:
5035
5081
  TeradataMLException
5036
5082
  1. TDMLDF_AGGREGATE_FAILED - If operations on given columns
@@ -5072,8 +5118,8 @@ class DataFrame():
5072
5118
  valid datatype.
5073
5119
 
5074
5120
  Possible error message:
5075
- Invalid type(s) passed to argument 'func', should be:"\
5076
- "['str', 'list', 'dict'].
5121
+ Invalid type(s) passed to argument 'func', should be:
5122
+ ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression'].
5077
5123
 
5078
5124
  EXAMPLES :
5079
5125
  # Load the data to run the example.
@@ -5090,21 +5136,49 @@ class DataFrame():
5090
5136
  112 None None None 18/12/05
5091
5137
  >>>
5092
5138
 
5093
- # Dictionary of column names to string function/list of string functions as parameter.
5139
+ # Get the minimum, sum and variance of employee number and minimum and mean of name,
5140
+ # by passing dictionary of column names to string function/list of string functions as parameter.
5094
5141
  >>> df.agg({'employee_no' : ['min', 'sum', 'var'], 'first_name' : ['min', 'mean']})
5095
- min_employee_no sum_employee_no var_employee_no min_first_name
5096
- 0 100 313 44.333333 abcd
5142
+ min_employee_no sum_employee_no var_employee_no min_first_name
5143
+ 0 100 313 44.333333 abcd
5097
5144
 
5098
- # List of string functions as parameter.
5145
+ # Get the minimum and sum of all the columns in the dataframe,
5146
+ # by passing list of string functions as parameter.
5099
5147
  >>> df.agg(['min', 'sum'])
5100
- min_employee_no sum_employee_no min_first_name min_marks sum_marks min_dob min_joined_date
5101
- 0 100 313 abcd None None None 1902-05-12
5148
+ min_employee_no sum_employee_no min_first_name min_marks sum_marks min_dob min_joined_date
5149
+ 0 100 313 abcd None None None 1902-05-12
5102
5150
 
5103
- # A string function as parameter.
5151
+ # Get the mean of all the columns in the dataframe, by passing string function as parameter.
5104
5152
  >>> df.agg('mean')
5105
5153
  mean_employee_no mean_marks mean_dob mean_joined_date
5106
5154
  0 104.333333 None None 60/12/04
5107
5155
 
5156
+ # Get the total names in the dataframe, by running count() on the "first_name"
5157
+ # and passing ColumnExpression as parameter.
5158
+ >>> df.agg(df.first_name.count())
5159
+ count(first_name)
5160
+ 0 2
5161
+
5162
+ # Get the minimum of employee number and total names in the dataframe,
5163
+ # by running min() on employee_no and count() on the "first_name"
5164
+ # and passing list of ColumnExpression as parameter.
5165
+ >>> df.agg([df.employee_no.min(), df.first_name.count()])
5166
+ min(employee_no) count(first_name)
5167
+ 0 100 2
5168
+
5169
+ # Get the total names in the dataframe, by running count() on the "first_name" and
5170
+ # use alias() to have the output column named as "total_names".
5171
+ >>> df.agg(df.first_name.count().alias("total_names"))
5172
+ total_names
5173
+ 0 2
5174
+
5175
+ # Get the minimum of joining date and total names in the dataframe,
5176
+ # by running min() on joined_date and count() on the "first_name" and
5177
+ # use alias() to have the output column named as "min_date" and "total_names".
5178
+ >>> df.agg([df.joined_date.min().alias("min_date"), df.first_name.count().alias("total_names")])
5179
+ min_date total_names
5180
+ 0 02/12/05 2
5181
+
5108
5182
  # Select only subset of columns from the DataFrame.
5109
5183
  >>> df1 = df.select(['employee_no', 'first_name', 'joined_date'])
5110
5184
 
@@ -5145,9 +5219,9 @@ class DataFrame():
5145
5219
  raise TeradataMlException(Messages.get_message(MessageCodes.MISSING_ARGS, "func"),
5146
5220
  MessageCodes.MISSING_ARGS)
5147
5221
 
5148
- if not isinstance(func, str) and not isinstance(func, list) and not isinstance(func, dict):
5222
+ if not isinstance(func, (str, list, dict, ColumnExpression)):
5149
5223
  raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
5150
- 'func', ['str', 'list', 'dict']),
5224
+ 'func', ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression']),
5151
5225
  MessageCodes.UNSUPPORTED_DATATYPE)
5152
5226
 
5153
5227
  return self._get_dataframe_aggregate(func)
@@ -5169,6 +5243,8 @@ class DataFrame():
5169
5243
  3. Dictionary containing column name as key and
5170
5244
  aggregate function name (string or list of
5171
5245
  strings) as value
5246
+ 4. ColumnExpression built using the aggregate functions.
5247
+ 5. List of ColumnExpression built using the aggregate functions.
5172
5248
 
5173
5249
  **kwargs: Keyword arguments. Mainly used for Time Series Aggregates.
5174
5250
 
@@ -10013,9 +10089,10 @@ class DataFrame():
10013
10089
  case_when_then = {}
10014
10090
  list_of_fracs = frac
10015
10091
 
10016
- # When stratify column is passed for sample then perform TrainTestSplit
10017
- # for data sampling.
10018
- if stratify_column is not None:
10092
+ # When stratify column is passed for sample or when seed is passed for
10093
+ # reproducibility of result then
10094
+ # perform TrainTestSplit for data sampling.
10095
+ if stratify_column is not None or seed is not None:
10019
10096
  # Local import TrainTestSplit function.
10020
10097
  from teradataml.analytics.sqle import TrainTestSplit
10021
10098
 
@@ -10029,7 +10106,15 @@ class DataFrame():
10029
10106
  train_size=list_of_fracs[0],
10030
10107
  test_size=list_of_fracs[1],
10031
10108
  stratify_column=stratify_column,
10032
- seed=seed)
10109
+ seed=seed,
10110
+ persist=True)
10111
+
10112
+ # Retrieve the table name from TrainTestSplit_out object.
10113
+ table_name = TrainTestSplit_out.result._table_name
10114
+
10115
+ # Add the table to garbage collector.
10116
+ table_added = GarbageCollector._add_to_garbagecollector(table_name)
10117
+
10033
10118
  # Retrieve the sampled result and updated the column name and values
10034
10119
  # for backward compatibility.
10035
10120
  _sampled_df = TrainTestSplit_out.result
@@ -16626,7 +16711,7 @@ class _TDUAF(DataFrame):
16626
16711
  # UAF Functions do not accept double quotes.
16627
16712
  db_name = UtilFuncs._extract_db_name(table_name)
16628
16713
  if db_name:
16629
- table_name = "{}.{}".format(db_name, UtilFuncs._extract_table_name(table_name))
16714
+ table_name = '"{}"."{}"'.format(db_name, UtilFuncs._extract_table_name(table_name))
16630
16715
  else:
16631
16716
  table_name = UtilFuncs._extract_table_name(table_name)
16632
16717