teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.1__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of teradataml might be problematic.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +71 -0
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +51 -24
- teradataml/analytics/json_parser/utils.py +11 -17
- teradataml/automl/__init__.py +103 -48
- teradataml/automl/data_preparation.py +55 -37
- teradataml/automl/data_transformation.py +131 -69
- teradataml/automl/feature_engineering.py +117 -185
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +13 -25
- teradataml/automl/model_training.py +214 -75
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +11 -6
- teradataml/common/garbagecollector.py +5 -0
- teradataml/common/messagecodes.py +3 -1
- teradataml/common/messages.py +2 -1
- teradataml/common/utils.py +6 -0
- teradataml/context/context.py +49 -29
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/glm_example.json +28 -1
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +20 -1
- teradataml/data/scripts/sklearn/sklearn_fit.py +23 -27
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +20 -28
- teradataml/data/scripts/sklearn/sklearn_function.template +13 -18
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +18 -27
- teradataml/data/scripts/sklearn/sklearn_score.py +20 -29
- teradataml/data/scripts/sklearn/sklearn_transform.py +30 -38
- teradataml/data/teradataml_example.json +77 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +120 -61
- teradataml/dataframe/dataframe.py +102 -17
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +272 -89
- teradataml/dataframe/sql.py +84 -0
- teradataml/dbutils/dbutils.py +2 -2
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +102 -55
- teradataml/options/__init__.py +13 -4
- teradataml/options/configure.py +27 -6
- teradataml/scriptmgmt/UserEnv.py +19 -16
- teradataml/scriptmgmt/lls_utils.py +117 -14
- teradataml/table_operators/Script.py +2 -3
- teradataml/table_operators/TableOperator.py +58 -10
- teradataml/utils/validators.py +40 -2
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +78 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/RECORD +108 -90
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +0 -0
@@ -19,11 +19,11 @@ from teradataml.common.sqlbundle import SQLBundle
 from teradataml.common.utils import UtilFuncs
 from teradataml.common.constants import CopyToConstants
 from teradataml.context.context import get_context, get_connection, \
+    _get_context_temp_databasename, _get_current_databasename
 from teradataml.dataframe import dataframe as tdmldf
 from teradataml.dataframe.copy_to import copy_to_sql, _create_table_object, \
+    _get_pd_df_column_names, _extract_column_info, \
+    _check_columns_insertion_compatible, _get_index_labels
 from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
 from teradataml.dbutils.dbutils import _create_table, _execute_query_and_generate_pandas_df
 from teradataml.utils.validators import _Validators
@@ -335,7 +335,7 @@ def fastexport(df, export_to="pandas", index_column=None,
         if not csv_file:
             raise TeradataMlException(
                 Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING, "csv_file",
-                                     "{0}='{1}'".format("export_to","csv")),
+                                     "{0}='{1}'".format("export_to", "csv")),
                 MessageCodes.DEPENDENT_ARG_MISSING)

         if not csv_file.lower().endswith(".csv"):
@@ -363,7 +363,7 @@ def fastexport(df, export_to="pandas", index_column=None,
         raise TeradataMlException(
             Messages.get_message(MessageCodes.DATA_EXPORT_FAILED, "fastexport",
                                  export_to, str(err)),
+            MessageCodes.DATA_EXPORT_FAILED)


 @collect_queryband(queryband="rdCsv")
@@ -601,7 +601,7 @@ def read_csv(filepath,
         Specifies whether to persist the errors/warnings(if any) information in Vantage
         or not.
         If "save_errors" is set to False:
-            1. Errors or warnings (
+            1. Errors or warnings (if any) are not persisted into tables.
             2. Errors table genarated by FastloadCSV are not persisted.
         If "save_errors" is set to True:
             1. The errors or warnings information is persisted and names of error and
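For context, a minimal sketch of how these flags are typically exercised from the public read_csv() entry point; the file, table, and column names are made up, and the argument list beyond filepath, table_name, types, save_errors, and catch_errors_warnings is assumed rather than taken from this diff:

    from collections import OrderedDict
    from teradataml import create_context, read_csv
    from teradatasqlalchemy.types import INTEGER, VARCHAR

    create_context(host="<host>", username="<user>", password="<pwd>")

    # Column types for the target table (names and types are illustrative).
    types = OrderedDict(id=INTEGER, fname=VARCHAR(20))

    # With save_errors=True, FastloadCSV error/warning information is
    # persisted to tables in Vantage; with catch_errors_warnings=True it
    # is also surfaced to the client alongside the result.
    result = read_csv(filepath="sample.csv",
                      table_name="sample_tbl",
                      types=types,
                      save_errors=True,
                      catch_errors_warnings=True)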
@@ -883,7 +883,7 @@ def read_csv(filepath,
     # Create SQLAlchemy table object from existing table.
     existing_table = UtilFuncs._get_sqlalchemy_table(table_name,
+                                                     schema_name=schema_name)

     # Check compatibility of CSV columns with existing table columns.
     if types is not None:
@@ -904,7 +904,7 @@ def read_csv(filepath,
                                     primary_index=existing_table_primary_index)
     else:
         rc_dict = dt_obj._insert_from_csv_without_fastload(table_name=table_name,
+                                                           column_names=cols_name)
     # Return the read_csv result.
     return dt_obj._get_result(rc_dict)

@@ -923,6 +923,7 @@ class _DataTransferUtils():
     Teradata Vantage to outside world, for example Data Transfer using
     FastExport Protocol.
     """
+
     def __init__(self, df, index_column=None, num_rows=99999, all_rows=False,
                  catch_errors_warnings=False, table_name=None,
                  schema_name=None, if_exists='append', index=False,
@@ -934,7 +935,9 @@ class _DataTransferUtils():
                  columns_list=None, sequence_column=None, seq_max=None,
                  use_fastload=True, api_name='fastexport',
                  open_sessions=None, chunksize=CopyToConstants.DBAPI_BATCHSIZE.value,
-                 match_column_order=True
+                 match_column_order=True, err_tbl_1_suffix=None,
+                 err_tbl_2_suffix=None, err_tbl_name=None, warn_tbl_name=None,
+                 err_staging_db=None):
         """
         DESCRIPTION:
             Constructor for the _DataTransferUtils class. It initialises
@@ -1088,6 +1091,35 @@ class _DataTransferUtils():
                 Default Value: 16383
                 Types: int

+            err_tbl_1_suffix:
+                Optional Argument.
+                Specifies the suffix for error table 1 created by fastload job.
+                Types: String
+
+            err_tbl_2_suffix:
+                Optional Argument.
+                Specifies the suffix for error table 2 created by fastload job.
+                Types: String
+
+            err_tbl_name:
+                Optional Argument.
+                Specifies the name for error table.
+                Types: String
+
+            warn_tbl_name:
+                Optional Argument.
+                Specifies the name for warning table.
+                Types: String
+
+            err_staging_db:
+                Optional Argument.
+                Specifies the name of the database to be used for creating staging
+                table and error tables.
+                Note:
+                    Current session user must have CREATE, DELETE and INSERT table
+                    rights on err_staging_db database.
+                Types: String
+
         PARAMETERS:
             None.
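These constructor arguments back the corresponding user-facing fastload() options in this release. A hedged sketch of how a caller might use them; that fastload() forwards these exact keyword names is inferred from the docstring above, and the table and database names are assumptions:

    import pandas as pd
    from teradataml.dataframe.fastload import fastload

    # Illustrative frame; "sales_stage" and "etl_scratch" are made-up names.
    pdf = pd.DataFrame({"id": [1, 2, 3], "amount": [10.5, 20.0, 7.25]})

    result = fastload(df=pdf,
                      table_name="sales_stage",
                      save_errors=True,
                      err_staging_db="etl_scratch",  # needs CREATE/DELETE/INSERT rights
                      err_tbl_1_suffix="_e1",        # suffix for FastLoad error table 1
                      err_tbl_2_suffix="_e2")        # suffix for FastLoad error table 2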
@@ -1139,6 +1171,11 @@ class _DataTransferUtils():
         self.open_sessions = open_sessions
         self.chunksize = chunksize
         self.match_column_order = match_column_order
+        self.err_tbl_1_suffix = err_tbl_1_suffix
+        self.err_tbl_2_suffix = err_tbl_2_suffix
+        self.err_tbl_name = err_tbl_name
+        self.warn_tbl_name = warn_tbl_name
+        self.err_staging_db = err_staging_db

         # Validate arguments.
         if self.api_name == 'fastexport':
@@ -1243,6 +1280,7 @@ class _DataTransferUtils():
             dt_obj = _DataTransferUtils()
             dt_obj._validate_csv_sep_quotechar()
         """
+
         # Function to validate char value for length and allowed characters.
         def validate_char_arg_csv(arg_name, arg):
@@ -1444,7 +1482,7 @@ class _DataTransferUtils():
                                csv_file_name=target_csv)

         """
-        fastexport_esc_func=""
+        fastexport_esc_func = ""
         open_session_esc_func = ""
         if require is not None:
             if require:
@@ -1492,7 +1530,7 @@ class _DataTransferUtils():
         write_csv_escape_func = DriverEscapeFunctions.WRITE_TO_CSV.value.format(csv_file_name)
         field_sep_esc_func = DriverEscapeFunctions.FIELD_SEP.value.format(field_sep)
         field_quote_esc_func = DriverEscapeFunctions.FIELD_QUOTE.value.format(field_quote)
+
         query = "{0}{1}{2}{3}{4}{5}".format(fastexport_esc_func,
                                             open_session_esc_func,
                                             field_sep_esc_func,
@@ -1639,7 +1677,7 @@ class _DataTransferUtils():

     # End of functions specific to exporting table data in Vantage into pandas DataFrame.

+    # General functions to get warrnings and errors.
     def _get_errors_warnings(self, cur, insert_stmt, escape_function):
         """
         Internal function executes teradatasql provided escape functions
@@ -1672,7 +1710,7 @@ class _DataTransferUtils():
             dt_obj._get_errors_warnings(cur, insert_stmt, escape_function)
         """
         errorwarninglist = self._process_escape_functions(cur,
-                                                          escape_function=
+                                                          escape_function=escape_function,
                                                           insert_query=insert_stmt)

         from teradatasql import vernumber
@@ -1714,11 +1752,10 @@ class _DataTransferUtils():

         return pd.DataFrame()

-    def _create_error_warnings_table(self, pdf, msg_type, logon_seq_number):
+    def _create_error_warnings_table(self, pdf, msg_type, logon_seq_number, table_name=None):
         """
         DESCRIPTION:
-            Internal function creates the errors and warnings table in Vantage
-            if save_errors is set to True.
+            Internal function creates the errors and warnings table in Vantage.

         PARAMETERS:
             pdf:
@@ -1747,12 +1784,13 @@ class _DataTransferUtils():
             dt_obj = _DataTransferUtils(df, table_name, types)
             dt_obj._create_error_warnings_table(pdf, msg_type, logon_seq_number)
         """
-        if
+        if not table_name:
+            table_name = "td_fl_{0}_{1}_{2}".format(self.table_name, msg_type, logon_seq_number)
+        copy_to_sql(pdf, table_name, schema_name=self.err_staging_db,
+                    if_exists='replace')
+        return "{}.{}".format(self.err_staging_db if self.err_staging_db
+                              else _get_current_databasename(),
+                              table_name)

     def _process_escape_functions(self, cur, escape_function, insert_query=None):
         """
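In other words, when no explicit name is supplied, the persisted table is named from the target table, the message type, and the FastLoad logon sequence number, qualified by err_staging_db when set and by the current database otherwise. A quick illustration of the resulting name; all values here are made up:

    # Default naming: td_fl_<target table>_<err|warn>_<logon sequence number>.
    table_name = "td_fl_{0}_{1}_{2}".format("sales_stage", "err", 1042)
    print("{}.{}".format("etl_scratch", table_name))
    # -> etl_scratch.td_fl_sales_stage_err_1042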
@@ -1834,16 +1872,23 @@ class _DataTransferUtils():
             ins_query = dt_obj._form_insert_query()
         """

-        open_sessions_esc_func = ""
-        field_sep_esc_func = ""
-        field_quote_esc_func = ""
+        escape_funcs = ""

+        # Get the fastload escape function.
+        if self.use_fastload:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.REQUIRE_FASTLOAD.value
+
+        # Get the escape function clause for open_sessions.
+        if self.open_sessions is not None:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.OPEN_SESSIONS.value.format(self.open_sessions)
+
+        # Create the list of values to be inserted.
         if self.api_name == "fastload":
             col_names = _get_pd_df_column_names(self.df)
             insert_values = ", ".join(['?' for i in range(len(col_names) + len(self.df.index.names)
                                        if self.index is True else len(col_names))])

+        # Get escape functions related to read_csv.
         if self.api_name == "read_csv":
             # Get the column names.
             if self.if_exists == 'append' and column_names is not None:
@@ -1852,7 +1897,7 @@ class _DataTransferUtils():
                 col_names, _ = _extract_column_info(self.df, self.types)

             # Get read_csv escape function.
+            escape_funcs = escape_funcs + DriverEscapeFunctions.READ_CSV.value.format(self.df)
             insert_values = ", ".join(['?' for i in range(len(col_names))])

             # Create escape function for sep.
@@ -1861,7 +1906,7 @@ class _DataTransferUtils():
                 field_sep = "''''"
             elif self.sep == "\"":
                 field_sep = "\"\"\"\""
+            escape_funcs = escape_funcs + DriverEscapeFunctions.FIELD_SEP.value.format(field_sep)

             # Create escape function for quotechar.
             field_quote = "'{0}'".format(self.quotechar)
@@ -1869,27 +1914,28 @@ class _DataTransferUtils():
                 field_quote = "''''"
             elif self.quotechar == "\"":
                 field_quote = "\"\"\"\""
+            escape_funcs = escape_funcs + DriverEscapeFunctions.FIELD_QUOTE.value.format(field_quote)

         # Create base insert query.
         base_insert_query = "INSERT INTO {0} VALUES ({1});".format(table, insert_values)

-        # Get the
+        # Get the escape function clauses for error table and DB related escape functions.
+        # TODO: This condition will be optimized with ELE-6743.
+        if self.api_name == "fastload" and self.save_errors and not self.err_tbl_name:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_TBL_MNG_FLAG.value.format("off")

-        open_sessions_esc_func = DriverEscapeFunctions.OPEN_SESSIONS.value.format(self.open_sessions)
+        if self.err_tbl_1_suffix:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_TBL_1.value.format(self.err_tbl_1_suffix)

+        if self.err_tbl_2_suffix:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_TBL_2.value.format(self.err_tbl_2_suffix)

+        if self.err_staging_db:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_STAGING_DB.value.format(self.err_staging_db)

-                                             base_insert_query)
-        return query
+        # Generate final insert query by appending all escape functions.
+        query = "{0}{1}".format(escape_funcs, base_insert_query)
+        return query

     def _table_exists(self, con):
         """
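The DriverEscapeFunctions values are teradatasql driver escape sequences that get prepended to the bare INSERT. A minimal sketch of what the assembled query text looks like, assuming the driver's {fn teradata_...} escape-function syntax; the mapping of enum members to these particular function names, and all suffix/database values, are assumptions for illustration:

    # Escape-function clauses accumulate in front of the INSERT; the
    # teradatasql driver consumes them to configure the FastLoad job.
    escape_funcs = (
        "{fn teradata_require_fastload}"                    # REQUIRE_FASTLOAD
        "{fn teradata_error_table_1_suffix(_e1)}"           # ERR_TBL_1, assumed suffix
        "{fn teradata_error_table_2_suffix(_e2)}"           # ERR_TBL_2, assumed suffix
        "{fn teradata_error_table_database(etl_scratch)}"   # ERR_STAGING_DB, assumed DB
    )
    base_insert_query = 'INSERT INTO "etl_scratch"."sales_stage" VALUES (?, ?);'
    query = "{0}{1}".format(escape_funcs, base_insert_query)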
@@ -1914,8 +1960,7 @@ class _DataTransferUtils():
         """
         return con.dialect.has_table(get_connection(), self.table_name, self.schema_name)

-    def _get_fully_qualified_table_name(self, table_name=None):
+    def _get_fully_qualified_table_name(self, table_name=None, schema_name=None):
         """
         DESCRIPTION:
             Function returns schema qualified table name
@@ -1929,6 +1974,11 @@ class _DataTransferUtils():
                 Specifies the table name.
                 Types: str

+            schema_name:
+                Optional Argument.
+                Specifies the schema name.
+                Types: str
+
         RETURNS:
             str.

@@ -1942,12 +1992,14 @@ class _DataTransferUtils():
         table_name = table_name if table_name else self.table_name

         table = '"{}"'.format(table_name)
-        if
+        if schema_name is not None:
+            table = '"{}"."{}"'.format(schema_name, table_name)
+        elif self.schema_name is not None:
             table = '"{}"."{}"'.format(self.schema_name, table_name)

         return table

-    def _create_table(self, con, table_name=None):
+    def _create_table(self, con, table_name=None, schema_name=None):
         """
         DESCRIPTION:
             Internal function creates table in the Vantage.
@@ -1963,6 +2015,11 @@ class _DataTransferUtils():
                 Specifies the table name.
                 Types: str

+            schema_name:
+                Optional Argument.
+                Specifies the schema name where table needs to be created.
+                Types: str
+
         RETURNS:
             None.

@@ -1974,9 +2031,9 @@ class _DataTransferUtils():
             dt_obj._create_table(con)
         """
         table_name = table_name if table_name else self.table_name
+        schema_name = schema_name if schema_name else self.schema_name
         table = _create_table_object(df=self.df, table_name=table_name, types=self.types, con=con,
-                                     schema_name=
+                                     schema_name=schema_name, primary_index=self.primary_index,
                                      temporary=self.temporary, set_table=self.set_table, index=self.index,
                                      index_label=self.index_label)
@@ -2032,7 +2089,7 @@ class _DataTransferUtils():

         # Turn off autocommit before the Fastload insertion.
         self._process_escape_functions(cur, escape_function= \
+                                       DriverEscapeFunctions.AUTOCOMMIT_OFF)

         # Initialize dict template for saving error/warning information.
         err_dict = {}
@@ -2052,7 +2109,7 @@ class _DataTransferUtils():

         # Get logon sequence number to be used for error/warning table names
         logon_seq_number = self._process_escape_functions(cur, escape_function= \
+                                                          DriverEscapeFunctions.LOGON_SEQ_NUM,
                                                           insert_query=ins)

         # Commit the rows
@@ -2064,16 +2121,16 @@ class _DataTransferUtils():
         if len(warn) != 0:
             warn_dict['error_message'].extend(warn)

-        # Get error and warning
+        # Get error and warning information for error and warning tables, persist
         # error and warning tables to Vantage if user has specified save_error as True
         # else show it as pandas dataframe on console.
         pd_err_df = self._get_pandas_df_from_errors_warnings(err_dict)
-        if not pd_err_df.empty:
+        if not pd_err_df.empty and self.save_errors:
             msg_type = "err"
             error_tablename = self._create_error_warnings_table(pd_err_df, msg_type, logon_seq_number[0][0])

         pd_warn_df = self._get_pandas_df_from_errors_warnings(warn_dict)
-        if not pd_warn_df.empty:
+        if not pd_warn_df.empty and self.save_errors:
             msg_type = "warn"
             warn_tablename = self._create_error_warnings_table(pd_warn_df, msg_type, logon_seq_number[0][0])
@@ -2100,7 +2157,7 @@ class _DataTransferUtils():
         finally:
             # Turn on autocommit.
             self._process_escape_functions(cur, escape_function= \
+                                           DriverEscapeFunctions.AUTOCOMMIT_ON)
             cur.close()

     def _get_result(self, result_dict=None):
@@ -2262,7 +2319,7 @@ class _DataTransferUtils():
         awu_matrix.append(['quotechar', self.quotechar, True, (str)])
         awu_matrix.append(['catch_errors_warnings', self.catch_errors_warnings, False, (bool)])
         awu_matrix.append(['use_fastload', self.use_fastload, False, (bool)])
-        awu_matrix.append(['open_sessions',self.open_sessions, True, (int), False])
+        awu_matrix.append(['open_sessions', self.open_sessions, True, (int), False])
         awu_matrix.append(['chunksize', self.chunksize, False, (int)])
         awu_matrix.append(['match_column_order', self.match_column_order, True, (bool)])
         if isinstance(self.df, pd.DataFrame):
@@ -2307,8 +2364,8 @@ class _DataTransferUtils():

         if (is_multi_index and ((isinstance(self.index_label, str) and index_levels != 1) or
             (is_index_list and index_levels != len(self.index_label)))) or \
+           (not is_multi_index and is_index_list and
+            (is_index_list and num_index > 1)):
             valid_arg_msg = 'String or list of Strings with the number of ' \
                             'Strings matching the number of levels' \
                             ' in the index'
@@ -2522,16 +2579,18 @@ class _DataTransferUtils():

             # Load the data from CSV to staging table.
             rc_dict = self._insert_from_csv_with_fastload(table_name=stag_table_name,
+                                                          column_names=column_names)

             # Insert all rows from staging table to already existing table.
             df_utils._insert_all_from_table(self.table_name,
+                                            stag_table_name,
                                             column_names,
-                                            self.schema_name
+                                            to_schema_name=self.schema_name,
+                                            from_schema_name=self.schema_name)

             return rc_dict
         finally:
+            # Drop the staging table.
             if stage_table_created:
                 UtilFuncs._drop_table(self._get_fully_qualified_table_name(stag_table_name))
@@ -2733,7 +2792,7 @@ class _DataTransferUtils():
         # Get open_sessions argument.
         open_sessions = kwargs.pop("open_sessions", None)
         if not require_fastexport and open_sessions is not None:
-            raise TeradataMlException("'{0}' can only be used when '{1}' is set to True."\
+            raise TeradataMlException("'{0}' can only be used when '{1}' is set to True." \
                                       .format("open_sessions", "fastexport or require"),
                                       MessageCodes.DEPENDENT_ARGUMENT)
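So open_sessions is only meaningful when the FastExport protocol is actually in use. A hedged usage sketch, assuming the public fastexport() entry point forwards open_sessions via kwargs as this hunk shows; the table name is made up:

    from teradataml import DataFrame, fastexport

    df = DataFrame("sales")  # assumed table name

    # Valid: FastExport with a bounded number of data transfer sessions.
    pdf = fastexport(df, export_to="pandas", open_sessions=4)

    # Passing open_sessions on a path that does not require FastExport
    # raises TeradataMlException with MessageCodes.DEPENDENT_ARGUMENT.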
@@ -24,6 +24,7 @@ import teradataml.context.context as tdmlctx
 from collections import OrderedDict, namedtuple
 from sqlalchemy.sql import ClauseElement
 from teradataml import execute_sql
+from teradataml import GarbageCollector
 from teradataml.dataframe.sql import _MetaExpression
 from teradataml.dataframe.sql_interfaces import ColumnExpression
 from teradataml.dataframe.sql_functions import case
@@ -5017,7 +5018,7 @@ class DataFrame():
                     'median', 'var'

                 Acceptable formats for function(s) are
-                string, dictionary
+                string, dictionary, list of strings/functions/ColumnExpression or ColumnExpression.

                 Accepted combinations are:
                 1. String function name
@@ -5025,12 +5026,57 @@ class DataFrame():
                 3. Dictionary containing column name as key and
                    aggregate function name (string or list of
                    strings) as value
+                4. ColumnExpression built using the aggregate functions.
+                5. List of ColumnExpression built using the aggregate functions.
+
+                Note:
+                    * The name of the output columns are generated based on aggregate functions and column names.
+                      For Example,
+                      1. "func" passed as a string.
+                         >>> df.agg('mean')
+                         Assume that the column names of the dataframe are employee_no, first_name, marks, dob, joined_date.
+                         After the above operation, the output column names are:
+                         mean_employee_no, mean_marks, mean_dob, mean_joined_date
+
+                      2. "func" passed as a list of string functions.
+                         >>> df.agg(['min', 'sum'])
+                         Assume that the column names of the dataframe are employee_no, first_name, marks, dob, joined_date.
+                         After the above operation, the output column names are:
+                         min_employee_no, sum_employee_no, min_first_name, min_marks, sum_marks, min_dob, min_joined_date
+
+                      3. "func" passed as a dictionary containing column name as key and aggregate function name as value.
+                         >>> df.agg({'employee_no' : ['min', 'sum', 'var'], 'first_name' : ['min']})
+                         Output column names after the above operation are:
+                         min_employee_no, sum_employee_no, var_employee_no, min_first_name
+
+                      4. "func" passed as a ColumnExpression built using the aggregate functions.
+                         >>> df.agg(df.first_name.count())
+                         Output column name after the above operation is:
+                         count(first_name)
+
+                      5. "func" passed as a list of ColumnExpression built using the aggregate functions.
+                         >>> df.agg([df.employee_no.min(), df.first_name.count()])
+                         Output column names after the above operation are:
+                         min(employee_no), count(first_name)
+
+                    * On ColumnExpression or list of ColumnExpression alias() can be used to
+                      return the output columns with aliased name.
+                      For Example,
+                      >>> df.agg(df.first_name.count().alias("total_names"))
+                      Output column name after the above operation is:
+                      total_names
+
+                      >>> df.agg([df.joined_date.min().alias("min_date"), df.first_name.count().alias("total_names")])
+                      Output column names after the above operation are:
+                      min_date, total_names

             RETURNS:
                 teradataml DataFrame object with operations
                 mentioned in parameter 'func' performed on specified
                 columns.
+
             RAISES:
                 TeradataMLException
                 1. TDMLDF_AGGREGATE_FAILED - If operations on given columns
@@ -5072,8 +5118,8 @@ class DataFrame():
                 valid datatype.

                 Possible error message:
-                Invalid type(s) passed to argument 'func', should be:
+                Invalid type(s) passed to argument 'func', should be:
+                ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression'].

             EXAMPLES :
                 # Load the data to run the example.
@@ -5090,21 +5136,49 @@ class DataFrame():
                 112    None   None  None   18/12/05
                 >>>

-                #
+                # Get the minimum, sum and variance of employee number and minimum and mean of name,
+                # by passing dictionary of column names to string function/list of string functions as parameter.
                 >>> df.agg({'employee_no' : ['min', 'sum', 'var'], 'first_name' : ['min', 'mean']})
+                   min_employee_no  sum_employee_no  var_employee_no min_first_name
+                0              100              313        44.333333           abcd

-                #
+                # Get the minimum and sum of all the columns in the dataframe,
+                # by passing list of string functions as parameter.
                 >>> df.agg(['min', 'sum'])
+                   min_employee_no  sum_employee_no min_first_name min_marks sum_marks min_dob min_joined_date
+                0              100              313           abcd      None      None    None      1902-05-12

-                #
+                # Get the mean of all the columns in the dataframe, by passing string function as parameter.
                 >>> df.agg('mean')
                    mean_employee_no mean_marks mean_dob mean_joined_date
                 0        104.333333       None     None         60/12/04

+                # Get the total names in the dataframe, by running count() on the "first_name"
+                # and passing ColumnExpression as parameter.
+                >>> df.agg(df.first_name.count())
+                   count(first_name)
+                0                  2
+
+                # Get the minimum of joining date and total of names in the dataframe,
+                # by running min() on joined_date and count() on the "first_name"
+                # and passing list of ColumnExpression as parameter.
+                >>> df.agg([df.employee_no.min(), df.first_name.count()])
+                   min(employee_no)  count(first_name)
+                0               100                  2
+
+                # Get the total names in the dataframe, by running count() on the "first_name" and
+                # use alias() to have the output column named as "total_names".
+                >>> df.agg(df.first_name.count().alias("total_names"))
+                   total_names
+                0            2
+
+                # Get the minimum of joining date and total names in the dataframe,
+                # by running min() on joined_date and count() on the "first_name" and
+                # use alias() to have the output column named as "min_date" and "total_names".
+                >>> df.agg([df.joined_date.min().alias("min_date"), df.first_name.count().alias("total_names")])
+                   min_date  total_names
+                0  02/12/05            2

                 # Select only subset of columns from the DataFrame.
                 >>> df1 = df.select(['employee_no', 'first_name', 'joined_date'])
@@ -5145,9 +5219,9 @@ class DataFrame():
            raise TeradataMlException(Messages.get_message(MessageCodes.MISSING_ARGS, "func"),
                                      MessageCodes.MISSING_ARGS)

-        if not isinstance(func, str
+        if not isinstance(func, (str, list, dict, ColumnExpression)):
            raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
+                                     'func', ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression']),
                                      MessageCodes.UNSUPPORTED_DATATYPE)

        return self._get_dataframe_aggregate(func)
@@ -5169,6 +5243,8 @@ class DataFrame():
                 3. Dictionary containing column name as key and
                    aggregate function name (string or list of
                    strings) as value
+                4. ColumnExpression built using the aggregate functions.
+                5. List of ColumnExpression built using the aggregate functions.

             **kwargs: Keyword arguments. Mainly used for Time Series Aggragates.
@@ -10013,9 +10089,10 @@ class DataFrame():
        case_when_then = {}
        list_of_fracs = frac

-        # When stratify column is passed for sample
-        #
+        # When stratify column is passed for sample or when seed is passed for
+        # reproducibilty of result then
+        # perform TrainTestSplit for data sampling.
        if stratify_column is not None or seed is not None:
            # Local import TrainTestSplit function.
            from teradataml.analytics.sqle import TrainTestSplit
@@ -10029,7 +10106,15 @@ class DataFrame():
                train_size=list_of_fracs[0],
                test_size=list_of_fracs[1],
                stratify_column=stratify_column,
-                seed=seed
+                seed=seed,
+                persist=True)

+            # Retrieve the table name from TrainTestSplit_out object.
+            table_name = TrainTestSplit_out.result._table_name
+
+            # Add the table to garbage collector.
+            table_added = GarbageCollector._add_to_garbagecollector(table_name)

            # Retrieve the sampled result and updated the column name and values
            # for backward compatibility.
            _sampled_df = TrainTestSplit_out.result
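Practically, a seeded or stratified sample is now computed by the TrainTestSplit SQLE function into a persisted table, which is registered with the garbage collector for later cleanup. A hedged usage sketch, assuming sample() exposes the seed and stratify_column arguments this hunk reads; the table and column names are made up:

    from teradataml import DataFrame

    df = DataFrame("titanic")  # assumed table name

    # Reproducible 80/20 split: with seed set, sampling is delegated to
    # TrainTestSplit under the hood and the result table is persisted.
    train_test = df.sample(frac=[0.8, 0.2], seed=42)

    # Stratified variant on an assumed label column.
    strat = df.sample(frac=[0.8, 0.2], stratify_column="survived", seed=42)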
@@ -16626,7 +16711,7 @@ class _TDUAF(DataFrame):
        # UAF Functions do not accept double quotes.
        db_name = UtilFuncs._extract_db_name(table_name)
        if db_name:
-            table_name = "{}.{}".format(db_name, UtilFuncs._extract_table_name(table_name))
+            table_name = '"{}"."{}"'.format(db_name, UtilFuncs._extract_table_name(table_name))
        else:
            table_name = UtilFuncs._extract_table_name(table_name)
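The effect of this change is simply to quote the database and table identifiers separately, which keeps names containing spaces, mixed case, or special characters valid; a tiny illustration with made-up identifiers:

    db_name, tbl = "My DB", "My Table"  # made-up identifiers
    print("{}.{}".format(db_name, tbl))      # before: My DB.My Table
    print('"{}"."{}"'.format(db_name, tbl))  # after:  "My DB"."My Table"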