teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +183 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +2 -2
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +275 -40
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +17 -21
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1553 -319
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +276 -319
- teradataml/automl/data_transformation.py +163 -81
- teradataml/automl/feature_engineering.py +402 -239
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +48 -51
- teradataml/automl/model_training.py +291 -189
- teradataml/catalog/byom.py +8 -8
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +48 -6
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +156 -120
- teradataml/common/messagecodes.py +6 -1
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +103 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +121 -31
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/glm_example.json +28 -1
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +21 -2
- teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
- teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
- teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
- teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +97 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +9 -4
- teradataml/dataframe/data_transfer.py +125 -64
- teradataml/dataframe/dataframe.py +575 -57
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +273 -90
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +740 -18
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +324 -18
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
- teradataml/options/__init__.py +16 -5
- teradataml/options/configure.py +39 -6
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +26 -19
- teradataml/scriptmgmt/lls_utils.py +120 -16
- teradataml/table_operators/Script.py +4 -5
- teradataml/table_operators/TableOperator.py +160 -26
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +41 -3
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
teradataml/dataframe/dataframe_utils.py
CHANGED

@@ -35,6 +35,7 @@ from teradataml.utils.utils import execute_sql
 from teradatasqlalchemy.types import FLOAT, NUMBER, DECIMAL, PERIOD_TIMESTAMP
 from teradatasqlalchemy.dialect import preparer, dialect as td_dialect
 import teradataml.dataframe as tdmldf
+from teradataml.dataframe.sql_interfaces import ColumnExpression
 
 from sqlalchemy.sql import select
 from sqlalchemy.sql.expression import text
@@ -792,6 +793,8 @@ class DataFrameUtils():
                 2. List of functions
                 3. Dictionary containing column name as key and aggregate
                    function name (string or list of strings) as value
+                4. ColumnExpression built using the aggregate functions.
+                5. List of ColumnExpression built using the aggregate functions.
 
             percentile:
                 Optional Argument.
@@ -911,7 +914,6 @@ class DataFrameUtils():
             DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=func,
                                                                  describe_op=describe_op, percentile=percentile,
                                                                  tdp=tdp, **kwargs)
-
         if column_supported:
             all_unsupported_columns = False
             new_column_names.append(new_column_name)
@@ -985,6 +987,8 @@ class DataFrameUtils():
                 2. List of functions
                 3. Dictionary containing column name as key and aggregate
                    function name (string or list of strings) as value
+                4. ColumnExpression built using the aggregate functions.
+                5. List of ColumnExpression built using the aggregate functions.
 
             percentile:
                 Optional Argument.
@@ -1064,6 +1068,28 @@ class DataFrameUtils():
         # ['bottom', 'bottom with ties', 'delta_t', 'mad', 'top', 'top with ties']
         # Thus, no extra processing is required for time series aggregates over here.
 
+        if isinstance(func, ColumnExpression) or (isinstance(func, list) and isinstance(func[0], ColumnExpression)):
+            column_agg_expr = []
+            new_column_names = []
+            new_column_types = []
+            if isinstance(func, ColumnExpression):
+                func= UtilFuncs._as_list(func)
+
+            # validate that func is a list of ColumnExpression
+            for expr in func:
+                if not isinstance(expr, ColumnExpression):
+                    raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
+                                              'func', ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression']),
+                                              MessageCodes.UNSUPPORTED_DATATYPE)
+
+            for operations in func:
+                alias = operations.alias_name
+                column_agg_expr.append(operations.compile_label(alias))
+                new_column_names.append(alias)
+                new_column_types.append(operations.type)
+            aggregate_expr = ", ".join(column_agg_expr)
+            return aggregate_expr, new_column_names, new_column_types
+
         # 'operations' contains dict of columns -> list of aggregate operations
         operations = DataFrameUtils._validate_agg_function(func, column_names)
 
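
A minimal sketch of the call pattern this hunk appears to enable, assuming an established teradataml connection; the table `admissions_train`, the column `gpa`, and the routing of `DataFrame.agg` through this helper are illustrative assumptions, not taken from the diff:

```python
from teradataml import DataFrame

df = DataFrame("admissions_train")  # assumed table

# Existing forms, unchanged by this release:
df.agg({"gpa": ["min", "max"]})

# Forms described by the new docstring items 4 and 5 above: a single
# ColumnExpression built from an aggregate function, or a list of them.
df.agg(df.gpa.min())
df.agg([df.gpa.min(), df.gpa.max()])
```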
@@ -1588,16 +1614,17 @@ class DataFrameUtils():
         return col_names, col_types
 
     @staticmethod
-    def _insert_all_from_table(to_table_name, from_table_name, column_list,
-                               temporary=False):
+    def _insert_all_from_table(to_table_name, from_table_name, column_list, to_schema_name=None,
+                               from_schema_name=None, temporary=False):
         """
         Inserts all records from one table into the second, using columns ordered by column list.
 
         PARAMETERS:
             to_table_name - String specifying name of the SQL Table to insert to.
-
+            from_table_name - String specifying name of the SQL Table to insert from.
             column_list - List of strings specifying column names used in the insertion.
-
+            to_schema_name - Name of the database schema to insert table data into.
+            from_schema_name - Name of the database schema to insert table data from.
             temporary - Specifies whether to create Vantage tables as permanent or volatile.
                 Default: False
                 Note: When True:
@@ -1618,16 +1645,25 @@ class DataFrameUtils():
         # Construct INSERT command.
         column_order_string = ', '.join([tdp.quote("{0}".format(element)) for element in column_list])
 
-
-
+        # Generate full name of the destination table.
+        if to_schema_name:
+            full_to_table_name = tdp.quote(to_schema_name) + "." + tdp.quote(to_table_name)
         elif temporary:
             full_to_table_name = tdp.quote(to_table_name)
         else:
             full_to_table_name = tdp.quote(_get_current_databasename()) + "." + tdp.quote(
                 to_table_name)
 
-
+        # Generate full name of source table.
+        if from_schema_name:
+            full_from_table_name = tdp.quote(from_schema_name) + "." + tdp.quote(from_table_name)
+        else:
+            full_from_table_name = tdp.quote(_get_current_databasename()) + "." + tdp.quote(
+                from_table_name)
 
+        insert_sql = SQLBundle._build_insert_from_table_query(full_to_table_name,
+                                                              full_from_table_name,
+                                                              column_order_string)
         # Execute INSERT command.
         return UtilFuncs._execute_ddl_statement(insert_sql)
 
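
Reduced to a standalone sketch, the destination-qualification rule the `_insert_all_from_table` hunks above implement looks like this; `_quote` and `DEFAULT_DB` are stand-ins for `tdp.quote` and `_get_current_databasename()`, and all values are illustrative:

```python
DEFAULT_DB = "alice"  # stand-in for _get_current_databasename()

def _quote(name):
    # Stand-in for tdp.quote: double-quote an identifier.
    return '"{}"'.format(name)

def qualify_destination(table, schema=None, temporary=False):
    # Explicit schema wins; volatile tables stay unqualified;
    # otherwise fall back to the session's default database.
    if schema:
        return _quote(schema) + "." + _quote(table)
    if temporary:
        return _quote(table)
    return _quote(DEFAULT_DB) + "." + _quote(table)

assert qualify_destination("sales", schema="stage_db") == '"stage_db"."sales"'
assert qualify_destination("sales", temporary=True) == '"sales"'
assert qualify_destination("sales") == '"alice"."sales"'
```

The source table follows the same rule, minus the `temporary` branch.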
@@ -1797,7 +1833,9 @@ class DataFrameUtils():
         aed_utils = AedUtils()
         if len(dfs) == 1:
             operation = aed_utils._aed_get_node_query_type(dfs[0]._nodeid)
-            if operation
+            if operation in ["table", "assign"]:
+                # Assign might have removed some columns and if it is only one dataframe,
+                # then return the same dataframe.
                 # Return the same dataframe if it is DataFrame object from table.
                 return dfs[0]
 
teradataml/dataframe/fastload.py
CHANGED

@@ -16,6 +16,8 @@ import pandas as pd
 
 from sqlalchemy import MetaData, Table, Column
 from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
+
+from teradataml.context.context import _get_current_databasename
 from teradataml.dataframe import dataframe
 from teradataml.context.context import *
 from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
@@ -28,13 +30,15 @@ from teradataml.dataframe.copy_to import copy_to_sql, \
     _create_pti_table_object, _extract_column_info, \
     _check_columns_insertion_compatible
 from teradataml.dataframe.data_transfer import _DataTransferUtils
-from
+from teradataml.telemetry_utils.queryband import collect_queryband
 
 
 @collect_queryband(queryband="fstLd")
 def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
              index_label=None, primary_index=None, types=None, batch_size=None,
-             save_errors=False, open_sessions=None
+             save_errors=False, open_sessions=None, err_tbl_1_suffix=None,
+             err_tbl_2_suffix=None, err_tbl_name=None, warn_tbl_name=None,
+             err_staging_db=None):
     """
     The fastload() API writes records from a Pandas DataFrame to Teradata Vantage
     using Fastload. FastLoad API can be used to quickly load large amounts of data
@@ -49,13 +53,24 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
        loaded.
     4. If there are any incorrect rows i.e. due to constraint violations, data type
        conversion errors, etc., FastLoad protocol ignores those rows and inserts
-       all valid rows.
+       all valid rows.
     5. Rows in the DataFrame that failed to get inserted are categorized into errors
        and warnings by FastLoad protocol and these errors and warnings are stored
-       into respective error and warning tables by FastLoad API.
-    6.
-
-
+       into respective error and warning tables by FastLoad API.
+    6. fastload() creates 2 error tables when data is erroneous. These error tables are
+       refered as ERR_1 and ERR_2 tables.
+       * ERR_1 table is used to capture rows that violate the constraints or have format
+         errors. It typically contains information about rows that could not be inserted
+         into the target table due to data conversion errors, constraint violations, etc.
+       * ERR_2 table is used to log any duplicate rows found during the load process and
+         which are not loaded in target table, since fastLoad does not allow duplicate
+         rows to be loaded into the target table.
+    7. When "save_errors" argument is set to True, ERR_1 and ERR_2 tables are presisted.
+       The fully qualified names of ERR_1, ERR_2 and warning tables are shown once the
+       fastload operation is complete.
+    8. If user wants both error and warnings information from pandas dataframe to be
+       persisted rather than that from ERR_1 and ERR_2 tables, then "save_errors" should
+       be set to True and "err_tbl_name" must be provided.
 
     For additional information about FastLoad protocol through teradatasql driver,
     please refer the FASTLOAD section of https://pypi.org/project/teradatasql/#FastLoad
@@ -165,10 +180,19 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
         save_errors:
             Optional Argument.
             Specifies whether to persist the error/warning information in Vantage
-            or not.
-
-
-
+            or not.
+            Notes:
+                * When "save_errors" is set to True, ERR_1 and ERR_2 tables are presisted.
+                  The fully qualified names of ERR_1, ERR_2 and warning table are returned
+                  in a dictionary containing keys named as "ERR_1_table", "ERR_2_table",
+                  "warnings_table" respectively.
+                * When "save_errors" is set to True and "err_tbl_name" is also provided,
+                  "err_tbl_name" takes precedence and error information is persisted into
+                  a single table using pandas dataframe rather than in ERR_1 and ERR_2 tables.
+                * When "save_errors" is set to False, errors and warnings information is
+                  not persisted as tables, but it is returned as pandas dataframes in a
+                  dictionary containing keys named as "errors_dataframe" and "warnings_dataframe"
+                  respectively.
             Default Value: False
             Types: bool
 
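
Those notes imply a simple decision rule for callers; a hedged sketch of consuming the return value, assuming a connected session (the table name and data are illustrative):

```python
import pandas as pd
from teradataml.dataframe.fastload import fastload

pandas_df = pd.DataFrame({"c1": [1, 2], "c2": ["a", "b"]})
result = fastload(df=pandas_df, table_name="my_table", save_errors=True)

if result["ERR_1_table"]:
    # save_errors=True without err_tbl_name: errors persisted to ERR_1/ERR_2.
    print("Inspect", result["ERR_1_table"], "and", result["ERR_2_table"])
elif not result["errors_dataframe"].empty:
    # save_errors=False: errors only surface in-memory as a pandas DataFrame.
    print(result["errors_dataframe"])
```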
@@ -183,59 +207,182 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
             Default Value: None
             Types: int
 
+        err_tbl_1_suffix:
+            Optional Argument.
+            Specifies the suffix for error table 1 created by fastload job.
+            Default Value: "_ERR_1"
+            Types: String
+
+        err_tbl_2_suffix:
+            Optional Argument.
+            Specifies the suffix for error table 2 created by fastload job.
+            Default Value: "_ERR_2"
+            Types: String
+
+        err_tbl_name:
+            Optional Argument.
+            Specifies the name for error table. This argument takes precedence
+            over "save_errors" and saves error information in single table,
+            rather than ERR_1 and ERR_2 error tables.
+            Default value: "td_fl_<table_name>_err_<unique_id>" where table_name
+                           is name of target/staging table and unique_id is logon
+                           sequence number of fastload job.
+            Types: String
+
+        warn_tbl_name:
+            Optional Argument.
+            Specifies the name for warning table.
+            Default value: "td_fl_<table_name>_warn_<unique_id>" where table_name
+                           is name of target/staging table and unique_id is logon
+                           sequence number of fastload job.
+            Types: String
+
+        err_staging_db:
+            Optional Argument.
+            Specifies the name of the database to be used for creating staging
+            table and error/warning tables.
+            Note:
+                Current session user must have CREATE, DROP and INSERT table
+                permissions on err_staging_db database.
+            Types: String
+
     RETURNS:
         A dict containing the following attributes:
             1. errors_dataframe: It is a Pandas DataFrame containing error messages
-               thrown by fastload. DataFrame is empty if there are no errors
+               thrown by fastload. DataFrame is empty if there are no errors or
+               "save_errors" is set to True.
             2. warnings_dataframe: It is a Pandas DataFrame containing warning messages
                thrown by fastload. DataFrame is empty if there are no warnings.
-            3. errors_table:
-               argument save_errors is False.
-            4. warnings_table:
-               argument save_errors is False.
+            3. errors_table: Fully qualified name of the table containing errors. It is
+               an empty string (''), if argument "save_errors" is set to False.
+            4. warnings_table: Fully qualified name of the table containing warnings. It is
+               an empty string (''), if argument "save_errors" is set to False.
+            5. ERR_1_table: Fully qualified name of the ERR 1 table created by fastload
+               job. It is an empty string (''), if argument "save_errors" is set to False.
+            6. ERR_2_table: Fully qualified name of the ERR 2 table created by fastload
+               job. It is an empty string (''), if argument "save_errors" is set to False.
 
     RAISES:
         TeradataMlException
 
     EXAMPLES:
         Saving a Pandas DataFrame using Fastload:
-        ... (37 removed lines of the previous EXAMPLES content; not captured in this diff view)
+        >>> from teradataml.dataframe.fastload import fastload
+        >>> from teradatasqlalchemy.types import *
+
+        >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
+                  'emp_sage': [100, 200, 300, 400],
+                  'emp_id': [133, 144, 155, 177],
+                  'marks': [99.99, 97.32, 94.67, 91.00]
+                  }
+
+        >>> pandas_df = pd.DataFrame(df)
+
+        # Example 1: Default execution.
+        >>> fastload(df = pandas_df, table_name = 'my_table')
+
+        # Example 2: Save a Pandas DataFrame with primary_index.
+        >>> pandas_df = pandas_df.set_index(['emp_id'])
+        >>> fastload(df = pandas_df, table_name = 'my_table_1', primary_index='emp_id')
+
+        # Example 3: Save a Pandas DataFrame using fastload() with index and primary_index.
+        >>> fastload(df = pandas_df, table_name = 'my_table_2', index=True,
+                     primary_index='index_label')
+
+        # Example 4: Save a Pandas DataFrame using types, appending to the table if it already exists.
+        >>> fastload(df = pandas_df, table_name = 'my_table_3', schema_name = 'alice',
+                     index = True, index_label = 'my_index_label',
+                     primary_index = ['emp_id'], if_exists = 'append',
+                     types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
+                              'emp_id': BIGINT, 'marks': DECIMAL})
+
+        # Example 5: Save a Pandas DataFrame using levels in index of type MultiIndex.
+        >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
+        >>> fastload(df = pandas_df, table_name = 'my_table_4', schema_name = 'alice',
+                     index = True, index_label = ['index1', 'index2'],
+                     primary_index = ['index1'], if_exists = 'replace')
+
+        # Example 6: Save a Pandas DataFrame by opening specified number of teradata data transfer sessions.
+        >>> fastload(df = pandas_df, table_name = 'my_table_5', open_sessions = 2)
+
+        # Example 7: Save a Pandas Dataframe to a table in specified target database "schema_name".
+        # Save errors and warnings to database specified with "err_staging_db".
+        # Save errors to table named as "err_tbl_name" and warnings to "warn_tbl_name".
+        # Given that, user is connected to a database different from "schema_name"
+        # and "err_staging_db".
+
+        # Create a pandas dataframe having one duplicate and one fualty row.
+        >>>> data_dict = {"C_ID": [301, 301, 302, 303, 304, 305, 306, 307, 308],
+                          "C_timestamp": ['2014-01-06 09:01:25', '2014-01-06 09:01:25',
+                                          '2015-01-06 09:01:25.25.122200', '2017-01-06 09:01:25.11111',
+                                          '2013-01-06 09:01:25', '2019-03-06 10:15:28',
+                                          '2014-01-06 09:01:25.1098', '2014-03-06 10:01:02',
+                                          '2014-03-06 10:01:20.0000']}
+        >>> my_df = pd.DataFrame(data_dict)
+
+        # Fastlaod data in non-default schema "target_db" and save erors and warnings in given tables.
+        >>> fastload(df=my_df, table_name='fastload_with_err_warn_tbl_stag_db',
+                     if_exists='replace', primary_index='C_ID', schema_name='target_db',
+                     types={'C_ID': INTEGER, 'C_timestamp': TIMESTAMP(6)},
+                     err_tbl_name='fld_errors', warn_tbl_name='fld_warnings',
+                     err_staging_db='stage_db')
+        Processed 9 rows in batch 1.
+        {'errors_dataframe':   batch_no                                      error_message
+        0         1  [Session 14527] [Teradata Database] [Error 26...,
+        'warnings_dataframe':        batch_no                                      error_message
+        0  batch_summary  [Session 14526] [Teradata SQL Driver] [Warnin...,
+        'errors_table': 'stage_db.fld_errors',
+        'warnings_table': 'stage_db.fld_warnings',
+        'ERR_1_table': '',
+        'ERR_2_table': ''}
+
+        # Validate loaded data table.
+        >>> DataFrame(in_schema("target_db", "fastload_with_err_warn_tbl_stag_db"))
+        C_ID                 C_timestamp
+        303   2017-01-06 09:01:25.111110
+        306   2014-01-06 09:01:25.109800
+        304   2013-01-06 09:01:25.000000
+        307   2014-03-06 10:01:02.000000
+        305   2019-03-06 10:15:28.000000
+        301   2014-01-06 09:01:25.000000
+        308   2014-03-06 10:01:20.000000
+
+        # Validate error and warning tables.
+        >>> DataFrame(in_schema("stage_db", "fld_errors"))
+        batch_no  error_message
+        1         [Session 14527] [Teradata Database] [Error 2673] FastLoad failed to insert 1 of 9 batched rows. Batched row 3 failed to insert because of Teradata Database error 2673 in "target_db"."fastload_with_err_warn_tbl_stag_db"."C_timestamp"
+
+        >>> DataFrame(in_schema("stage_db", "fld_warnings"))
+        batch_no       error_message
+        batch_summary  [Session 14526] [Teradata SQL Driver] [Warning 518] Found 1 duplicate or faulty row(s) while ending FastLoad of database table "target_db"."fastload_with_err_warn_tbl_stag_db": expected a row count of 8, got a row count of 7
+
+        # Example 8: Save a Pandas Dataframe to a table in specified target database "schema_name".
+        # Save errors in ERR_1 and ERR_2 tables having user defined suffixes provided
+        # in "err_tbl_1_suffix" and "err_tbl_2_suffix".
+        # Source Pandas dataframe is same as Example 7.
+
+        >>> fastload(df=my_df, table_name = 'fastload_with_err_warn_tbl_stag_db',
+                     schema_name = 'target_db', if_exists = 'append',
+                     types={'C_ID': INTEGER, 'C_timestamp': TIMESTAMP(6)},
+                     err_staging_db='stage_db', save_errors=True,
+                     err_tbl_1_suffix="_user_err_1", err_tbl_2_suffix="_user_err_2")
+        {'errors_dataframe': Empty DataFrame
+        Columns: []
+        Index: [],
+        'warnings_dataframe':        batch_no                                      error_message
+        0  batch_summary  [Session 14699] [Teradata SQL Driver] [Warnin...,
+        'errors_table': '',
+        'warnings_table': 'stage_db.td_fl_fastload_with_err_warn_tbl_stag_db_warn_1730',
+        'ERR_1_table': 'stage_db.ml__fl_stag_1716272404181579_user_err_1',
+        'ERR_2_table': 'stage_db.ml__fl_stag_1716272404181579_user_err_2'}
+
+        # Validate ERR_1 and ERR_2 tables.
+        >>> DataFrame(in_schema("stage_db", "ml__fl_stag_1716270574550744_user_err_1"))
+        ErrorCode  ErrorFieldName  DataParcel
+        2673       F_C_timestamp   b'12E...'
+
+        >>> DataFrame(in_schema("stage_db", "ml__fl_stag_1716270574550744_user_err_2"))
+        C_ID  C_timestamp
 
     """
     # Deriving global connection using get_connection()
@@ -253,7 +400,10 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
                                 index=index, index_label=index_label, primary_index=primary_index,
                                 types=types, batch_size=batch_size,
                                 save_errors=save_errors, api_name='fastload',
-                                use_fastload=True, open_sessions=open_sessions
+                                use_fastload=True, open_sessions=open_sessions,
+                                err_tbl_1_suffix=err_tbl_1_suffix, err_tbl_2_suffix=err_tbl_2_suffix,
+                                err_tbl_name=err_tbl_name, warn_tbl_name=warn_tbl_name,
+                                err_staging_db=err_staging_db)
     # Validate DataFrame & related flags; Proceed only when True
     dt_obj._validate()
 
@@ -316,7 +466,8 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
         if not table_exists or if_exists.lower() == 'replace':
             dt_obj._create_or_replace_table(con, table_exists=table_exists)
 
-
+            # Insert data to target table using fastload.
+            fl_dict = _insert_from_pd_dataframe_with_fastload(dt_obj, table_name, batch_size)
 
         # Check column compatibility for insertion when table exists and if_exists = 'append'
         if table_exists and if_exists.lower() == 'append':
@@ -332,28 +483,37 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
             try:
                 # Create staging table and use FastLoad to load data.
                 # Then copy all the rows from staging table to target table using insert_into sql.
-
+                # If err_staging_db is not provided, create staging table
+                # object in default connected DB.
+                if err_staging_db is None:
+                    err_staging_db = _get_current_databasename()
+                stag_table_name = UtilFuncs._generate_temp_table_name(databasename=err_staging_db,
+                                                                      prefix="fl_stag",
                                                                       gc_on_quit=False,
                                                                       quote=False,
                                                                       table_type=TeradataConstants.TERADATA_TABLE)
 
-                # Get the table name without schema name for further steps
-                stag_table_name =
-                # Create staging table object
-                dt_obj._create_table(con, table_name=stag_table_name
+                # Get the table name without schema name for further steps.
+                stag_table_name = UtilFuncs._extract_table_name(stag_table_name)
+                # Create staging table object.
+                dt_obj._create_table(con, table_name=stag_table_name,
+                                     schema_name=err_staging_db)
 
-                # Insert data to staging table using
-                fl_dict =
+                # Insert data to staging table using fastload.
+                fl_dict = _insert_from_pd_dataframe_with_fastload(dt_obj, stag_table_name, batch_size, err_staging_db)
 
-                # Insert data from staging table to target
+                # Insert data from staging table to target table.
                 df_utils._insert_all_from_table(table_name,
-
-                                                cols[0],
+                                                stag_table_name,
+                                                cols[0],
+                                                schema_name,
+                                                err_staging_db)
             except:
                 raise
             finally:
+                # Drop the staging table.
                 if stag_table_name:
-                    UtilFuncs._drop_table(dt_obj._get_fully_qualified_table_name(stag_table_name))
+                    UtilFuncs._drop_table(dt_obj._get_fully_qualified_table_name(stag_table_name, err_staging_db))
 
     except (TeradataMlException, ValueError, TypeError):
         raise
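
In outline, the append path above now stages through `err_staging_db`; a comment-level sketch of the sequence, with helper names quoted from the hunk:

```python
# 1. Resolve the staging DB: err_staging_db, else the current session database.
# 2. Generate a temp name there:
#        UtilFuncs._generate_temp_table_name(databasename=err_staging_db, prefix="fl_stag", ...)
# 3. Create the staging table:
#        dt_obj._create_table(con, table_name=stag_table_name, schema_name=err_staging_db)
# 4. FastLoad into staging:
#        _insert_from_pd_dataframe_with_fastload(dt_obj, stag_table_name, batch_size, err_staging_db)
# 5. Copy staging -> target:
#        df_utils._insert_all_from_table(table_name, stag_table_name, cols[0], schema_name, err_staging_db)
# 6. Always drop staging in the finally block:
#        UtilFuncs._drop_table(dt_obj._get_fully_qualified_table_name(stag_table_name, err_staging_db))
```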
@@ -363,10 +523,10 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
     return fl_dict
 
 
-def
+def _insert_from_pd_dataframe_with_fastload(dt_obj, table_name, batch_size, to_schema_name=None):
     """
-    This is an internal function used to
-    iterate rows, and insert rows manually. Used for
+    This is an internal function used to sequentially extract column info from pandas DataFrame,
+    iterate rows, and insert rows manually. Used for insertions to Tables with Pandas index.
     This uses DBAPI's escape functions for Fastload which is a batch insertion method.
 
     PARAMETERS:
@@ -382,6 +542,10 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
             Specifies the number of rows to be inserted in a batch.
             Types: Int
 
+        to_schema_name:
+            Optional Argument.
+            Specifies name of the database schema where target table needs to be created.
+
     RETURNS:
         dict
 
@@ -389,7 +553,7 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
         Exception
 
     EXAMPLES:
-
+        _insert_from_pd_dataframe_with_fastload(dt_obj, table_name, batch_size=100)
     """
     conn = get_connection().connection
     # Create a cursor from connection object
@@ -430,9 +594,9 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
    # FastLoad. Returns an empty string if the request is not a FastLoad.
 
     # Quoted, schema-qualified table name.
-    table = dt_obj._get_fully_qualified_table_name(table_name)
+    table = dt_obj._get_fully_qualified_table_name(table_name, to_schema_name)
 
-    # Form the INSERT query for
+    # Form the INSERT query for fastload.
     ins = dt_obj._form_insert_query(table)
 
     # Turn off autocommit before the Fastload insertion
|
|
|
440
604
|
DriverEscapeFunctions.AUTOCOMMIT_OFF)
|
|
441
605
|
|
|
442
606
|
# Initialize dict template for saving error/warning information
|
|
443
|
-
err_dict = {key:[] for key in ['batch_no', 'error_message']}
|
|
444
|
-
warn_dict = {key:[] for key in ['batch_no', 'error_message']}
|
|
607
|
+
err_dict = {key: [] for key in ['batch_no', 'error_message']}
|
|
608
|
+
warn_dict = {key: [] for key in ['batch_no', 'error_message']}
|
|
445
609
|
|
|
446
610
|
batch_number = 1
|
|
447
611
|
num_batches = int(dt_obj.df.shape[0]/batch_size)
|
|
448
612
|
|
|
449
|
-
|
|
450
613
|
# Empty queryband buffer before SQL call.
|
|
451
614
|
UtilFuncs._set_queryband()
|
|
615
|
+
|
|
452
616
|
for i in range(0, dt_obj.df.shape[0], batch_size):
|
|
453
617
|
# Add the remaining rows to last batch after second last batch
|
|
454
618
|
if (batch_number == num_batches) :
|
|
455
619
|
last_elem = dt_obj.df.shape[0]
|
|
456
|
-
else
|
|
620
|
+
else:
|
|
457
621
|
last_elem = i + batch_size
|
|
458
622
|
|
|
459
623
|
pdf = dt_obj.df.iloc[i:last_elem]
|
|
@@ -466,10 +630,10 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
             if dt_obj.index is True:
                 insert_list2.extend(row[0]) if is_multi_index else insert_list2.append(row[0])
             insert_list.append(insert_list2)
-        # Execute insert statement
-        cur.execute
+        # Execute insert statement.
+        cur.execute(ins, insert_list)
 
-        # Get error and warning information
+        # Get error and warning information from cursor.
         err, _ = dt_obj._process_fastexport_errors_warnings(ins)
         if len(err) != 0:
             err_dict['batch_no'].extend([batch_number] * len(err))
|
|
|
487
651
|
logon_seq_number = dt_obj._process_escape_functions(cur, escape_function= \
|
|
488
652
|
DriverEscapeFunctions.LOGON_SEQ_NUM,
|
|
489
653
|
insert_query=ins)
|
|
490
|
-
|
|
491
654
|
# Commit the rows
|
|
492
655
|
conn.commit()
|
|
493
656
|
|
|
@@ -497,19 +660,38 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
         if len(warn) != 0:
             warn_dict['batch_no'].extend(['batch_summary'] * len(warn))
             warn_dict['error_message'].extend(warn)
-
-        # Get error and warning
+
+        # Get error and warning information for error and warning tables, persist
         # error and warning tables to Vantage if user has specified save_error as True
         # else show it as pandas dataframe on console.
         pd_err_df = dt_obj._get_pandas_df_from_errors_warnings(err_dict)
-        if not pd_err_df.empty:
-            msg_type = "err"
-            error_tablename = dt_obj._create_error_warnings_table(pd_err_df, msg_type, logon_seq_number[0][0])
-
         pd_warn_df = dt_obj._get_pandas_df_from_errors_warnings(warn_dict)
-
-
-
+
+        # Create persistent tables using pandas df if
+        # save_errors=True or
+        # tables names for errors or warning are provided by user.
+        if dt_obj.save_errors or dt_obj.err_tbl_name:
+            if not pd_err_df.empty:
+                error_tablename = dt_obj._create_error_warnings_table(pd_err_df, "err", logon_seq_number[0][0],
+                                                                      dt_obj.err_tbl_name)
+        if dt_obj.save_errors or dt_obj.warn_tbl_name:
+            if not pd_warn_df.empty:
+                warn_tablename = dt_obj._create_error_warnings_table(pd_warn_df, "warn", logon_seq_number[0][0],
+                                                                     dt_obj.warn_tbl_name)
+
+        # Generate ERR_1 and ERR_2 table names if save_errors=True and
+        # errors are not stored in user provided error table name.
+        if dt_obj.save_errors and not dt_obj.err_tbl_name:
+            err_1_table = "{}.{}{}".format(dt_obj.err_staging_db if dt_obj.err_staging_db else _get_current_databasename(),
+                                           table_name,
+                                           dt_obj.err_tbl_1_suffix if dt_obj.err_tbl_1_suffix else "_ERR_1")
+            err_2_table = "{}.{}{}".format(dt_obj.err_staging_db if dt_obj.err_staging_db else _get_current_databasename(),
+                                           table_name,
+                                           dt_obj.err_tbl_2_suffix if dt_obj.err_tbl_2_suffix else "_ERR_2")
+
+        else:
+            err_1_table = ""
+            err_2_table = ""
 
     except Exception:
         conn.rollback()
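
The default ERR table naming above reduces to one format call; a runnable sketch with illustrative values, where `current_db` stands in for `_get_current_databasename()`:

```python
def default_err_table(table_name, staging_db=None, suffix=None,
                      default_suffix="_ERR_1", current_db="alice"):
    # Mirrors the format() calls above: "<db>.<staging table><suffix>".
    db = staging_db if staging_db else current_db
    return "{}.{}{}".format(db, table_name, suffix if suffix else default_suffix)

print(default_err_table("fl_stag_1716272404181579", staging_db="stage_db"))
# stage_db.fl_stag_1716272404181579_ERR_1
```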
@@ -520,7 +702,8 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
     cur.close()
 
     return {"errors_dataframe": pd_err_df, "warnings_dataframe": pd_warn_df,
-            "errors_table": error_tablename, "warnings_table": warn_tablename
+            "errors_table": error_tablename, "warnings_table": warn_tablename,
+            "ERR_1_table": err_1_table, "ERR_2_table": err_2_table}
 
 
 def _get_batchsize(df):