teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (263)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +183 -0
  4. teradataml/__init__.py +6 -3
  5. teradataml/_version.py +2 -2
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +275 -40
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +1 -0
  11. teradataml/analytics/json_parser/utils.py +17 -21
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +10 -2
  15. teradataml/analytics/table_operator/__init__.py +3 -2
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +62 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1553 -319
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +276 -319
  22. teradataml/automl/data_transformation.py +163 -81
  23. teradataml/automl/feature_engineering.py +402 -239
  24. teradataml/automl/feature_exploration.py +9 -2
  25. teradataml/automl/model_evaluation.py +48 -51
  26. teradataml/automl/model_training.py +291 -189
  27. teradataml/catalog/byom.py +8 -8
  28. teradataml/catalog/model_cataloging_utils.py +1 -1
  29. teradataml/clients/auth_client.py +133 -0
  30. teradataml/clients/pkce_client.py +1 -1
  31. teradataml/common/aed_utils.py +3 -2
  32. teradataml/common/constants.py +48 -6
  33. teradataml/common/deprecations.py +13 -7
  34. teradataml/common/garbagecollector.py +156 -120
  35. teradataml/common/messagecodes.py +6 -1
  36. teradataml/common/messages.py +3 -1
  37. teradataml/common/sqlbundle.py +1 -1
  38. teradataml/common/utils.py +103 -11
  39. teradataml/common/wrapper_utils.py +1 -1
  40. teradataml/context/context.py +121 -31
  41. teradataml/data/advertising.csv +201 -0
  42. teradataml/data/bank_marketing.csv +11163 -0
  43. teradataml/data/bike_sharing.csv +732 -0
  44. teradataml/data/boston2cols.csv +721 -0
  45. teradataml/data/breast_cancer.csv +570 -0
  46. teradataml/data/complaints_test_tokenized.csv +353 -0
  47. teradataml/data/complaints_tokens_model.csv +348 -0
  48. teradataml/data/covid_confirm_sd.csv +83 -0
  49. teradataml/data/customer_segmentation_test.csv +2628 -0
  50. teradataml/data/customer_segmentation_train.csv +8069 -0
  51. teradataml/data/dataframe_example.json +10 -0
  52. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
  53. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
  54. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
  55. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
  56. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  57. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
  58. teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
  59. teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
  60. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
  61. teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
  62. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  63. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
  64. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
  65. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
  66. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  67. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  68. teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
  69. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
  70. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
  71. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
  72. teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
  73. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
  74. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  75. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  76. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  77. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
  78. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
  79. teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
  80. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  81. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  82. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  83. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  84. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  85. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  86. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  87. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  88. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  89. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  90. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  91. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  92. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  93. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  94. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  95. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  96. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  97. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  98. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  99. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  100. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  101. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  102. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  103. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  104. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  105. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  106. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  107. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  108. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  109. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  110. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  111. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  112. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  113. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  114. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  115. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  116. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  117. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  118. teradataml/data/dwt2d_dataTable.csv +65 -0
  119. teradataml/data/dwt_dataTable.csv +8 -0
  120. teradataml/data/dwt_filterTable.csv +3 -0
  121. teradataml/data/finance_data4.csv +13 -0
  122. teradataml/data/glm_example.json +28 -1
  123. teradataml/data/grocery_transaction.csv +19 -0
  124. teradataml/data/housing_train_segment.csv +201 -0
  125. teradataml/data/idwt2d_dataTable.csv +5 -0
  126. teradataml/data/idwt_dataTable.csv +8 -0
  127. teradataml/data/idwt_filterTable.csv +3 -0
  128. teradataml/data/insect2Cols.csv +61 -0
  129. teradataml/data/interval_data.csv +5 -0
  130. teradataml/data/jsons/paired_functions.json +14 -0
  131. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
  132. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  133. teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
  134. teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
  135. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
  136. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
  137. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
  138. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  139. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  140. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
  141. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  142. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  143. teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
  144. teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
  145. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
  146. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
  147. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
  148. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  149. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  150. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  151. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
  152. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
  153. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
  154. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  155. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  156. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  157. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  158. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  159. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  160. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  161. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  162. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  163. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  164. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  165. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  166. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  167. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  168. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  169. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  170. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  171. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  172. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  173. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  174. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  175. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  176. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  177. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  178. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  179. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  180. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  181. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  182. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  183. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  184. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  185. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  186. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  187. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  188. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  189. teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
  190. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  191. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  192. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  193. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  194. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  195. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
  196. teradataml/data/kmeans_example.json +5 -0
  197. teradataml/data/kmeans_table.csv +10 -0
  198. teradataml/data/load_example_data.py +8 -2
  199. teradataml/data/naivebayestextclassifier_example.json +1 -1
  200. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  201. teradataml/data/onehot_encoder_train.csv +4 -0
  202. teradataml/data/openml_example.json +29 -0
  203. teradataml/data/peppers.png +0 -0
  204. teradataml/data/real_values.csv +14 -0
  205. teradataml/data/sax_example.json +8 -0
  206. teradataml/data/scale_attributes.csv +3 -0
  207. teradataml/data/scale_example.json +52 -1
  208. teradataml/data/scale_input_part_sparse.csv +31 -0
  209. teradataml/data/scale_input_partitioned.csv +16 -0
  210. teradataml/data/scale_input_sparse.csv +11 -0
  211. teradataml/data/scale_parameters.csv +3 -0
  212. teradataml/data/scripts/deploy_script.py +21 -2
  213. teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
  214. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
  215. teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
  216. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
  217. teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
  218. teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
  219. teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
  220. teradataml/data/star_pivot.csv +8 -0
  221. teradataml/data/templates/open_source_ml.json +2 -1
  222. teradataml/data/teradataml_example.json +97 -1
  223. teradataml/data/timestamp_data.csv +4 -0
  224. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  225. teradataml/data/uaf_example.json +55 -1
  226. teradataml/data/unpivot_example.json +15 -0
  227. teradataml/data/url_data.csv +9 -0
  228. teradataml/data/windowdfft.csv +16 -0
  229. teradataml/data/ztest_example.json +16 -0
  230. teradataml/dataframe/copy_to.py +9 -4
  231. teradataml/dataframe/data_transfer.py +125 -64
  232. teradataml/dataframe/dataframe.py +575 -57
  233. teradataml/dataframe/dataframe_utils.py +47 -9
  234. teradataml/dataframe/fastload.py +273 -90
  235. teradataml/dataframe/functions.py +339 -0
  236. teradataml/dataframe/row.py +160 -0
  237. teradataml/dataframe/setop.py +2 -2
  238. teradataml/dataframe/sql.py +740 -18
  239. teradataml/dataframe/window.py +1 -1
  240. teradataml/dbutils/dbutils.py +324 -18
  241. teradataml/geospatial/geodataframe.py +1 -1
  242. teradataml/geospatial/geodataframecolumn.py +1 -1
  243. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  244. teradataml/lib/aed_0_1.dll +0 -0
  245. teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
  246. teradataml/options/__init__.py +16 -5
  247. teradataml/options/configure.py +39 -6
  248. teradataml/options/display.py +2 -2
  249. teradataml/plot/axis.py +4 -4
  250. teradataml/scriptmgmt/UserEnv.py +26 -19
  251. teradataml/scriptmgmt/lls_utils.py +120 -16
  252. teradataml/table_operators/Script.py +4 -5
  253. teradataml/table_operators/TableOperator.py +160 -26
  254. teradataml/table_operators/table_operator_util.py +88 -41
  255. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  256. teradataml/telemetry_utils/__init__.py +0 -0
  257. teradataml/telemetry_utils/queryband.py +52 -0
  258. teradataml/utils/validators.py +41 -3
  259. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
  260. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
  261. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
  262. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
  263. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
@@ -35,6 +35,7 @@ from teradataml.utils.utils import execute_sql
 from teradatasqlalchemy.types import FLOAT, NUMBER, DECIMAL, PERIOD_TIMESTAMP
 from teradatasqlalchemy.dialect import preparer, dialect as td_dialect
 import teradataml.dataframe as tdmldf
+from teradataml.dataframe.sql_interfaces import ColumnExpression
 
 from sqlalchemy.sql import select
 from sqlalchemy.sql.expression import text
@@ -792,6 +793,8 @@ class DataFrameUtils():
                2. List of functions
                3. Dictionary containing column name as key and aggregate
                   function name (string or list of strings) as value
+               4. ColumnExpression built using the aggregate functions.
+               5. List of ColumnExpression built using the aggregate functions.
 
         percentile:
             Optional Argument.
@@ -911,7 +914,6 @@ class DataFrameUtils():
                 DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=func,
                                                                      describe_op=describe_op, percentile=percentile,
                                                                      tdp=tdp, **kwargs)
-
             if column_supported:
                 all_unsupported_columns = False
                 new_column_names.append(new_column_name)
@@ -985,6 +987,8 @@ class DataFrameUtils():
                2. List of functions
                3. Dictionary containing column name as key and aggregate
                   function name (string or list of strings) as value
+               4. ColumnExpression built using the aggregate functions.
+               5. List of ColumnExpression built using the aggregate functions.
 
         percentile:
             Optional Argument.
@@ -1064,6 +1068,28 @@
         # ['bottom', 'bottom with ties', 'delta_t', 'mad', 'top', 'top with ties']
         # Thus, no extra processing is required for time series aggregates over here.
 
+        if isinstance(func, ColumnExpression) or (isinstance(func, list) and isinstance(func[0], ColumnExpression)):
+            column_agg_expr = []
+            new_column_names = []
+            new_column_types = []
+            if isinstance(func, ColumnExpression):
+                func = UtilFuncs._as_list(func)
+
+            # Validate that func is a list of ColumnExpression.
+            for expr in func:
+                if not isinstance(expr, ColumnExpression):
+                    raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
+                                              'func', ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression']),
+                                              MessageCodes.UNSUPPORTED_DATATYPE)
+
+            for operations in func:
+                alias = operations.alias_name
+                column_agg_expr.append(operations.compile_label(alias))
+                new_column_names.append(alias)
+                new_column_types.append(operations.type)
+            aggregate_expr = ", ".join(column_agg_expr)
+            return aggregate_expr, new_column_names, new_column_types
+
         # 'operations' contains dict of columns -> list of aggregate operations
         operations = DataFrameUtils._validate_agg_function(func, column_names)
 
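Together with the docstring additions above, this hunk lets DataFrame.agg() accept aggregate ColumnExpressions directly: each expression is compiled with its alias into the generated SELECT list. A minimal sketch of the call style (the table and column names are hypothetical):

    >>> from teradataml import DataFrame
    >>> df = DataFrame("sales")
    >>> # One aggregate ColumnExpression, or a list of them; any non-ColumnExpression
    >>> # element raises UNSUPPORTED_DATATYPE per the validation loop above.
    >>> df.agg(df.amount.sum())
    >>> df.agg([df.amount.mean(), df.amount.max()])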
@@ -1588,16 +1614,17 @@ class DataFrameUtils():
         return col_names, col_types
 
     @staticmethod
-    def _insert_all_from_table(to_table_name, from_table_name, column_list, schema_name,
-                               temporary=False):
+    def _insert_all_from_table(to_table_name, from_table_name, column_list, to_schema_name=None,
+                               from_schema_name=None, temporary=False):
         """
         Inserts all records from one table into the second, using columns ordered by column list.
 
         PARAMETERS:
             to_table_name - String specifying name of the SQL Table to insert to.
-            insert_from_table_name - String specifying name of the SQL Table to insert from.
+            from_table_name - String specifying name of the SQL Table to insert from.
             column_list - List of strings specifying column names used in the insertion.
-            schema_name - Name of the database schema to insert table data into.
+            to_schema_name - Name of the database schema to insert table data into.
+            from_schema_name - Name of the database schema to insert table data from.
             temporary - Specifies whether to create Vantage tables as permanent or volatile.
                         Default: False
                         Note: When True:
@@ -1618,16 +1645,25 @@
         # Construct INSERT command.
         column_order_string = ', '.join([tdp.quote("{0}".format(element)) for element in column_list])
 
-        if schema_name:
-            full_to_table_name = tdp.quote(schema_name) + "." + tdp.quote(to_table_name)
+        # Generate full name of the destination table.
+        if to_schema_name:
+            full_to_table_name = tdp.quote(to_schema_name) + "." + tdp.quote(to_table_name)
         elif temporary:
             full_to_table_name = tdp.quote(to_table_name)
         else:
             full_to_table_name = tdp.quote(_get_current_databasename()) + "." + tdp.quote(
                 to_table_name)
 
-        insert_sql = SQLBundle._build_insert_from_table_query(full_to_table_name, from_table_name, column_order_string)
+        # Generate full name of source table.
+        if from_schema_name:
+            full_from_table_name = tdp.quote(from_schema_name) + "." + tdp.quote(from_table_name)
+        else:
+            full_from_table_name = tdp.quote(_get_current_databasename()) + "." + tdp.quote(
+                from_table_name)
 
+        insert_sql = SQLBundle._build_insert_from_table_query(full_to_table_name,
+                                                              full_from_table_name,
+                                                              column_order_string)
         # Execute INSERT command.
         return UtilFuncs._execute_ddl_statement(insert_sql)
 
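For orientation, the two qualified names feed SQLBundle._build_insert_from_table_query(), which assembles an INSERT ... SELECT statement. The rendering below is a hypothetical example with to_schema_name='target_db', from_schema_name='stage_db' and tables/columns invented for illustration (the exact SQL text is owned by SQLBundle):

    >>> insert_sql
    'INSERT INTO "target_db"."t_target" ("c1", "c2") SELECT "c1", "c2" FROM "stage_db"."t_stage"'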
@@ -1797,7 +1833,9 @@ class DataFrameUtils():
         aed_utils = AedUtils()
         if len(dfs) == 1:
             operation = aed_utils._aed_get_node_query_type(dfs[0]._nodeid)
-            if operation == "table":
+            if operation in ["table", "assign"]:
+                # Assign might have removed some columns and if it is only one dataframe,
+                # then return the same dataframe.
                 # Return the same dataframe if it is DataFrame object from table.
                 return dfs[0]
 
@@ -16,6 +16,8 @@ import pandas as pd
 
 from sqlalchemy import MetaData, Table, Column
 from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
+
+from teradataml.context.context import _get_current_databasename
 from teradataml.dataframe import dataframe
 from teradataml.context.context import *
 from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
@@ -28,13 +30,15 @@ from teradataml.dataframe.copy_to import copy_to_sql, \
     _create_pti_table_object, _extract_column_info, \
     _check_columns_insertion_compatible
 from teradataml.dataframe.data_transfer import _DataTransferUtils
-from teradatasqlalchemy.telemetry.queryband import collect_queryband
+from teradataml.telemetry_utils.queryband import collect_queryband
 
 
 @collect_queryband(queryband="fstLd")
 def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
              index_label=None, primary_index=None, types=None, batch_size=None,
-             save_errors=False, open_sessions=None):
+             save_errors=False, open_sessions=None, err_tbl_1_suffix=None,
+             err_tbl_2_suffix=None, err_tbl_name=None, warn_tbl_name=None,
+             err_staging_db=None):
     """
     The fastload() API writes records from a Pandas DataFrame to Teradata Vantage
     using Fastload. FastLoad API can be used to quickly load large amounts of data
@@ -49,13 +53,24 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
        loaded.
     4. If there are any incorrect rows i.e. due to constraint violations, data type
        conversion errors, etc., FastLoad protocol ignores those rows and inserts
-       all valid rows.
+       all valid rows.
     5. Rows in the DataFrame that failed to get inserted are categorized into errors
        and warnings by FastLoad protocol and these errors and warnings are stored
-       into respective error and warning tables by FastLoad API.
-    6. If save_errors argument is True, the names of error and warning tables are
-       shown once the fastload operation is complete. These tables will be persisted
-       using copy_to_sql.
+       into respective error and warning tables by FastLoad API.
+    6. fastload() creates 2 error tables when data is erroneous. These error tables
+       are referred to as ERR_1 and ERR_2 tables.
+       * ERR_1 table is used to capture rows that violate the constraints or have format
+         errors. It typically contains information about rows that could not be inserted
+         into the target table due to data conversion errors, constraint violations, etc.
+       * ERR_2 table is used to log any duplicate rows found during the load process
+         which are not loaded in the target table, since FastLoad does not allow duplicate
+         rows to be loaded into the target table.
+    7. When the "save_errors" argument is set to True, ERR_1 and ERR_2 tables are persisted.
+       The fully qualified names of ERR_1, ERR_2 and warning tables are shown once the
+       fastload operation is complete.
+    8. If the user wants the error and warning information from the pandas DataFrame to be
+       persisted rather than that from the ERR_1 and ERR_2 tables, then "save_errors" should
+       be set to True and "err_tbl_name" must be provided.
 
     For additional information about FastLoad protocol through teradatasql driver,
     please refer to the FASTLOAD section of https://pypi.org/project/teradatasql/#FastLoad
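A minimal sketch of the two persistence modes described in notes 6-8 (table names are hypothetical; an established Vantage connection and a pandas DataFrame like the one in the EXAMPLES section below are assumed):

    >>> # Notes 6-7: persist the FastLoad job's own ERR_1/ERR_2 tables.
    >>> res = fastload(df=pandas_df, table_name='my_table', save_errors=True)
    >>> res['ERR_1_table'], res['ERR_2_table']
    >>> # Note 8: persist driver-reported errors into one user-named table instead.
    >>> res = fastload(df=pandas_df, table_name='my_table', save_errors=True,
    ...                err_tbl_name='my_fl_errors')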
@@ -165,10 +180,19 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
     save_errors:
         Optional Argument.
         Specifies whether to persist the error/warning information in Vantage
-        or not. If save_errors is set to False, error/warnings are not persisted
-        as tables. If argument is set to True, the error and warnings information
-        are presisted and names of error and warning tables are returned. Otherwise,
-        the function returns None for the names of the tables.
+        or not.
+        Notes:
+            * When "save_errors" is set to True, ERR_1 and ERR_2 tables are persisted.
+              The fully qualified names of the ERR_1, ERR_2 and warning tables are returned
+              in a dictionary containing keys named "ERR_1_table", "ERR_2_table" and
+              "warnings_table" respectively.
+            * When "save_errors" is set to True and "err_tbl_name" is also provided,
+              "err_tbl_name" takes precedence and error information is persisted into
+              a single table using a pandas DataFrame rather than in ERR_1 and ERR_2 tables.
+            * When "save_errors" is set to False, error and warning information is
+              not persisted as tables, but it is returned as pandas DataFrames in a
+              dictionary containing keys named "errors_dataframe" and "warnings_dataframe"
+              respectively.
         Default Value: False
         Types: bool
@@ -183,59 +207,182 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
         Default Value: None
         Types: int
 
+    err_tbl_1_suffix:
+        Optional Argument.
+        Specifies the suffix for error table 1 created by the fastload job.
+        Default Value: "_ERR_1"
+        Types: String
+
+    err_tbl_2_suffix:
+        Optional Argument.
+        Specifies the suffix for error table 2 created by the fastload job.
+        Default Value: "_ERR_2"
+        Types: String
+
+    err_tbl_name:
+        Optional Argument.
+        Specifies the name for the error table. This argument takes precedence
+        over "save_errors" and saves error information in a single table,
+        rather than in the ERR_1 and ERR_2 error tables.
+        Default Value: "td_fl_<table_name>_err_<unique_id>", where table_name
+                       is the name of the target/staging table and unique_id is
+                       the logon sequence number of the fastload job.
+        Types: String
+
+    warn_tbl_name:
+        Optional Argument.
+        Specifies the name for the warning table.
+        Default Value: "td_fl_<table_name>_warn_<unique_id>", where table_name
+                       is the name of the target/staging table and unique_id is
+                       the logon sequence number of the fastload job.
+        Types: String
+
+    err_staging_db:
+        Optional Argument.
+        Specifies the name of the database to be used for creating the staging
+        table and the error/warning tables.
+        Note:
+            The current session user must have CREATE, DROP and INSERT table
+            permissions on the err_staging_db database.
+        Types: String
+
     RETURNS:
         A dict containing the following attributes:
             1. errors_dataframe: It is a Pandas DataFrame containing error messages
-               thrown by fastload. DataFrame is empty if there are no errors.
+               thrown by fastload. DataFrame is empty if there are no errors or
+               "save_errors" is set to True.
             2. warnings_dataframe: It is a Pandas DataFrame containing warning messages
               thrown by fastload. DataFrame is empty if there are no warnings.
-            3. errors_table: Name of the table containing errors. It is None, if
-               argument save_errors is False.
-            4. warnings_table: Name of the table containing warnings. It is None, if
-               argument save_errors is False.
+            3. errors_table: Fully qualified name of the table containing errors. It is
+               an empty string (''), if argument "save_errors" is set to False.
+            4. warnings_table: Fully qualified name of the table containing warnings. It is
+               an empty string (''), if argument "save_errors" is set to False.
+            5. ERR_1_table: Fully qualified name of the ERR_1 table created by the fastload
+               job. It is an empty string (''), if argument "save_errors" is set to False.
+            6. ERR_2_table: Fully qualified name of the ERR_2 table created by the fastload
+               job. It is an empty string (''), if argument "save_errors" is set to False.
 
     RAISES:
         TeradataMlException
 
     EXAMPLES:
         Saving a Pandas DataFrame using Fastload:
-        >>> from teradataml.dataframe.fastload import fastload
-        >>> from teradatasqlalchemy.types import *
-
-        >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
-                  'emp_sage': [100, 200, 300, 400],
-                  'emp_id': [133, 144, 155, 177],
-                  'marks': [99.99, 97.32, 94.67, 91.00]
-                  }
-
-        >>> pandas_df = pd.DataFrame(df)
-
-        # a) Default execution
-        >>> fastload(df = pandas_df, table_name = 'my_table')
-
-        # b) Save a Pandas DataFrame with primary_index
-        >>> pandas_df = pandas_df.set_index(['emp_id'])
-        >>> fastload(df = pandas_df, table_name = 'my_table_1', primary_index='emp_id')
-
-        # c) Save a Pandas DataFrame using fastload() with index and primary_index
-        >>> fastload(df = pandas_df, table_name = 'my_table_2', index=True,
-                     primary_index='index_label')
-
-        # d) Save a Pandas DataFrame using types, appending to the table if it already exists
-        >>> fastload(df = pandas_df, table_name = 'my_table_3', schema_name = 'alice',
-                     index = True, index_label = 'my_index_label',
-                     primary_index = ['emp_id'], if_exists = 'append',
-                     types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
-                              'emp_id': BIGINT, 'marks': DECIMAL})
-
-        # e) Save a Pandas DataFrame using levels in index of type MultiIndex
-        >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
-        >>> fastload(df = pandas_df, table_name = 'my_table_4', schema_name = 'alice',
-                     index = True, index_label = ['index1', 'index2'],
-                     primary_index = ['index1'], if_exists = 'replace')
-
-        # f) Save a Pandas DataFrame by opening spcified number of teradata data transfer sessions
-        >>> fastload(df = pandas_df, table_name = 'my_table_5', open_sessions = 2)
+        >>> from teradataml.dataframe.fastload import fastload
+        >>> from teradatasqlalchemy.types import *
+
+        >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
+                  'emp_sage': [100, 200, 300, 400],
+                  'emp_id': [133, 144, 155, 177],
+                  'marks': [99.99, 97.32, 94.67, 91.00]
+                  }
+
+        >>> pandas_df = pd.DataFrame(df)
+
+        # Example 1: Default execution.
+        >>> fastload(df = pandas_df, table_name = 'my_table')
+
+        # Example 2: Save a Pandas DataFrame with primary_index.
+        >>> pandas_df = pandas_df.set_index(['emp_id'])
+        >>> fastload(df = pandas_df, table_name = 'my_table_1', primary_index='emp_id')
+
+        # Example 3: Save a Pandas DataFrame using fastload() with index and primary_index.
+        >>> fastload(df = pandas_df, table_name = 'my_table_2', index=True,
+                     primary_index='index_label')
+
+        # Example 4: Save a Pandas DataFrame using types, appending to the table if it already exists.
+        >>> fastload(df = pandas_df, table_name = 'my_table_3', schema_name = 'alice',
+                     index = True, index_label = 'my_index_label',
+                     primary_index = ['emp_id'], if_exists = 'append',
+                     types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
+                              'emp_id': BIGINT, 'marks': DECIMAL})
+
+        # Example 5: Save a Pandas DataFrame using levels in index of type MultiIndex.
+        >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
+        >>> fastload(df = pandas_df, table_name = 'my_table_4', schema_name = 'alice',
+                     index = True, index_label = ['index1', 'index2'],
+                     primary_index = ['index1'], if_exists = 'replace')
+
+        # Example 6: Save a Pandas DataFrame by opening a specified number of teradata data transfer sessions.
+        >>> fastload(df = pandas_df, table_name = 'my_table_5', open_sessions = 2)
+
+        # Example 7: Save a Pandas DataFrame to a table in the specified target database "schema_name".
+        #            Save errors and warnings to the database specified with "err_staging_db".
+        #            Save errors to the table named by "err_tbl_name" and warnings to "warn_tbl_name".
+        #            This assumes the user is connected to a database different from "schema_name"
+        #            and "err_staging_db".
+
+        # Create a pandas DataFrame having one duplicate and one faulty row.
+        >>> data_dict = {"C_ID": [301, 301, 302, 303, 304, 305, 306, 307, 308],
+                         "C_timestamp": ['2014-01-06 09:01:25', '2014-01-06 09:01:25',
+                                         '2015-01-06 09:01:25.25.122200', '2017-01-06 09:01:25.11111',
+                                         '2013-01-06 09:01:25', '2019-03-06 10:15:28',
+                                         '2014-01-06 09:01:25.1098', '2014-03-06 10:01:02',
+                                         '2014-03-06 10:01:20.0000']}
+        >>> my_df = pd.DataFrame(data_dict)
+
+        # Fastload the data into the non-default schema "target_db" and save errors and warnings in the given tables.
+        >>> fastload(df=my_df, table_name='fastload_with_err_warn_tbl_stag_db',
+                     if_exists='replace', primary_index='C_ID', schema_name='target_db',
+                     types={'C_ID': INTEGER, 'C_timestamp': TIMESTAMP(6)},
+                     err_tbl_name='fld_errors', warn_tbl_name='fld_warnings',
+                     err_staging_db='stage_db')
+        Processed 9 rows in batch 1.
+        {'errors_dataframe':   batch_no                                      error_message
+        0         1           [Session 14527] [Teradata Database] [Error 26...,
+         'warnings_dataframe':  batch_no                                     error_message
+        0         batch_summary [Session 14526] [Teradata SQL Driver] [Warnin...,
+         'errors_table': 'stage_db.fld_errors',
+         'warnings_table': 'stage_db.fld_warnings',
+         'ERR_1_table': '',
+         'ERR_2_table': ''}
+
+        # Validate the loaded data table.
+        >>> DataFrame(in_schema("target_db", "fastload_with_err_warn_tbl_stag_db"))
+        C_ID                 C_timestamp
+        303   2017-01-06 09:01:25.111110
+        306   2014-01-06 09:01:25.109800
+        304   2013-01-06 09:01:25.000000
+        307   2014-03-06 10:01:02.000000
+        305   2019-03-06 10:15:28.000000
+        301   2014-01-06 09:01:25.000000
+        308   2014-03-06 10:01:20.000000
+
+        # Validate the error and warning tables.
+        >>> DataFrame(in_schema("stage_db", "fld_errors"))
+        batch_no  error_message
+        1         [Session 14527] [Teradata Database] [Error 2673] FastLoad failed to insert 1 of 9 batched rows. Batched row 3 failed to insert because of Teradata Database error 2673 in "target_db"."fastload_with_err_warn_tbl_stag_db"."C_timestamp"
+
+        >>> DataFrame(in_schema("stage_db", "fld_warnings"))
+        batch_no       error_message
+        batch_summary  [Session 14526] [Teradata SQL Driver] [Warning 518] Found 1 duplicate or faulty row(s) while ending FastLoad of database table "target_db"."fastload_with_err_warn_tbl_stag_db": expected a row count of 8, got a row count of 7
+
+        # Example 8: Save a Pandas DataFrame to a table in the specified target database "schema_name".
+        #            Save errors in ERR_1 and ERR_2 tables having the user-defined suffixes provided
+        #            in "err_tbl_1_suffix" and "err_tbl_2_suffix".
+        #            The source Pandas DataFrame is the same as in Example 7.
+
+        >>> fastload(df=my_df, table_name = 'fastload_with_err_warn_tbl_stag_db',
+                     schema_name = 'target_db', if_exists = 'append',
+                     types={'C_ID': INTEGER, 'C_timestamp': TIMESTAMP(6)},
+                     err_staging_db='stage_db', save_errors=True,
+                     err_tbl_1_suffix="_user_err_1", err_tbl_2_suffix="_user_err_2")
+        {'errors_dataframe': Empty DataFrame
+        Columns: []
+        Index: [],
+         'warnings_dataframe':  batch_no                                     error_message
+        0         batch_summary [Session 14699] [Teradata SQL Driver] [Warnin...,
+         'errors_table': '',
+         'warnings_table': 'stage_db.td_fl_fastload_with_err_warn_tbl_stag_db_warn_1730',
+         'ERR_1_table': 'stage_db.ml__fl_stag_1716272404181579_user_err_1',
+         'ERR_2_table': 'stage_db.ml__fl_stag_1716272404181579_user_err_2'}
+
+        # Validate the ERR_1 and ERR_2 tables.
+        >>> DataFrame(in_schema("stage_db", "ml__fl_stag_1716270574550744_user_err_1"))
+        ErrorCode  ErrorFieldName  DataParcel
+        2673       F_C_timestamp   b'12E...'
+
+        >>> DataFrame(in_schema("stage_db", "ml__fl_stag_1716270574550744_user_err_2"))
+        C_ID  C_timestamp
 
     """
     # Deriving global connection using get_connection()
@@ -253,7 +400,10 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
                                 index=index, index_label=index_label, primary_index=primary_index,
                                 types=types, batch_size=batch_size,
                                 save_errors=save_errors, api_name='fastload',
-                                use_fastload=True, open_sessions=open_sessions)
+                                use_fastload=True, open_sessions=open_sessions,
+                                err_tbl_1_suffix=err_tbl_1_suffix, err_tbl_2_suffix=err_tbl_2_suffix,
+                                err_tbl_name=err_tbl_name, warn_tbl_name=warn_tbl_name,
+                                err_staging_db=err_staging_db)
     # Validate DataFrame & related flags; Proceed only when True
     dt_obj._validate()
 
@@ -316,7 +466,8 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
         if not table_exists or if_exists.lower() == 'replace':
             dt_obj._create_or_replace_table(con, table_exists=table_exists)
 
-            fl_dict = _insert_from_dataframe(dt_obj, table_name, batch_size)
+            # Insert data to target table using fastload.
+            fl_dict = _insert_from_pd_dataframe_with_fastload(dt_obj, table_name, batch_size)
 
         # Check column compatibility for insertion when table exists and if_exists = 'append'
         if table_exists and if_exists.lower() == 'append':
@@ -332,28 +483,37 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
         try:
             # Create staging table and use FastLoad to load data.
             # Then copy all the rows from staging table to target table using insert_into sql.
-            stag_table_name = UtilFuncs._generate_temp_table_name(prefix="fl_stag",
+            # If err_staging_db is not provided, create staging table
+            # object in default connected DB.
+            if err_staging_db is None:
+                err_staging_db = _get_current_databasename()
+            stag_table_name = UtilFuncs._generate_temp_table_name(databasename=err_staging_db,
+                                                                  prefix="fl_stag",
                                                                   gc_on_quit=False,
                                                                   quote=False,
                                                                   table_type=TeradataConstants.TERADATA_TABLE)
 
-            # Get the table name without schema name for further steps
-            stag_table_name = stag_table_name.split('.')[-1].replace('"', '')
-            # Create staging table object
-            dt_obj._create_table(con, table_name=stag_table_name)
+            # Get the table name without schema name for further steps.
+            stag_table_name = UtilFuncs._extract_table_name(stag_table_name)
+            # Create staging table object.
+            dt_obj._create_table(con, table_name=stag_table_name,
+                                 schema_name=err_staging_db)
 
-            # Insert data to staging table using faslload
-            fl_dict = _insert_from_dataframe(dt_obj, stag_table_name, batch_size)
+            # Insert data to staging table using fastload.
+            fl_dict = _insert_from_pd_dataframe_with_fastload(dt_obj, stag_table_name, batch_size, err_staging_db)
 
-            # Insert data from staging table to target data.
+            # Insert data from staging table to target table.
             df_utils._insert_all_from_table(table_name,
-                                            dt_obj._get_fully_qualified_table_name(table_name=stag_table_name),
-                                            cols[0], schema_name)
+                                            stag_table_name,
+                                            cols[0],
+                                            schema_name,
+                                            err_staging_db)
         except:
             raise
         finally:
+            # Drop the staging table.
            if stag_table_name:
-                UtilFuncs._drop_table(dt_obj._get_fully_qualified_table_name(stag_table_name))
+                UtilFuncs._drop_table(dt_obj._get_fully_qualified_table_name(stag_table_name, err_staging_db))
 
     except (TeradataMlException, ValueError, TypeError):
         raise
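Condensed, the reworked append path is a stage-and-copy flow. The sketch below restates it for orientation only (it reuses the helper names from the hunk; dt_obj, con, cols and batch_size are locals of fastload(), and the temp-name generation/extraction steps are omitted):

    err_staging_db = err_staging_db or _get_current_databasename()   # pick the staging DB
    try:
        # 1. Create the staging table in err_staging_db.
        dt_obj._create_table(con, table_name=stag_table_name, schema_name=err_staging_db)
        # 2. FastLoad the pandas data into the staging table.
        fl_dict = _insert_from_pd_dataframe_with_fastload(dt_obj, stag_table_name,
                                                          batch_size, err_staging_db)
        # 3. Copy every row from staging into the target table (INSERT ... SELECT).
        df_utils._insert_all_from_table(table_name, stag_table_name, cols[0],
                                        schema_name, err_staging_db)
    finally:
        # 4. Drop the staging table even when an earlier step raised.
        UtilFuncs._drop_table(dt_obj._get_fully_qualified_table_name(stag_table_name,
                                                                     err_staging_db))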
@@ -363,10 +523,10 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
     return fl_dict
 
 
-def _insert_from_dataframe(dt_obj, table_name, batch_size):
+def _insert_from_pd_dataframe_with_fastload(dt_obj, table_name, batch_size, to_schema_name=None):
     """
-    This is an internal function used to to sequentially extract column info from DataFrame,
-    iterate rows, and insert rows manually. Used for Insertions to Tables with Pandas index.
+    This is an internal function used to sequentially extract column info from pandas DataFrame,
+    iterate rows, and insert rows manually. Used for insertions to Tables with Pandas index.
     This uses DBAPI's escape functions for Fastload which is a batch insertion method.
 
     PARAMETERS:
@@ -382,6 +542,10 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
             Specifies the number of rows to be inserted in a batch.
             Types: Int
 
+        to_schema_name:
+            Optional Argument.
+            Specifies the name of the database schema where the target table needs to be created.
+
     RETURNS:
         dict
 
@@ -389,7 +553,7 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
     RAISES:
         Exception
 
     EXAMPLES:
-        _insert_from_dataframe(dt_obj, table_name, batch_size=100)
+        _insert_from_pd_dataframe_with_fastload(dt_obj, table_name, batch_size=100)
     """
     conn = get_connection().connection
     # Create a cursor from connection object
@@ -430,9 +594,9 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
     # FastLoad. Returns an empty string if the request is not a FastLoad.
 
     # Quoted, schema-qualified table name.
-    table = dt_obj._get_fully_qualified_table_name(table_name)
+    table = dt_obj._get_fully_qualified_table_name(table_name, to_schema_name)
 
-    # Form the INSERT query for fastlod.
+    # Form the INSERT query for fastload.
     ins = dt_obj._form_insert_query(table)
 
     # Turn off autocommit before the Fastload insertion
@@ -440,20 +604,20 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
                                       DriverEscapeFunctions.AUTOCOMMIT_OFF)
 
     # Initialize dict template for saving error/warning information
-    err_dict = {key:[] for key in ['batch_no', 'error_message']}
-    warn_dict = {key:[] for key in ['batch_no', 'error_message']}
+    err_dict = {key: [] for key in ['batch_no', 'error_message']}
+    warn_dict = {key: [] for key in ['batch_no', 'error_message']}
 
     batch_number = 1
     num_batches = int(dt_obj.df.shape[0]/batch_size)
 
-
     # Empty queryband buffer before SQL call.
     UtilFuncs._set_queryband()
+
     for i in range(0, dt_obj.df.shape[0], batch_size):
         # Add the remaining rows to last batch after second last batch
         if (batch_number == num_batches) :
             last_elem = dt_obj.df.shape[0]
-        else :
+        else:
             last_elem = i + batch_size
 
         pdf = dt_obj.df.iloc[i:last_elem]
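A worked example of the batch windowing above, assuming the loop stops once the final (num_batches-th) batch has been issued: 10 rows with batch_size=4 fold the 2 leftover rows into the last batch instead of sending a short third one.

    rows, batch_size = 10, 4
    num_batches = int(rows / batch_size)            # 2
    for batch_number, i in enumerate(range(0, rows, batch_size), start=1):
        if batch_number > num_batches:
            break                                   # remainder was already folded in
        last_elem = rows if batch_number == num_batches else i + batch_size
        print("batch {} -> rows [{}:{})".format(batch_number, i, last_elem))
    # batch 1 -> rows [0:4)
    # batch 2 -> rows [4:10)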
@@ -466,10 +630,10 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
             if dt_obj.index is True:
                 insert_list2.extend(row[0]) if is_multi_index else insert_list2.append(row[0])
             insert_list.append(insert_list2)
-        # Execute insert statement
-        cur.execute (ins, insert_list)
+        # Execute insert statement.
+        cur.execute(ins, insert_list)
 
-        # Get error and warning information
+        # Get error and warning information from cursor.
         err, _ = dt_obj._process_fastexport_errors_warnings(ins)
         if len(err) != 0:
             err_dict['batch_no'].extend([batch_number] * len(err))
@@ -487,7 +651,6 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
     logon_seq_number = dt_obj._process_escape_functions(cur, escape_function= \
                                                         DriverEscapeFunctions.LOGON_SEQ_NUM,
                                                         insert_query=ins)
-
     # Commit the rows
     conn.commit()
 
@@ -497,19 +660,38 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
     if len(warn) != 0:
         warn_dict['batch_no'].extend(['batch_summary'] * len(warn))
         warn_dict['error_message'].extend(warn)
-
-    # Get error and warning informations for error and warning tables, persist
+
+    # Get error and warning information for error and warning tables, persist
     # error and warning tables to Vantage if user has specified save_error as True
     # else show it as pandas dataframe on console.
     pd_err_df = dt_obj._get_pandas_df_from_errors_warnings(err_dict)
-    if not pd_err_df.empty:
-        msg_type = "err"
-        error_tablename = dt_obj._create_error_warnings_table(pd_err_df, msg_type, logon_seq_number[0][0])
-
     pd_warn_df = dt_obj._get_pandas_df_from_errors_warnings(warn_dict)
-    if not pd_warn_df.empty:
-        msg_type = "warn"
-        warn_tablename = dt_obj._create_error_warnings_table(pd_warn_df, msg_type, logon_seq_number[0][0])
+
+    # Create persistent tables using pandas df if
+    # save_errors=True or
+    # table names for errors or warnings are provided by user.
+    if dt_obj.save_errors or dt_obj.err_tbl_name:
+        if not pd_err_df.empty:
+            error_tablename = dt_obj._create_error_warnings_table(pd_err_df, "err", logon_seq_number[0][0],
+                                                                  dt_obj.err_tbl_name)
+    if dt_obj.save_errors or dt_obj.warn_tbl_name:
+        if not pd_warn_df.empty:
+            warn_tablename = dt_obj._create_error_warnings_table(pd_warn_df, "warn", logon_seq_number[0][0],
+                                                                 dt_obj.warn_tbl_name)
+
+    # Generate ERR_1 and ERR_2 table names if save_errors=True and
+    # errors are not stored in user provided error table name.
+    if dt_obj.save_errors and not dt_obj.err_tbl_name:
+        err_1_table = "{}.{}{}".format(dt_obj.err_staging_db if dt_obj.err_staging_db else _get_current_databasename(),
+                                       table_name,
+                                       dt_obj.err_tbl_1_suffix if dt_obj.err_tbl_1_suffix else "_ERR_1")
+        err_2_table = "{}.{}{}".format(dt_obj.err_staging_db if dt_obj.err_staging_db else _get_current_databasename(),
+                                       table_name,
+                                       dt_obj.err_tbl_2_suffix if dt_obj.err_tbl_2_suffix else "_ERR_2")
+    else:
+        err_1_table = ""
+        err_2_table = ""
 
 except Exception:
     conn.rollback()
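The generated ERR_1/ERR_2 names are plain string formatting; an illustration using the hypothetical values from Example 8 above:

    >>> err_staging_db, table_name = 'stage_db', 'ml__fl_stag_1716272404181579'
    >>> "{}.{}{}".format(err_staging_db, table_name, '_user_err_1')
    'stage_db.ml__fl_stag_1716272404181579_user_err_1'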
@@ -520,7 +702,8 @@ def _insert_from_dataframe(dt_obj, table_name, batch_size):
         cur.close()
 
     return {"errors_dataframe": pd_err_df, "warnings_dataframe": pd_warn_df,
-            "errors_table": error_tablename, "warnings_table": warn_tablename}
+            "errors_table": error_tablename, "warnings_table": warn_tablename,
+            "ERR_1_table": err_1_table, "ERR_2_table": err_2_table}
 
 
 def _get_batchsize(df):