teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +112 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +224 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +6 -4
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +245 -306
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +313 -82
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +109 -146
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/constants.py +37 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +151 -120
- teradataml/common/messagecodes.py +4 -1
- teradataml/common/messages.py +2 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +97 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +72 -2
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_fit.py +17 -10
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +2 -2
- teradataml/data/scripts/sklearn/sklearn_function.template +30 -7
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +55 -4
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +474 -41
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +658 -20
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +322 -16
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +154 -69
- teradataml/options/__init__.py +3 -1
- teradataml/options/configure.py +14 -2
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +10 -6
- teradataml/scriptmgmt/lls_utils.py +3 -2
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +1 -1
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +115 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +200 -140
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
|
@@ -42,6 +42,7 @@ from teradataml.dataframe.indexer import _LocationIndexer
|
|
|
42
42
|
from teradataml.common.aed_utils import AedUtils
|
|
43
43
|
from teradataml.options.display import display
|
|
44
44
|
from teradataml.dataframe.copy_to import copy_to_sql
|
|
45
|
+
from teradataml.dataframe.row import _Row
|
|
45
46
|
from teradataml.dataframe.setop import concat
|
|
46
47
|
from teradataml.plot.plot import _Plot
|
|
47
48
|
from teradataml.scriptmgmt.UserEnv import UserEnv
|
|
@@ -53,7 +54,9 @@ from teradatasql import OperationalError
|
|
|
53
54
|
from teradataml.dataframe.window import Window
|
|
54
55
|
from teradataml.dataframe.data_transfer import _DataTransferUtils
|
|
55
56
|
from teradataml.common.bulk_exposed_utils import _validate_unimplemented_function
|
|
56
|
-
from
|
|
57
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
58
|
+
from teradataml.options.configure import configure
|
|
59
|
+
from teradataml.utils.internal_buffer import _InternalBuffer
|
|
57
60
|
|
|
58
61
|
# TODO use logger when available on master branch
|
|
59
62
|
# logger = teradatapylog.getLogger()
|
|
@@ -151,6 +154,11 @@ class DataFrame():
|
|
|
151
154
|
# This attribute added to add setter for columns property,
|
|
152
155
|
# it is required when setting columns from groupby
|
|
153
156
|
self._columns = None
|
|
157
|
+
# This attribute stores the internal AED query and avoid multiple
|
|
158
|
+
# calls to AED utility function aed_show_query()
|
|
159
|
+
self._aed_query = None
|
|
160
|
+
# This attribute stores the type of query stored in self._aed_query.
|
|
161
|
+
self._is_full_query = None
|
|
154
162
|
|
|
155
163
|
# Property to determine if table is an ART table or not.
|
|
156
164
|
self._is_art = None
|
|
@@ -418,6 +426,130 @@ class DataFrame():
|
|
|
418
426
|
|
|
419
427
|
return df
|
|
420
428
|
|
|
429
|
+
def create_temp_view(self, name):
|
|
430
|
+
"""
|
|
431
|
+
DESCRIPTION:
|
|
432
|
+
Creates a temporary view for session on the DataFrame.
|
|
433
|
+
|
|
434
|
+
PARAMETERS:
|
|
435
|
+
name:
|
|
436
|
+
Required Argument.
|
|
437
|
+
Specifies the name of the temporary view.
|
|
438
|
+
Type: str
|
|
439
|
+
|
|
440
|
+
RETURNS:
|
|
441
|
+
None
|
|
442
|
+
|
|
443
|
+
RAISES:
|
|
444
|
+
OperationalError (When view already exists).
|
|
445
|
+
|
|
446
|
+
EXAMPLES:
|
|
447
|
+
# Load the data to run the example.
|
|
448
|
+
>>> load_example_data("dataframe", "admissions_train")
|
|
449
|
+
>>> df = DataFrame("admissions_train")
|
|
450
|
+
>>> df
|
|
451
|
+
masters gpa stats programming admitted
|
|
452
|
+
id
|
|
453
|
+
38 yes 2.65 Advanced Beginner 1
|
|
454
|
+
7 yes 2.33 Novice Novice 1
|
|
455
|
+
26 yes 3.57 Advanced Advanced 1
|
|
456
|
+
17 no 3.83 Advanced Advanced 1
|
|
457
|
+
34 yes 3.85 Advanced Beginner 0
|
|
458
|
+
13 no 4.00 Advanced Novice 1
|
|
459
|
+
32 yes 3.46 Advanced Beginner 0
|
|
460
|
+
11 no 3.13 Advanced Advanced 1
|
|
461
|
+
15 yes 4.00 Advanced Advanced 1
|
|
462
|
+
36 no 3.00 Advanced Novice 0
|
|
463
|
+
|
|
464
|
+
# Example 1: Create view 'new_admissions'.
|
|
465
|
+
>>> df.create_temp_view("new_admissions")
|
|
466
|
+
>>> new_df = DataFrame("new_admissions")
|
|
467
|
+
>>> new_df
|
|
468
|
+
masters gpa stats programming admitted
|
|
469
|
+
id
|
|
470
|
+
38 yes 2.65 Advanced Beginner 1
|
|
471
|
+
7 yes 2.33 Novice Novice 1
|
|
472
|
+
26 yes 3.57 Advanced Advanced 1
|
|
473
|
+
17 no 3.83 Advanced Advanced 1
|
|
474
|
+
34 yes 3.85 Advanced Beginner 0
|
|
475
|
+
13 no 4.00 Advanced Novice 1
|
|
476
|
+
32 yes 3.46 Advanced Beginner 0
|
|
477
|
+
11 no 3.13 Advanced Advanced 1
|
|
478
|
+
15 yes 4.00 Advanced Advanced 1
|
|
479
|
+
36 no 3.00 Advanced Novice 0
|
|
480
|
+
"""
|
|
481
|
+
# Validating Arguments
|
|
482
|
+
arg_type_matrix = []
|
|
483
|
+
arg_type_matrix.append(["name", name, False, (str), True])
|
|
484
|
+
_Validators._validate_function_arguments(arg_type_matrix)
|
|
485
|
+
|
|
486
|
+
GarbageCollector._add_to_garbagecollector(name, TeradataConstants.TERADATA_VIEW)
|
|
487
|
+
UtilFuncs._create_view(name, self.show_query())
|
|
488
|
+
|
|
489
|
+
def materialize(self):
|
|
490
|
+
"""
|
|
491
|
+
DESCRIPTION:
|
|
492
|
+
Method to materialize teradataml DataFrame into a database object.
|
|
493
|
+
Notes:
|
|
494
|
+
* DataFrames are materialized in either view/table/volatile table,
|
|
495
|
+
which is decided and taken care by teradataml.
|
|
496
|
+
* If user wants to materialize object into specific database object
|
|
497
|
+
such as table/volatile table, use 'to_sql()' or 'copy_to_sql()' or
|
|
498
|
+
'fastload()' functions.
|
|
499
|
+
* Materialized object is garbage collected at the end of the session.
|
|
500
|
+
|
|
501
|
+
PARAMETERS:
|
|
502
|
+
None
|
|
503
|
+
|
|
504
|
+
RETURNS:
|
|
505
|
+
DataFrame
|
|
506
|
+
|
|
507
|
+
EXAMPLES:
|
|
508
|
+
>>> load_example_data("dataframe", "admissions_train")
|
|
509
|
+
>>> df = DataFrame("admissions_train")
|
|
510
|
+
>>> df
|
|
511
|
+
masters gpa stats programming admitted
|
|
512
|
+
id
|
|
513
|
+
13 no 4.00 Advanced Novice 1
|
|
514
|
+
26 yes 3.57 Advanced Advanced 1
|
|
515
|
+
5 no 3.44 Novice Novice 0
|
|
516
|
+
19 yes 1.98 Advanced Advanced 0
|
|
517
|
+
15 yes 4.00 Advanced Advanced 1
|
|
518
|
+
40 yes 3.95 Novice Beginner 0
|
|
519
|
+
7 yes 2.33 Novice Novice 1
|
|
520
|
+
22 yes 3.46 Novice Beginner 0
|
|
521
|
+
36 no 3.00 Advanced Novice 0
|
|
522
|
+
38 yes 2.65 Advanced Beginner 1
|
|
523
|
+
|
|
524
|
+
# Example 1: Perform operations on teradataml DataFrame
|
|
525
|
+
# and materialize it in a database object.
|
|
526
|
+
>>> df2 = df.get([["id", "masters", "gpa"]])
|
|
527
|
+
|
|
528
|
+
# Initially table_name will be None.
|
|
529
|
+
>>> df2._table_name
|
|
530
|
+
|
|
531
|
+
>>> df2.materialize()
|
|
532
|
+
masters gpa
|
|
533
|
+
id
|
|
534
|
+
15 yes 4.00
|
|
535
|
+
7 yes 2.33
|
|
536
|
+
22 yes 3.46
|
|
537
|
+
17 no 3.83
|
|
538
|
+
13 no 4.00
|
|
539
|
+
38 yes 2.65
|
|
540
|
+
26 yes 3.57
|
|
541
|
+
5 no 3.44
|
|
542
|
+
34 yes 3.85
|
|
543
|
+
40 yes 3.95
|
|
544
|
+
|
|
545
|
+
# After materialize(), view name will be assigned.
|
|
546
|
+
>>> df2._table_name
|
|
547
|
+
'"ALICE"."ml__select__172077355985236"'
|
|
548
|
+
>>>
|
|
549
|
+
"""
|
|
550
|
+
self.__execute_node_and_set_table_name(self._nodeid, self._metaexpr)
|
|
551
|
+
return self
|
|
552
|
+
|
|
421
553
|
@collect_queryband(queryband="DF_fillna")
|
|
422
554
|
def fillna(self, value=None, columns=None, literal_value=False):
|
|
423
555
|
"""
|
|
@@ -5421,7 +5553,9 @@ class DataFrame():
|
|
|
5421
5553
|
result = self._check_numeric_overflow(agg_df)
|
|
5422
5554
|
"""
|
|
5423
5555
|
try:
|
|
5424
|
-
|
|
5556
|
+
# Printing the DF will actually run underlying select query and
|
|
5557
|
+
# will bring up numeric overflow, if any. Only materializing won't work.
|
|
5558
|
+
print(result_df)
|
|
5425
5559
|
return False
|
|
5426
5560
|
except TeradataMlException as tme:
|
|
5427
5561
|
if "Numeric overflow occurred during computation" in str(tme):
|
|
@@ -5557,18 +5691,73 @@ class DataFrame():
|
|
|
5557
5691
|
EXAMPLES:
|
|
5558
5692
|
self.__get_data_columns()
|
|
5559
5693
|
"""
|
|
5560
|
-
|
|
5561
|
-
|
|
5562
|
-
|
|
5694
|
+
if not self._table_name:
|
|
5695
|
+
if not self._aed_query:
|
|
5696
|
+
self.__generate_aed_query()
|
|
5697
|
+
# TODO: Check the length of query and if it fails, create a view in catch block.
|
|
5698
|
+
# Address in this JIRA: https://teradata-pe.atlassian.net/browse/ELE-6922
|
|
5699
|
+
query = repr(self._metaexpr) + ' FROM ( ' + self._aed_query + ' ) as temp_table'
|
|
5700
|
+
else:
|
|
5701
|
+
query = repr(self._metaexpr) + ' FROM ' + self._table_name
|
|
5563
5702
|
|
|
5564
5703
|
if self._orderby is not None:
|
|
5565
5704
|
query += ' ORDER BY ' + self._orderby
|
|
5566
5705
|
|
|
5706
|
+
query += ';'
|
|
5567
5707
|
# Execute the query and get the results in a list.
|
|
5568
5708
|
self.__data, self.__data_columns = UtilFuncs._execute_query(query=query, fetchWarnings=True)
|
|
5569
5709
|
|
|
5570
5710
|
return self.__data, self.__data_columns
|
|
5571
5711
|
|
|
5712
|
+
def __generate_aed_query(self, full_query=False):
|
|
5713
|
+
"""
|
|
5714
|
+
DESCRIPTION:
|
|
5715
|
+
Internal function to return underlying SQL for the teradataml
|
|
5716
|
+
DataFrame. It is the same SQL that is used to view the data for
|
|
5717
|
+
a teradataml DataFrame.
|
|
5718
|
+
|
|
5719
|
+
PARAMETERS:
|
|
5720
|
+
full_query:
|
|
5721
|
+
Optional Argument.
|
|
5722
|
+
Specifies if the complete query for the dataframe should be returned.
|
|
5723
|
+
When this parameter is set to True, query for the dataframe is returned
|
|
5724
|
+
with respect to the base dataframe's table (from_table() or from_query())
|
|
5725
|
+
or from the output tables of analytical functions (if there are any in the
|
|
5726
|
+
workflow). This query may or may not be directly used to retrieve data
|
|
5727
|
+
for the dataframe upon which the function is called.
|
|
5728
|
+
When this parameter is not used, string returned is the query already used
|
|
5729
|
+
or will be used to retrieve data for the teradataml DataFrame.
|
|
5730
|
+
Default Value: False
|
|
5731
|
+
Types: bool
|
|
5732
|
+
|
|
5733
|
+
RETURNS:
|
|
5734
|
+
String representing the underlying SQL query for the teradataml DataFrame.
|
|
5735
|
+
|
|
5736
|
+
RAISES:
|
|
5737
|
+
None.
|
|
5738
|
+
|
|
5739
|
+
EXAMPLES:
|
|
5740
|
+
self.__generate_aed_query()
|
|
5741
|
+
"""
|
|
5742
|
+
# Run aed call only when _aed_query is None or
|
|
5743
|
+
# the type of current stored query (full/short) is not matching
|
|
5744
|
+
# with asked query type.
|
|
5745
|
+
if (not self._aed_query) or (not self._is_full_query == full_query):
|
|
5746
|
+
node_id = self._nodeid
|
|
5747
|
+
|
|
5748
|
+
if isinstance(self, (DataFrameGroupBy, DataFrameGroupByTime)):
|
|
5749
|
+
# If dataframe is either of type groupby or groupbytime
|
|
5750
|
+
# then get its parent dataframe nodeid and return queries
|
|
5751
|
+
# for the same
|
|
5752
|
+
node_id = self._aed_utils._aed_get_parent_nodeids(self._nodeid)[0]
|
|
5753
|
+
|
|
5754
|
+
queries = self._aed_utils._aed_show_query(node_id, query_with_reference_to_top=full_query)
|
|
5755
|
+
# Store query and type of query in class attributes to avoid future runs.
|
|
5756
|
+
self._aed_query = queries[0][0]
|
|
5757
|
+
self._is_full_query = full_query
|
|
5758
|
+
|
|
5759
|
+
return self._aed_query
|
|
5760
|
+
|
|
5572
5761
|
@collect_queryband(queryband="DF_select")
|
|
5573
5762
|
def select(self, select_expression):
|
|
5574
5763
|
"""
|
|
@@ -7108,6 +7297,97 @@ class DataFrame():
|
|
|
7108
7297
|
if function_name is None or function_name in VANTAGE_FUNCTION_ARGTYPE_DEPENDENT_MAPPER:
|
|
7109
7298
|
self.__execute_node_and_set_table_name(self._nodeid)
|
|
7110
7299
|
return True
|
|
7300
|
+
|
|
7301
|
+
def _assign_udf(self, udf_expr):
|
|
7302
|
+
"""
|
|
7303
|
+
DESCRIPTION:
|
|
7304
|
+
Internal function for DataFrame.assign() to execute the udf using
|
|
7305
|
+
Script Table Operator and create new column for teradataml DataFrame.
|
|
7306
|
+
|
|
7307
|
+
PARAMETER:
|
|
7308
|
+
udf_expr:
|
|
7309
|
+
Required Argument.
|
|
7310
|
+
Specifies a dictionary of column name to UDF expressions.
|
|
7311
|
+
Types: dict
|
|
7312
|
+
|
|
7313
|
+
RETURNS:
|
|
7314
|
+
teradataml DataFrame
|
|
7315
|
+
|
|
7316
|
+
RAISES:
|
|
7317
|
+
None.
|
|
7318
|
+
|
|
7319
|
+
EXAMPLES:
|
|
7320
|
+
self._assign_udf(udf_expr)
|
|
7321
|
+
"""
|
|
7322
|
+
|
|
7323
|
+
df = self
|
|
7324
|
+
env_name = None
|
|
7325
|
+
# Create a dictionary of env_name to list of output columns to be run on that env.
|
|
7326
|
+
env_mapper = OrderedDict()
|
|
7327
|
+
|
|
7328
|
+
exec_mode = 'REMOTE' if UtilFuncs._is_lake() else 'IN-DB'
|
|
7329
|
+
if exec_mode == 'REMOTE':
|
|
7330
|
+
if _InternalBuffer.get("auth_token") is None:
|
|
7331
|
+
raise TeradataMlException(Messages.get_message(
|
|
7332
|
+
MessageCodes.FUNC_EXECUTION_FAILED, "'udf'", 'Authentication token is required to run udf. Set token using set_auth_token().'),
|
|
7333
|
+
MessageCodes.FUNC_EXECUTION_FAILED)
|
|
7334
|
+
else:
|
|
7335
|
+
for colname, col in udf_expr.items():
|
|
7336
|
+
env_name = UtilFuncs._get_env_name(col)
|
|
7337
|
+
# Store the env_name and its corresponding output column
|
|
7338
|
+
if env_name in env_mapper:
|
|
7339
|
+
env_mapper[env_name].append(colname)
|
|
7340
|
+
else:
|
|
7341
|
+
env_mapper[env_name] = [colname]
|
|
7342
|
+
else:
|
|
7343
|
+
env_mapper[env_name] = udf_expr.keys()
|
|
7344
|
+
|
|
7345
|
+
for env_name, cols in env_mapper.items():
|
|
7346
|
+
# Create a dictionary of output columns to column type.
|
|
7347
|
+
returns = OrderedDict([(column.name, column.type) for column in df._metaexpr.c])
|
|
7348
|
+
# Store the udf functions
|
|
7349
|
+
user_function = []
|
|
7350
|
+
# Create a dictionary of output column name to udf name
|
|
7351
|
+
columns_definitions = {}
|
|
7352
|
+
# Create a dictionary of output column name to udf arguments
|
|
7353
|
+
function_args = {}
|
|
7354
|
+
for colname, col in udf_expr.items():
|
|
7355
|
+
delimiter = col._delimiter
|
|
7356
|
+
quotechar = col._quotechar
|
|
7357
|
+
if colname in cols:
|
|
7358
|
+
user_function.append(col._udf)
|
|
7359
|
+
function_args[colname] = col._udf_args if col._udf_args else ()
|
|
7360
|
+
returns[colname] = col.type
|
|
7361
|
+
columns_definitions[colname] = col._udf.__name__
|
|
7362
|
+
|
|
7363
|
+
tbl_operators = _TableOperatorUtils([],
|
|
7364
|
+
df,
|
|
7365
|
+
"udf",
|
|
7366
|
+
user_function,
|
|
7367
|
+
exec_mode,
|
|
7368
|
+
chunk_size=None,
|
|
7369
|
+
returns=returns,
|
|
7370
|
+
delimiter=delimiter,
|
|
7371
|
+
quotechar=quotechar,
|
|
7372
|
+
num_rows=1,
|
|
7373
|
+
auth=None,
|
|
7374
|
+
data_partition_column=None,
|
|
7375
|
+
data_hash_column=None,
|
|
7376
|
+
data_order_column=None,
|
|
7377
|
+
is_local_order=None,
|
|
7378
|
+
nulls_first=None,
|
|
7379
|
+
sort_ascending=None,
|
|
7380
|
+
charset=None,
|
|
7381
|
+
env_name = env_name,
|
|
7382
|
+
style = "csv",
|
|
7383
|
+
function_args=function_args,
|
|
7384
|
+
columns_definitions=columns_definitions,
|
|
7385
|
+
output_type_converters={
|
|
7386
|
+
col_name: _Dtypes._teradata_type_to_python_type(col_type)
|
|
7387
|
+
for col_name, col_type in returns.items()})
|
|
7388
|
+
|
|
7389
|
+
df = tbl_operators.execute()
|
|
7390
|
+
return df
|
|
7111
7391
|
|
|
7112
7392
|
@collect_queryband(queryband="DF_assign")
|
|
7113
7393
|
def assign(self, drop_columns=False, **kwargs):
|
|
@@ -7119,10 +7399,12 @@ class DataFrame():
|
|
|
7119
7399
|
drop_columns:
|
|
7120
7400
|
Optional Argument.
|
|
7121
7401
|
If True, drop columns that are not specified in assign.
|
|
7122
|
-
|
|
7123
|
-
When DataFrame.assign() is run on DataFrame.groupby(), this argument
|
|
7124
|
-
|
|
7125
|
-
|
|
7402
|
+
Notes:
|
|
7403
|
+
1. When DataFrame.assign() is run on DataFrame.groupby(), this argument
|
|
7404
|
+
is ignored. In such cases, all columns are dropped and only new columns
|
|
7405
|
+
and grouping columns are returned.
|
|
7406
|
+
2. Argument is ignored for UDF functions.
|
|
7407
|
+
|
|
7126
7408
|
Default Value: False
|
|
7127
7409
|
Types: bool
|
|
7128
7410
|
|
|
@@ -7138,6 +7420,7 @@ class DataFrame():
|
|
|
7138
7420
|
* SQLAlchemy ClauseElements.
|
|
7139
7421
|
(See teradataml extension with SQLAlchemy in teradataml User Guide
|
|
7140
7422
|
and Function reference guide for more details)
|
|
7423
|
+
* Function - udf.
|
|
7141
7424
|
|
|
7142
7425
|
|
|
7143
7426
|
RETURNS:
|
|
@@ -7163,6 +7446,16 @@ class DataFrame():
|
|
|
7163
7446
|
used, but the column used in such function must be a part of group by columns.
|
|
7164
7447
|
See examples for teradataml extension with SQLAlchemy on using various
|
|
7165
7448
|
functions with DataFrame.assign().
|
|
7449
|
+
6. UDF expressions can run on both Vantage Cloud Lake leveraging Apply Table Operator
|
|
7450
|
+
of Open Analytics Framework and Enterprise leveraging Vantage's Script Table Operator.
|
|
7451
|
+
7. One can pass both regular expressions and udf expressions to this API.
|
|
7452
|
+
However, regular expressions are computed first followed by udf expressions.
|
|
7453
|
+
Hence, the resulting column order follows the same order of computation.
|
|
7454
|
+
Look at Example 18 to understand more.
|
|
7455
|
+
8. While passing multiple udf expressions, one cannot pass one column's output
|
|
7456
|
+
as another column input in the same ``assign`` call.
|
|
7457
|
+
9. If a user passes multiple udf expressions, the delimiter and quotechar specified in the
|
|
7458
|
+
last udf expression are considered for processing.
|
|
7166
7459
|
|
|
7167
7460
|
RAISES:
|
|
7168
7461
|
1. ValueError - When a callable is passed as a value, or columns from different
|
|
@@ -7424,6 +7717,134 @@ class DataFrame():
|
|
|
7424
7717
|
1 Advanced 2.886226 3.508750 84.21
|
|
7425
7718
|
2 Novice 6.377775 3.559091 39.15
|
|
7426
7719
|
>>>
|
|
7720
|
+
|
|
7721
|
+
#
|
|
7722
|
+
# Executing user defined function (UDF) with assign()
|
|
7723
|
+
#
|
|
7724
|
+
# Example 15: Create two user defined functions to 'to_upper' and 'sum',
|
|
7725
|
+
# 'to_upper' to get the values in 'accounts' to upper case and
|
|
7726
|
+
# 'sum' to add length of string values in column 'accounts'
|
|
7727
|
+
# with column 'Feb' and store the result in Integer type column.
|
|
7728
|
+
>>> @udf
|
|
7729
|
+
... def to_upper(s):
|
|
7730
|
+
... if s is not None:
|
|
7731
|
+
... return s.upper()
|
|
7732
|
+
>>>
|
|
7733
|
+
>>> from teradatasqlalchemy.types import INTEGER
|
|
7734
|
+
>>> @udf(returns=INTEGER())
|
|
7735
|
+
... def sum(x, y):
|
|
7736
|
+
... return len(x)+y
|
|
7737
|
+
>>>
|
|
7738
|
+
# Assign both Column Expressions returned by user defined functions
|
|
7739
|
+
# to the DataFrame.
|
|
7740
|
+
>>> res = df.assign(upper_stats = to_upper('accounts'), len_sum = sum('accounts', 'Feb'))
|
|
7741
|
+
>>> res
|
|
7742
|
+
Feb Jan Mar Apr datetime upper_stats len_sum
|
|
7743
|
+
accounts
|
|
7744
|
+
Blue Inc 90.0 50.0 95.0 101.0 17/01/04 BLUE INC 98
|
|
7745
|
+
Red Inc 200.0 150.0 140.0 NaN 17/01/04 RED INC 207
|
|
7746
|
+
Yellow Inc 90.0 NaN NaN NaN 17/01/04 YELLOW INC 100
|
|
7747
|
+
Jones LLC 200.0 150.0 140.0 180.0 17/01/04 JONES LLC 209
|
|
7748
|
+
Orange Inc 210.0 NaN NaN 250.0 17/01/04 ORANGE INC 220
|
|
7749
|
+
Alpha Co 210.0 200.0 215.0 250.0 17/01/04 ALPHA CO 218
|
|
7750
|
+
>>>
|
|
7751
|
+
|
|
7752
|
+
# Example 16: Create a user defined function to add 4 to the 'datetime' column
|
|
7753
|
+
# and store the result in DATE type column.
|
|
7754
|
+
>>> from teradatasqlalchemy.types import DATE
|
|
7755
|
+
>>> import datetime
|
|
7756
|
+
>>> @udf(returns=DATE())
|
|
7757
|
+
... def add_date(x, y):
|
|
7758
|
+
... return (datetime.datetime.strptime(x, "%y/%m/%d")+datetime.timedelta(y)).strftime("%y/%m/%d")
|
|
7759
|
+
>>>
|
|
7760
|
+
# Assign the Column Expression returned by user defined function
|
|
7761
|
+
# to the DataFrame.
|
|
7762
|
+
>>> res = df.assign(new_date = add_date('datetime', 4))
|
|
7763
|
+
>>> res
|
|
7764
|
+
Feb Jan Mar Apr datetime new_date
|
|
7765
|
+
accounts
|
|
7766
|
+
Alpha Co 210.0 200.0 215.0 250.0 17/01/04 17/01/08
|
|
7767
|
+
Blue Inc 90.0 50.0 95.0 101.0 17/01/04 17/01/08
|
|
7768
|
+
Jones LLC 200.0 150.0 140.0 180.0 17/01/04 17/01/08
|
|
7769
|
+
Orange Inc 210.0 NaN NaN 250.0 17/01/04 17/01/08
|
|
7770
|
+
Yellow Inc 90.0 NaN NaN NaN 17/01/04 17/01/08
|
|
7771
|
+
Red Inc 200.0 150.0 140.0 NaN 17/01/04 17/01/08
|
|
7772
|
+
>>>
|
|
7773
|
+
|
|
7774
|
+
# Example 17: Create a user defined functions to 'to_upper' to get
|
|
7775
|
+
# the values in 'accounts' to upper case and create a
|
|
7776
|
+
# new column with a string literal value.
|
|
7777
|
+
>>> @udf
|
|
7778
|
+
... def to_upper(s):
|
|
7779
|
+
... if s is not None:
|
|
7780
|
+
... return s.upper()
|
|
7781
|
+
>>>
|
|
7782
|
+
# Assign both expressions to the DataFrame.
|
|
7783
|
+
>>> res = df.assign(upper_stats = to_upper('accounts'), new_col = 'string')
|
|
7784
|
+
>>> res
|
|
7785
|
+
Feb Jan Mar Apr datetime new_col upper_stats
|
|
7786
|
+
accounts
|
|
7787
|
+
Alpha Co 210.0 200.0 215.0 250.0 17/01/04 string ALPHA CO
|
|
7788
|
+
Blue Inc 90.0 50.0 95.0 101.0 17/01/04 string BLUE INC
|
|
7789
|
+
Yellow Inc 90.0 NaN NaN NaN 17/01/04 string YELLOW INC
|
|
7790
|
+
Jones LLC 200.0 150.0 140.0 180.0 17/01/04 string JONES LLC
|
|
7791
|
+
Red Inc 200.0 150.0 140.0 NaN 17/01/04 string RED INC
|
|
7792
|
+
Orange Inc 210.0 NaN NaN 250.0 17/01/04 string ORANGE INC
|
|
7793
|
+
>>>
|
|
7794
|
+
|
|
7795
|
+
# Example 18: Create two user defined functions to 'to_upper' and 'sum'
|
|
7796
|
+
# and create new columns with string literal value and
|
|
7797
|
+
# arithmetic operation on column 'Feb'.
|
|
7798
|
+
>>> @udf
|
|
7799
|
+
... def to_upper(s):
|
|
7800
|
+
... if s is not None:
|
|
7801
|
+
... return s.upper()
|
|
7802
|
+
>>>
|
|
7803
|
+
>>> from teradatasqlalchemy.types import INTEGER
|
|
7804
|
+
>>> @udf(returns=INTEGER())
|
|
7805
|
+
... def sum(x, y):
|
|
7806
|
+
... return len(x)+y
|
|
7807
|
+
>>>
|
|
7808
|
+
# Assign all expressions to the DataFrame.
|
|
7809
|
+
>>> res = df.assign(upper_stats = to_upper('accounts'),new_col = 'abc',
|
|
7810
|
+
... len_sum = sum('accounts', 'Feb'), col_sum = df.Feb+1)
|
|
7811
|
+
>>> res
|
|
7812
|
+
Feb Jan Mar Apr datetime col_sum new_col upper_stats len_sum
|
|
7813
|
+
accounts
|
|
7814
|
+
Blue Inc 90.0 50.0 95.0 101.0 17/01/04 91.0 abc BLUE INC 98
|
|
7815
|
+
Alpha Co 210.0 200.0 215.0 250.0 17/01/04 211.0 abc ALPHA CO 218
|
|
7816
|
+
Jones LLC 200.0 150.0 140.0 180.0 17/01/04 201.0 abc JONES LLC 209
|
|
7817
|
+
Yellow Inc 90.0 NaN NaN NaN 17/01/04 91.0 abc YELLOW INC 100
|
|
7818
|
+
Orange Inc 210.0 NaN NaN 250.0 17/01/04 211.0 abc ORANGE INC 220
|
|
7819
|
+
Red Inc 200.0 150.0 140.0 NaN 17/01/04 201.0 abc RED INC 207
|
|
7820
|
+
>>>
|
|
7821
|
+
|
|
7822
|
+
# Example 19: Convert the values is 'accounts' column to upper case using a user
|
|
7823
|
+
# defined function on Vantage Cloud Lake.
|
|
7824
|
+
# Create a Python 3.10.5 environment with given name and description in Vantage.
|
|
7825
|
+
>>> env = create_env('test_udf', 'python_3.10.5', 'Test environment for UDF')
|
|
7826
|
+
User environment 'test_udf' created.
|
|
7827
|
+
>>>
|
|
7828
|
+
# Create a user defined functions to 'to_upper' to get the values in upper case
|
|
7829
|
+
# and pass the user env to run it on.
|
|
7830
|
+
>>> from teradataml.dataframe.functions import udf
|
|
7831
|
+
>>> @udf(env_name = env)
|
|
7832
|
+
... def to_upper(s):
|
|
7833
|
+
... if s is not None:
|
|
7834
|
+
... return s.upper()
|
|
7835
|
+
>>>
|
|
7836
|
+
# Assign the Column Expression returned by user defined function
|
|
7837
|
+
# to the DataFrame.
|
|
7838
|
+
>>> df.assign(upper_stats = to_upper('accounts'))
|
|
7839
|
+
Feb Jan Mar Apr datetime upper_stats
|
|
7840
|
+
accounts
|
|
7841
|
+
Alpha Co 210.0 200.0 215.0 250.0 17/01/04 ALPHA CO
|
|
7842
|
+
Blue Inc 90.0 50.0 95.0 101.0 17/01/04 BLUE INC
|
|
7843
|
+
Yellow Inc 90.0 NaN NaN NaN 17/01/04 YELLOW INC
|
|
7844
|
+
Jones LLC 200.0 150.0 140.0 180.0 17/01/04 JONES LLC
|
|
7845
|
+
Orange Inc 210.0 NaN NaN 250.0 17/01/04 ORANGE INC
|
|
7846
|
+
Red Inc 200.0 150.0 140.0 NaN 17/01/04 RED INC
|
|
7847
|
+
>>>
|
|
7427
7848
|
"""
|
|
7428
7849
|
# Argument validations
|
|
7429
7850
|
awu_matrix = []
|
|
@@ -7469,13 +7890,35 @@ class DataFrame():
|
|
|
7469
7890
|
msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
|
|
7470
7891
|
raise TeradataMlException(msg, MessageCodes.TDMLDF_INFO_ERROR)
|
|
7471
7892
|
|
|
7472
|
-
|
|
7473
|
-
|
|
7474
|
-
|
|
7475
|
-
|
|
7476
|
-
|
|
7477
|
-
|
|
7478
|
-
|
|
7893
|
+
# Create a dictionary of column name to udf expressions and
|
|
7894
|
+
# column name to normal/regular expressions.
|
|
7895
|
+
udf_expr = {}
|
|
7896
|
+
regular_expr = {}
|
|
7897
|
+
for colname, col in kwargs.items():
|
|
7898
|
+
# If value passed in kwargs is a ColumnExpression and is a udf, store it.
|
|
7899
|
+
if isinstance(col, ColumnExpression) and col._udf:
|
|
7900
|
+
udf_expr[colname] = col
|
|
7901
|
+
else:
|
|
7902
|
+
regular_expr[colname] = col
|
|
7903
|
+
df = self
|
|
7904
|
+
|
|
7905
|
+
# If kwargs contains both regular and udf expressions, first create new columns
|
|
7906
|
+
# from normal/regular expressions then on the output dataframe create new columns
|
|
7907
|
+
# from udf expression.
|
|
7908
|
+
if bool(regular_expr):
|
|
7909
|
+
try:
|
|
7910
|
+
(new_meta, new_nodeid) = df._generate_assign_metaexpr_aed_nodeid(drop_columns, **regular_expr)
|
|
7911
|
+
df = df._create_dataframe_from_node(new_nodeid, new_meta, df._index_label)
|
|
7912
|
+
except Exception as err:
|
|
7913
|
+
errcode = MessageCodes.TDMLDF_INFO_ERROR
|
|
7914
|
+
msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
|
|
7915
|
+
raise TeradataMlException(msg, errcode) from err
|
|
7916
|
+
|
|
7917
|
+
if bool(udf_expr):
|
|
7918
|
+
df = df._assign_udf(udf_expr)
|
|
7919
|
+
|
|
7920
|
+
return df
|
|
7921
|
+
|
|
7479
7922
|
|
|
7480
7923
|
@collect_queryband(queryband="DF_get")
|
|
7481
7924
|
def get(self, key):
|
|
@@ -10107,7 +10550,8 @@ class DataFrame():
|
|
|
10107
10550
|
test_size=list_of_fracs[1],
|
|
10108
10551
|
stratify_column=stratify_column,
|
|
10109
10552
|
seed=seed,
|
|
10110
|
-
persist=True
|
|
10553
|
+
persist=True,
|
|
10554
|
+
display_table_name=False)
|
|
10111
10555
|
|
|
10112
10556
|
# Retrieve the table name from TrainTestSplit_out object.
|
|
10113
10557
|
table_name = TrainTestSplit_out.result._table_name
|
|
@@ -10218,10 +10662,10 @@ class DataFrame():
|
|
|
10218
10662
|
|
|
10219
10663
|
# Make this non-lazy. Added this in order to fix https://teradata-pe.atlassian.net/browse/ELE-6368
|
|
10220
10664
|
# Cannot use __execute_node_and_set_table_name because self points to original df.
|
|
10221
|
-
# Hence, setting the
|
|
10665
|
+
# Hence, setting the _table_name with _execute_node_return_db_object_name.
|
|
10222
10666
|
|
|
10223
10667
|
df = self._create_dataframe_from_node(sample_node_id, new_metaexpr, self._index_label)
|
|
10224
|
-
df.
|
|
10668
|
+
df._table_name = df_utils._execute_node_return_db_object_name(sample_node_id, new_metaexpr)
|
|
10225
10669
|
|
|
10226
10670
|
return df
|
|
10227
10671
|
|
|
@@ -10352,26 +10796,14 @@ class DataFrame():
|
|
|
10352
10796
|
where admitted > 0) as temp_table SAMPLE 0.9'
|
|
10353
10797
|
|
|
10354
10798
|
"""
|
|
10799
|
+
# Argument validations
|
|
10800
|
+
awu_matrix = []
|
|
10801
|
+
awu_matrix.append(["full_query", full_query, False, (bool)])
|
|
10802
|
+
# Validate argument types
|
|
10803
|
+
_Validators._validate_function_arguments(awu_matrix)
|
|
10355
10804
|
|
|
10356
10805
|
try:
|
|
10357
|
-
|
|
10358
|
-
awu_matrix = []
|
|
10359
|
-
awu_matrix.append(["full_query", full_query, False, (bool)])
|
|
10360
|
-
# Validate argument types
|
|
10361
|
-
_Validators._validate_function_arguments(awu_matrix)
|
|
10362
|
-
|
|
10363
|
-
node_id = self._nodeid
|
|
10364
|
-
|
|
10365
|
-
if isinstance(self, (DataFrameGroupBy, DataFrameGroupByTime)):
|
|
10366
|
-
# If dataframe is either of type groupby or groupbytime
|
|
10367
|
-
# then get it's parent dataframe nodeid and return queries
|
|
10368
|
-
# for the same
|
|
10369
|
-
node_id = self._aed_utils._aed_get_parent_nodeids(self._nodeid)[0]
|
|
10370
|
-
|
|
10371
|
-
queries = self._aed_utils._aed_show_query(node_id, query_with_reference_to_top=full_query)
|
|
10372
|
-
|
|
10373
|
-
return queries[0][0]
|
|
10374
|
-
|
|
10806
|
+
return self.__generate_aed_query(full_query)
|
|
10375
10807
|
except TeradataMlException:
|
|
10376
10808
|
raise
|
|
10377
10809
|
|
|
@@ -10381,7 +10813,7 @@ class DataFrame():
|
|
|
10381
10813
|
except Exception as err:
|
|
10382
10814
|
errcode = MessageCodes.TDMLDF_INFO_ERROR
|
|
10383
10815
|
msg = Messages.get_message(errcode)
|
|
10384
|
-
raise TeradataMlException(msg, errcode) from err
|
|
10816
|
+
raise TeradataMlException(msg, errcode) from err
|
|
10385
10817
|
|
|
10386
10818
|
@collect_queryband(queryband="DF_mapRow")
|
|
10387
10819
|
def map_row(self,
|
|
@@ -13840,7 +14272,7 @@ class DataFrame():
|
|
|
13840
14272
|
Types: int OR NoneType
|
|
13841
14273
|
|
|
13842
14274
|
RETURNS:
|
|
13843
|
-
iterator, an object to iterate over
|
|
14275
|
+
iterator, an object to iterate over row in the DataFrame.
|
|
13844
14276
|
|
|
13845
14277
|
RAISES:
|
|
13846
14278
|
None
|
|
@@ -13889,9 +14321,10 @@ class DataFrame():
|
|
|
13889
14321
|
cur = execute_sql(query)
|
|
13890
14322
|
|
|
13891
14323
|
if name:
|
|
14324
|
+
columns = [column[0] for column in cur.description]
|
|
13892
14325
|
for rec in cur:
|
|
13893
|
-
|
|
13894
|
-
yield
|
|
14326
|
+
row = _Row(columns=columns, values=rec)
|
|
14327
|
+
yield row
|
|
13895
14328
|
else:
|
|
13896
14329
|
for rec in cur:
|
|
13897
14330
|
yield rec
|
teradataml/dataframe/fastload.py
CHANGED
|
@@ -30,7 +30,7 @@ from teradataml.dataframe.copy_to import copy_to_sql, \
|
|
|
30
30
|
_create_pti_table_object, _extract_column_info, \
|
|
31
31
|
_check_columns_insertion_compatible
|
|
32
32
|
from teradataml.dataframe.data_transfer import _DataTransferUtils
|
|
33
|
-
from
|
|
33
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
@collect_queryband(queryband="fstLd")
|
|
@@ -348,11 +348,11 @@ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
|
|
|
348
348
|
308 2014-03-06 10:01:20.000000
|
|
349
349
|
|
|
350
350
|
# Validate error and warning tables.
|
|
351
|
-
>>> DataFrame("fld_errors")
|
|
351
|
+
>>> DataFrame(in_schema("stage_db", "fld_errors"))
|
|
352
352
|
batch_no error_message
|
|
353
353
|
1 [Session 14527] [Teradata Database] [Error 2673] FastLoad failed to insert 1 of 9 batched rows. Batched row 3 failed to insert because of Teradata Database error 2673 in "target_db"."fastload_with_err_warn_tbl_stag_db"."C_timestamp"
|
|
354
354
|
|
|
355
|
-
>>> DataFrame("fld_warnings")
|
|
355
|
+
>>> DataFrame(in_schema("stage_db", "fld_warnings"))
|
|
356
356
|
batch_no error_message
|
|
357
357
|
batch_summary [Session 14526] [Teradata SQL Driver] [Warning 518] Found 1 duplicate or faulty row(s) while ending FastLoad of database table "target_db"."fastload_with_err_warn_tbl_stag_db": expected a row count of 8, got a row count of 7
|
|
358
358
|
|