teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +306 -0
- teradataml/__init__.py +10 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +299 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +13 -3
- teradataml/analytics/json_parser/utils.py +13 -6
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +11 -2
- teradataml/analytics/table_operator/__init__.py +4 -3
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +66 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +247 -307
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +325 -86
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +122 -153
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +72 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +152 -120
- teradataml/common/messagecodes.py +11 -2
- teradataml/common/messages.py +4 -1
- teradataml/common/sqlbundle.py +26 -4
- teradataml/common/utils.py +225 -14
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +82 -2
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +27 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +1002 -201
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +867 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +840 -33
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +878 -34
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
- teradataml/options/__init__.py +9 -23
- teradataml/options/configure.py +42 -4
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +13 -9
- teradataml/scriptmgmt/lls_utils.py +77 -23
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +102 -56
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +34 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
teradataml/dataframe/sql.py
CHANGED
|
@@ -40,7 +40,7 @@ import sqlalchemy as sqlalc
|
|
|
40
40
|
|
|
41
41
|
import re
|
|
42
42
|
|
|
43
|
-
from teradatasqlalchemy.dialect import dialect as td_dialect, compiler as td_compiler
|
|
43
|
+
from teradatasqlalchemy.dialect import dialect as td_dialect, compiler as td_compiler, TeradataTypeCompiler as td_type_compiler
|
|
44
44
|
from teradatasqlalchemy import (INTEGER, SMALLINT, BIGINT, BYTEINT, DECIMAL, FLOAT, NUMBER)
|
|
45
45
|
from teradatasqlalchemy import (DATE, TIME, TIMESTAMP)
|
|
46
46
|
from teradatasqlalchemy import (BYTE, VARBYTE, BLOB)
|
|
@@ -52,7 +52,7 @@ from teradatasqlalchemy import (INTERVAL_DAY, INTERVAL_DAY_TO_HOUR, INTERVAL_DAY
|
|
|
52
52
|
INTERVAL_YEAR_TO_MONTH)
|
|
53
53
|
from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP)
|
|
54
54
|
from teradatasqlalchemy import XML, GEOMETRY
|
|
55
|
-
from
|
|
55
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
56
56
|
import decimal
|
|
57
57
|
import datetime as dt
|
|
58
58
|
from teradataml.dataframe.window import Window
|
|
@@ -222,6 +222,10 @@ class _MetaExpression(object):
|
|
|
222
222
|
def __repr__(self):
|
|
223
223
|
return repr(self.__t)
|
|
224
224
|
|
|
225
|
+
def _get_table_expr(self):
|
|
226
|
+
return self.__t
|
|
227
|
+
|
|
228
|
+
|
|
225
229
|
class _PandasTableExpression(TableExpression):
|
|
226
230
|
|
|
227
231
|
def _assign(self, drop_columns, **kw):
|
|
@@ -484,6 +488,7 @@ class _SQLTableExpression(_PandasTableExpression):
|
|
|
484
488
|
columns = []
|
|
485
489
|
for c in kw['column_order']:
|
|
486
490
|
name = c.strip()
|
|
491
|
+
# Get case-insensitive column names from Table object.
|
|
487
492
|
col = table.c.get(name, table.c.get(name.lower(), table.c.get(name.upper())))
|
|
488
493
|
|
|
489
494
|
if col is None:
|
|
@@ -612,8 +617,9 @@ class _SQLTableExpression(_PandasTableExpression):
|
|
|
612
617
|
expression = display_number(c.expression)
|
|
613
618
|
elif isinstance(c.type, tuple(datetime_period_types)):
|
|
614
619
|
expression = cast_expr(c.expression, 30)
|
|
620
|
+
# Change the size as INTERVAL_DAY_TO_SECOND(4, 6) is failing.
|
|
615
621
|
elif isinstance(c.type, tuple(interval_types)):
|
|
616
|
-
expression = cast_expr(c.expression,
|
|
622
|
+
expression = cast_expr(c.expression, 25)
|
|
617
623
|
elif isinstance(c.type, GEOMETRY):
|
|
618
624
|
expression = cast_expr(c.expression, display.geometry_column_length) if \
|
|
619
625
|
display.geometry_column_length is not None else c.expression.label(c.name)
|
|
@@ -5461,14 +5467,19 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5461
5467
|
expression = literal_column(expression)
|
|
5462
5468
|
self.kw = kw
|
|
5463
5469
|
self.expression = expression
|
|
5464
|
-
self.type = kw.get("type", expression.type)
|
|
5470
|
+
self.type = kw.get("type", expression.type if expression is not None else kw.get("udf_type"))
|
|
5465
5471
|
# Initial ColumnExpression has only one dataframe and hence
|
|
5466
5472
|
# __has_multiple_dataframes = False.
|
|
5467
5473
|
# eg: df1.col1, df2.col2
|
|
5468
5474
|
self.__has_multiple_dataframes = False
|
|
5469
5475
|
self.__names = []
|
|
5470
|
-
self.
|
|
5471
|
-
|
|
5476
|
+
self._udf = kw.get("udf", None)
|
|
5477
|
+
self._udf_args = kw.get("udf_args", None)
|
|
5478
|
+
self._env_name = kw.get("env_name", None)
|
|
5479
|
+
self._delimiter = kw.get("delimiter", None)
|
|
5480
|
+
self._quotechar = kw.get("quotechar", None)
|
|
5481
|
+
self._udf_script = kw.get("udf_script", None)
|
|
5482
|
+
self.alias_name = self.compile() if (self._udf or self._udf_script) is None else None
|
|
5472
5483
|
|
|
5473
5484
|
@property
|
|
5474
5485
|
def expression(self):
|
|
@@ -5648,23 +5659,23 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5648
5659
|
"""
|
|
5649
5660
|
Calls the compile method of the underlying sqlalchemy.Column
|
|
5650
5661
|
"""
|
|
5651
|
-
|
|
5652
|
-
|
|
5653
|
-
|
|
5654
|
-
|
|
5655
|
-
|
|
5656
|
-
|
|
5657
|
-
|
|
5658
|
-
|
|
5659
|
-
|
|
5660
|
-
return str(self.expression.compile(*args, **
|
|
5662
|
+
kw_new = dict({'dialect': td_dialect(),
|
|
5663
|
+
'compile_kwargs':
|
|
5664
|
+
{
|
|
5665
|
+
'include_table': False,
|
|
5666
|
+
'literal_binds': True
|
|
5667
|
+
}
|
|
5668
|
+
})
|
|
5669
|
+
if len(kw) != 0:
|
|
5670
|
+
kw_new.update(kw)
|
|
5671
|
+
return str(self.expression.compile(*args, **kw_new))
|
|
5661
5672
|
|
|
5662
5673
|
def compile_label(self, label):
|
|
5663
5674
|
"""
|
|
5664
5675
|
DESCRIPTION:
|
|
5665
5676
|
Compiles expression with label, by calling underlying sqlalchemy methods.
|
|
5666
5677
|
|
|
5667
|
-
|
|
5678
|
+
PARAMETERS:
|
|
5668
5679
|
label:
|
|
5669
5680
|
Required Argument.
|
|
5670
5681
|
Specifies the label to be used to alias the compiled expression.
|
|
@@ -5694,7 +5705,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5694
5705
|
with the "value". Use this function either to replace or remove
|
|
5695
5706
|
NA from Column.
|
|
5696
5707
|
|
|
5697
|
-
|
|
5708
|
+
PARAMETERS:
|
|
5698
5709
|
value:
|
|
5699
5710
|
Required Argument.
|
|
5700
5711
|
Specifies the replacement value for null values in the column.
|
|
@@ -5833,7 +5844,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5833
5844
|
return _SQLColumnExpression(func.concat(*columns_))
|
|
5834
5845
|
|
|
5835
5846
|
@collect_queryband(queryband="DFC_cast")
|
|
5836
|
-
def cast(self, type_ = None):
|
|
5847
|
+
def cast(self, type_ = None, format = None, timezone = None):
|
|
5837
5848
|
"""
|
|
5838
5849
|
DESCRIPTION:
|
|
5839
5850
|
Apply the CAST SQL function to the column with the type specified.
|
|
@@ -5849,6 +5860,32 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5849
5860
|
Default value: None
|
|
5850
5861
|
Types: teradatasqlalchemy type or object of teradatasqlalchemy type
|
|
5851
5862
|
|
|
5863
|
+
format:
|
|
5864
|
+
Optional Argument.
|
|
5865
|
+
Specifies a variable length string containing formatting characters
|
|
5866
|
+
that define the display format for the data type.
|
|
5867
|
+
Formats can be specified for columns that have character, numeric, byte,
|
|
5868
|
+
DateTime, Period or UDT data types.
|
|
5869
|
+
Note:
|
|
5870
|
+
* Teradata supports different formats. Look at 'Formats' section in
|
|
5871
|
+
"SQL-Data-Types-and-Literals" in Vantage documentation for additional
|
|
5872
|
+
details.
|
|
5873
|
+
Default value: None
|
|
5874
|
+
Types: str
|
|
5875
|
+
|
|
5876
|
+
timezone:
|
|
5877
|
+
Optional Argument.
|
|
5878
|
+
Specifies the timezone string.
|
|
5879
|
+
Check "SQL-Date-and-Time-Functions-and-Expressions" in
|
|
5880
|
+
Vantage documentation for supported timezones.
|
|
5881
|
+
Type: ColumnExpression or str.
|
|
5882
|
+
|
|
5883
|
+
RETURNS:
|
|
5884
|
+
ColumnExpression
|
|
5885
|
+
|
|
5886
|
+
RAISES:
|
|
5887
|
+
TeradataMlException
|
|
5888
|
+
|
|
5852
5889
|
EXAMPLES:
|
|
5853
5890
|
>>> load_example_data("dataframe","admissions_train")
|
|
5854
5891
|
>>> df = DataFrame('admissions_train')
|
|
@@ -5873,8 +5910,24 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5873
5910
|
programming str
|
|
5874
5911
|
admitted int
|
|
5875
5912
|
|
|
5876
|
-
>>>
|
|
5877
|
-
>>>
|
|
5913
|
+
>>> dataframe_dict = {"id": [100, 200,300],
|
|
5914
|
+
>>> "timestamp_col": ['1000-01-10 23:00:12-02:00', '2015-01-08 13:00:00+12:00', '2014-12-10 10:00:35-08:00'],
|
|
5915
|
+
>>> "timezone_col": ["GMT", "America Pacific", "GMT+10"]}
|
|
5916
|
+
>>> pandas_df = pd.DataFrame(dataframe_dict)
|
|
5917
|
+
>>> copy_to_sql(pandas_df, table_name = 'new_table', if_exists = 'replace')
|
|
5918
|
+
>>> df1 = DataFrame("new_table")
|
|
5919
|
+
>>> df1
|
|
5920
|
+
id timestamp_col timezone_col
|
|
5921
|
+
300 2014-12-10 10:00:35-08:00 GMT+10
|
|
5922
|
+
200 2015-01-08 13:00:00+12:00 America Pacific
|
|
5923
|
+
100 1000-01-10 23:00:12-02:00 GMT
|
|
5924
|
+
>>> df1.dtypes
|
|
5925
|
+
id int
|
|
5926
|
+
timestamp_col str
|
|
5927
|
+
timezone_col str
|
|
5928
|
+
|
|
5929
|
+
# Example 1: Let's try creating a new DataFrame casting 'id' column (of type INTEGER) to VARCHAR(5),
|
|
5930
|
+
# an object of a teradatasqlalchemy type.
|
|
5878
5931
|
>>> from teradatasqlalchemy import VARCHAR
|
|
5879
5932
|
>>> new_df = df.assign(char_id = df.id.cast(type_=VARCHAR(5)))
|
|
5880
5933
|
>>> new_df
|
|
@@ -5899,8 +5952,8 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5899
5952
|
admitted int
|
|
5900
5953
|
char_id str
|
|
5901
5954
|
|
|
5902
|
-
|
|
5903
|
-
|
|
5955
|
+
# Example 2: Now let's try creating a new DataFrame casting 'id' column (of type INTEGER) to VARCHAR,
|
|
5956
|
+
# a teradatasqlalchemy type.
|
|
5904
5957
|
>>> new_df_2 = df.assign(char_id = df.id.cast(type_=VARCHAR))
|
|
5905
5958
|
>>> new_df_2
|
|
5906
5959
|
masters gpa stats programming admitted char_id
|
|
@@ -5924,25 +5977,65 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5924
5977
|
admitted int
|
|
5925
5978
|
char_id str
|
|
5926
5979
|
|
|
5927
|
-
|
|
5928
|
-
|
|
5980
|
+
# Example 3: Let's try filtering some data with a match on a column cast to another type,
|
|
5981
|
+
# an object of a teradatasqlalchemy type.
|
|
5929
5982
|
>>> df[df.id.cast(VARCHAR(5)) == '1']
|
|
5930
5983
|
masters gpa stats programming admitted
|
|
5931
5984
|
id
|
|
5932
5985
|
1 yes 3.95 Beginner Beginner 0
|
|
5933
5986
|
|
|
5934
|
-
|
|
5987
|
+
# Example 4: Now let's try the same, this time using a teradatasqlalchemy type.
|
|
5935
5988
|
>>> df[df.id.cast(VARCHAR) == '1']
|
|
5936
5989
|
masters gpa stats programming admitted
|
|
5937
5990
|
id
|
|
5938
5991
|
1 yes 3.95 Beginner Beginner 0
|
|
5939
5992
|
|
|
5940
|
-
|
|
5941
|
-
|
|
5993
|
+
# Example 5: Let's try creating a new DataFrame casting 'timestamp_col' column (of type VARCHAR) to TIMESTAMP,
|
|
5994
|
+
# using format.
|
|
5995
|
+
>>> new_df1 = df1.assign(new_col = df1.timestamp_col.cast(TIMESTAMP, format='Y4-MM-DDBHH:MI:SSBZ'))
|
|
5996
|
+
id timestamp_col timezone_col new_col
|
|
5997
|
+
300 2014-12-10 10:00:35-08:00 GMT+10 2014-12-10 18:00:35
|
|
5998
|
+
200 2015-01-08 13:00:00+12:00 America Pacific 2015-01-08 01:00:00
|
|
5999
|
+
100 1000-01-10 23:00:12-02:00 GMT 1000-01-11 01:00:12
|
|
6000
|
+
>>> new_df1.tdtypes
|
|
6001
|
+
id int
|
|
6002
|
+
timestamp_col str
|
|
6003
|
+
timezone_col str
|
|
6004
|
+
new_col datetime.datetime
|
|
6005
|
+
|
|
6006
|
+
# Example 6: Let's try creating a new DataFrame casting 'id' column (of type INTEGER) to VARCHAR,
|
|
6007
|
+
# using format.
|
|
6008
|
+
>>> new_df2 = df1.assign(new_col = df1.id.cast(VARCHAR, format='zzz.zz'))
|
|
6009
|
+
id timestamp_col timezone_col new_col
|
|
6010
|
+
300 2014-12-10 10:00:35-08:00 GMT+10 300.00
|
|
6011
|
+
200 2015-01-08 13:00:00+12:00 America Pacific 200.00
|
|
6012
|
+
100 1000-01-10 23:00:12-02:00 GMT 100.00
|
|
6013
|
+
>>> new_df2.dtypes
|
|
6014
|
+
id int
|
|
6015
|
+
timestamp_col str
|
|
6016
|
+
timezone_col str
|
|
6017
|
+
new_col str
|
|
6018
|
+
|
|
6019
|
+
# Example 7: Let's try creating a new DataFrame casting 'timestamp_with_timezone' column (of type TIMESTAMP) to
|
|
6020
|
+
# TIMESTAMP WITH TIMEZONE, with offset 'GMT+10'.
|
|
6021
|
+
>>> new_df3 = new_df1.assign(timestamp_with_timezone = new_df1.new_col.cast(TIMESTAMP(timezone=True), timezone='GMT+10'))
|
|
6022
|
+
id timestamp_col timezone_col new_col timestamp_with_timezone
|
|
6023
|
+
300 2014-12-10 10:00:35-08:00 GMT+10 2014-12-10 18:00:35 2014-12-11 04:00:35.000000+10:00
|
|
6024
|
+
200 2015-01-08 13:00:00+12:00 America Pacific 2015-01-08 01:00:00 2015-01-08 11:00:00.000000+10:00
|
|
6025
|
+
100 1000-01-10 23:00:12-02:00 GMT 1000-01-11 01:00:12 1000-01-11 11:00:12.000000+10:00
|
|
6026
|
+
>>> new_df3.dtypes
|
|
6027
|
+
id int
|
|
6028
|
+
timestamp_col str
|
|
6029
|
+
timezone_col str
|
|
6030
|
+
new_col datetime.datetime
|
|
6031
|
+
timestamp_with_timezone datetime.datetime
|
|
6032
|
+
"""
|
|
6033
|
+
# Validating Arguments
|
|
6034
|
+
arg_type_matrix = []
|
|
6035
|
+
arg_type_matrix.append(["format", format , True, (str), True])
|
|
6036
|
+
arg_type_matrix.append(["timezone", timezone, True, (str, ColumnExpression, int, float), True])
|
|
6037
|
+
_Validators._validate_function_arguments(arg_type_matrix)
|
|
5942
6038
|
|
|
5943
|
-
RAISES:
|
|
5944
|
-
TeradataMlException
|
|
5945
|
-
"""
|
|
5946
6039
|
# If type_ is None or not specified, raise an Exception
|
|
5947
6040
|
if type_ is None:
|
|
5948
6041
|
raise TeradataMlException(Messages.get_message(MessageCodes.MISSING_ARGS, 'type_'),
|
|
@@ -5953,8 +6046,26 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5953
6046
|
raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, 'type_',
|
|
5954
6047
|
'a valid teradatasqlalchemy type'),
|
|
5955
6048
|
MessageCodes.UNSUPPORTED_DATATYPE)
|
|
5956
|
-
|
|
5957
6049
|
expression = func.cast(self.expression, type_=type_).label(self.name)
|
|
6050
|
+
if format or timezone:
|
|
6051
|
+
# Casting to VARCHAR or CHAR with format require this type of query
|
|
6052
|
+
# CAST((CAST (F1 AS FORMAT 'format_str')) AS [CHAR|VARCHAR])
|
|
6053
|
+
if isinstance(type_, (VARCHAR, CHAR)) or (isinstance(type_, type) and issubclass(type_, (VARCHAR, CHAR))):
|
|
6054
|
+
expression = func.cast(literal_column("""CAST({} AS FORMAT '{}')""".format(self.compile(), format)), type_=type_)
|
|
6055
|
+
else:
|
|
6056
|
+
# Compile _TDType to string
|
|
6057
|
+
type_compiler = td_type_compiler(td_dialect)
|
|
6058
|
+
type_expression = type_compiler.process(type_) if not isinstance(type_, type) else type_compiler.process(type_())
|
|
6059
|
+
# Create a query with format and timezone string
|
|
6060
|
+
# CAST(TIMESTAMP "column_name" AS "_TDType" FORMAT "format" AT TIMEZONE "timezone_str")
|
|
6061
|
+
format = " FORMAT '{}'".format(format) if format else ""
|
|
6062
|
+
if timezone and isinstance(timezone, _SQLColumnExpression):
|
|
6063
|
+
timezone = _SQLColumnExpression(literal_column(f' AT TIME ZONE {timezone.compile()}')).compile()
|
|
6064
|
+
elif timezone:
|
|
6065
|
+
timezone = _SQLColumnExpression(literal_column(_SQLColumnExpression._timezone_string(timezone))).compile()
|
|
6066
|
+
else:
|
|
6067
|
+
timezone = ""
|
|
6068
|
+
expression = literal_column("""CAST({} AS {}{}{})""".format(self.compile(), type_expression, timezone, format), type_=type_)
|
|
5958
6069
|
return _SQLColumnExpression(expression)
|
|
5959
6070
|
|
|
5960
6071
|
def __hash__(self):
|
|
@@ -6081,12 +6192,19 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
6081
6192
|
# If user has not passed any type, then set it to
|
|
6082
6193
|
# NullType().
|
|
6083
6194
|
type = sqlalc.sql.sqltypes.NullType()
|
|
6084
|
-
|
|
6195
|
+
# Boolean flag to treat function as an instance method.
|
|
6196
|
+
function_has_col_caller = column_function
|
|
6085
6197
|
# Generate the function syntax based on whether the
|
|
6086
6198
|
# function is column function or not.
|
|
6087
6199
|
if column_function:
|
|
6088
6200
|
name = quoted_name("{}.{}".format(col_name, func_name),
|
|
6089
6201
|
False)
|
|
6202
|
+
# Dynamic function gets called on teradataml._SQLColumnExpression type object.
|
|
6203
|
+
# 'expression' attribute of _SQLColumnExpression object holds
|
|
6204
|
+
# corresponding SQLAlchemy.Expression type object.
|
|
6205
|
+
# SQLAlchemy.Expression type object should be available from FunctionElement.
|
|
6206
|
+
# This 'func_caller' attribute points to that Expression object.
|
|
6207
|
+
func_caller = self.expression
|
|
6090
6208
|
else:
|
|
6091
6209
|
name = quoted_name(func_name, False)
|
|
6092
6210
|
|
|
@@ -10172,3 +10290,692 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
10172
10290
|
|
|
10173
10291
|
self.alias_name = name
|
|
10174
10292
|
return self
|
|
10293
|
+
|
|
10294
|
+
@staticmethod
|
|
10295
|
+
def _timezone_string(value):
|
|
10296
|
+
"""
|
|
10297
|
+
DESCRIPTION:
|
|
10298
|
+
Function to return timezone string in correct format.
|
|
10299
|
+
|
|
10300
|
+
PARAMETERS:
|
|
10301
|
+
value:
|
|
10302
|
+
Required Argument.
|
|
10303
|
+
Specifies timezone string.
|
|
10304
|
+
Types: str, int , float
|
|
10305
|
+
|
|
10306
|
+
RETURNS:
|
|
10307
|
+
bool
|
|
10308
|
+
"""
|
|
10309
|
+
if isinstance(value, (float, int)):
|
|
10310
|
+
return " AT TIME ZONE {}".format(value)
|
|
10311
|
+
if value.upper() not in ['LOCAL']:
|
|
10312
|
+
return " AT TIME ZONE '{}'".format(value)
|
|
10313
|
+
return " AT {}".format(value)
|
|
10314
|
+
|
|
10315
|
+
def to_timestamp(self, format=None, type_=TIMESTAMP, timezone=None):
    """
    DESCRIPTION:
        Converts string or integer to a TIMESTAMP data type or TIMESTAMP WITH
        TIME ZONE data type.
        Note:
            * POSIX epoch conversion is implicit in the "to_timestamp" when column
              is integer type. POSIX epoch is the number of seconds that have elapsed
              since midnight Coordinated Universal Time (UTC) of January 1, 1970.

    PARAMETERS:
        format:
            Specifies the format of string column.
            Argument is not required when column is integer type, Otherwise Required.
            For valid 'format' values, see documentation on
            "to_date" or "help(df.col_name.to_date)".
            Type: ColumnExpression or str

        type_:
            Optional Argument.
            Specifies a TIMESTAMP type or an object of a
            TIMESTAMP type that the column needs to be cast to.
            Default value: TIMESTAMP
            Permitted Values: TIMESTAMP data type
            Types: teradatasqlalchemy type or object of teradatasqlalchemy type

        timezone:
            Optional Argument.
            Specifies the timezone string or a numeric UTC offset.
            For valid timezone strings, user should check Vantage documentation.
            Type: ColumnExpression, str, int or float.

    RETURNS:
        ColumnExpression

    RAISES:
        TeradataMlException - if "type_" is not a valid teradatasqlalchemy type.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml", "timestamp_data")

        # Create a DataFrame on 'timestamp_data' table.
        >>> df = DataFrame("timestamp_data")

        # Example 1: Convert Epoch seconds to timestamp.
        >>> df.select(['id','timestamp_col1']).assign(col = df.timestamp_col1.to_timestamp())
           id  timestamp_col1                         col
            2     45678910234  3417-07-05 02:10:34.000000
            1          878986  1970-01-11 04:09:46.000000
            0          123456  1970-01-02 10:17:36.000000

        # Example 2: Convert timestamp string to timestamp with timezone in
        #            format mentioned in column "format_col".
        >>> df.select(['id', 'timestamp_col', 'format_col']).assign(col = df.timestamp_col.to_timestamp(df.format_col, TIMESTAMP(timezone=True)))
           id                timestamp_col                          format_col                               col
            2  2015-01-08 00:00:12.2+10:00  YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM  2015-01-08 00:00:12.200000+10:00
            1             2015-01-08 13:00                   YYYY-MM-DD HH24:MI  2015-01-08 13:00:00.000000+00:00
            0        2015-01-08 00:00:12.2          YYYY-MM-DD HH24:MI:SS.FF6  2015-01-08 00:00:12.200000+00:00

        # Example 3: Convert Epoch seconds to timestamp with timezone in 'GMT+2' location.
        >>> df.select(['id', 'timestamp_col1', 'format_col']).assign(col = df.timestamp_col1.to_timestamp(df.format_col, TIMESTAMP(timezone=True), 'GMT+2'))
           id  timestamp_col1                          format_col                               col
            2     45678910234  YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM  3417-07-05 04:10:34.000000+02:00
            1          878986                   YYYY-MM-DD HH24:MI  1970-01-11 06:09:46.000000+02:00
            0          123456          YYYY-MM-DD HH24:MI:SS.FF6  1970-01-02 12:17:36.000000+02:00
    """
    # Validating Arguments
    arg_type_matrix = []
    arg_type_matrix.append(["format", format, True, (str, ColumnExpression), True])
    arg_type_matrix.append(["timezone", timezone, True, (str, ColumnExpression, int, float), True])
    _Validators._validate_function_arguments(arg_type_matrix)

    if not UtilFuncs._is_valid_td_type(type_):
        raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, 'type_',
                                                       'a valid teradatasqlalchemy type'),
                                  MessageCodes.UNSUPPORTED_DATATYPE)

    _format = format.expression if isinstance(format, _SQLColumnExpression) else format
    _params = [self.expression, _format]
    # format is not required when column is of below integer types;
    # the value is interpreted as POSIX epoch seconds instead.
    if isinstance(self._type, (BYTEINT, SMALLINT, INTEGER, BIGINT)):
        _params.pop()
    # Use to_timestamp_tz when below 3 conditions are true.
    # Resultant query will be Example:
    # TO_TIMESTAMP('2015-10-08 00:00:12.2') or TO_TIMESTAMP_TZ('2015-10-08 00:00:12.2+03:00') based on type_
    _fun = getattr(func, "to_timestamp_tz") if isinstance(type_, TIMESTAMP) and type_.timezone and len(_params) == 2 \
        else getattr(func, "to_timestamp")
    # Compare against None explicitly so that a numeric UTC offset of 0
    # (a valid timezone per the validator above) is not silently ignored.
    if timezone is None:
        return _SQLColumnExpression(_fun(*_params), type=type_)

    # If user uses timezone generate query with time zone.
    # Resultant query will be Example:
    # TO_TIMESTAMP('2015-10-08 00:00:12.2') at time zone 'America Alaska',
    # TO_TIMESTAMP_TZ('2015-10-08 00:00:12.2+03:00') at time zone 'America Alaska'.
    if isinstance(timezone, _SQLColumnExpression):
        _timezone_expr = _SQLColumnExpression(literal_column(f' AT TIME ZONE {timezone.compile()}')).compile()
    else:
        _timezone_expr = _SQLColumnExpression(literal_column(_SQLColumnExpression._timezone_string(timezone))).compile()
    return _SQLColumnExpression(_SQLColumnExpression(_fun(*_params)).compile() + _timezone_expr, type=type_)
|
|
10426
|
+
def extract(self, value, timezone=None):
    """
    DESCRIPTION:
        Extracts a single specified field from any DateTime, Interval or timestamp value,
        converting it to an exact numeric value.

    PARAMETERS:
        value:
            Required Argument.
            Specifies the field which needs to be extracted.
            Permitted Values: YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, TIMEZONE_HOUR, TIMEZONE_MINUTE
            Note:
                * Permitted Values are case insensitive.
            Type: str

        timezone:
            Optional Argument.
            Specifies the timezone string or a numeric UTC offset.
            For valid timezone strings, user should check Vantage documentation.
            Type: ColumnExpression, str, int or float.

    RETURNS:
        ColumnExpression

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("uaf", "Traindata")

        # Create a DataFrame on 'Traindata' table.
        >>> temp_df = DataFrame("Traindata")
        >>> df = temp_df.select(["seq_no", "schedule_date", "arrivalTime"])

        # Example 1: Extract year from column 'schedule_date'.
        >>> df.assign(col = df.schedule_date.extract('YEAR'))
               schedule_date          arrivalTime   col
        seq_no
        26          16/03/26  2016-03-26 12:33:05  2016
        24          16/03/26  2016-03-26 12:25:06  2016

        # Example 2: Extract hour from column 'arrivalTime'.
        >>> df.assign(col = df.arrivalTime.extract('HOUR'))
               schedule_date          arrivalTime  col
        seq_no
        26          16/03/26  2016-03-26 12:33:05   12
        3           16/03/26  2016-03-26 10:52:05   10

        # Example 3: Extract hour from column 'arrivalTime' with offset '-11:00'.
        >>> df.assign(col = df.arrivalTime.extract('HOUR', '-11:00'))
               schedule_date          arrivalTime  col
        seq_no
        26          16/03/26  2016-03-26 12:33:05    1
        3           16/03/26  2016-03-26 10:52:05   23

        # Example 4: Extract hour from column 'arrivalTime' with offset 10.
        >>> df.assign(col = df.arrivalTime.extract('HOUR', 10))
               schedule_date          arrivalTime  col
        seq_no
        26          16/03/26  2016-03-26 12:33:05   22
        3           16/03/26  2016-03-26 10:52:05   20
    """
    # Validating Arguments
    arg_type_matrix = []
    # "value" is a required argument (optional flag is False), matching
    # the documented contract above.
    arg_type_matrix.append(["value", value, False, (str), True])
    arg_type_matrix.append(["timezone", timezone, True, (str, ColumnExpression, int, float), True])
    _Validators._validate_function_arguments(arg_type_matrix)

    # If user doesn't provide timezone simply use extract functionality.
    # Compare against None explicitly so that a numeric UTC offset of 0
    # (a valid timezone per the validator above) is not silently ignored.
    if timezone is None:
        return _SQLColumnExpression(func.extract(value, self.expression))

    # If user uses timezone generate query with time zone.
    if isinstance(timezone, _SQLColumnExpression):
        _timezone_expr = _SQLColumnExpression(literal_column(f' AT TIME ZONE {timezone.compile()}')).compile()
    else:
        _timezone_expr = _SQLColumnExpression(literal_column(_SQLColumnExpression._timezone_string(timezone))).compile()
    return _SQLColumnExpression(func.extract(value, literal_column('({}{})'.format(self.compile(), _timezone_expr))))
|
|
10550
|
+
def to_interval(self, value=None, type_=INTERVAL_DAY_TO_SECOND):
    """
    DESCRIPTION:
        Converts a numeric value or string value into an INTERVAL_DAY_TO_SECOND or INTERVAL_YEAR_TO_MONTH value.

    PARAMETERS:
        value:
            Optional, when column type is VARCHAR or CHAR, otherwise required.
            Specifies the unit of value for numeric value.
            when type_ is INTERVAL_DAY_TO_SECOND permitted values:
                * DAY, HOUR, MINUTE, SECOND
            when type_ is INTERVAL_YEAR_TO_MONTH permitted values:
                * YEAR, MONTH
            Note:
                * Permitted Values are case insensitive.
            Type: str or ColumnExpression

        type_:
            Optional Argument.
            Specifies a teradatasqlalchemy type or an object of a teradatasqlalchemy type
            that the column needs to be cast to.
            Default value: INTERVAL_DAY_TO_SECOND
            Permitted Values: INTERVAL_DAY_TO_SECOND or INTERVAL_YEAR_TO_MONTH type.
            Types: teradatasqlalchemy type or object of teradatasqlalchemy type

    RETURNS:
        ColumnExpression

    RAISES:
        TeradataMlException - if "type_" is not a valid teradatasqlalchemy type.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml", "interval_data")

        # Create a DataFrame on 'interval_data' table.
        >>> df = DataFrame("interval_data")

        # Example 1: Convert "int_col" column to INTERVAL_DAY_TO_SECOND with value
        #            provided in "value_col".
        >>> df.assign(col = df.int_col.to_interval(df.value_col))
           id  int_col value_col value_col1         str_col1 str_col2                 col
            2      657    MINUTE      MONTH            PT73H    -P14M    0 10:57:00.000000
            1      240      HOUR       YEAR   P100DT4H23M59S  P100Y4M   10 00:00:00.000000

        # Example 2: Convert int_col to INTERVAL_YEAR_TO_MONTH when value = 'MONTH'.
        >>> df.assign(col = df.int_col.to_interval('MONTH', INTERVAL_YEAR_TO_MONTH))
           id  int_col value_col value_col1         str_col1 str_col2     col
            2      657    MINUTE      MONTH            PT73H    -P14M   54-09
            1      240      HOUR       YEAR   P100DT4H23M59S  P100Y4M   20-00

        # Example 3: Convert string column "str_col1" to INTERVAL_DAY_TO_SECOND.
        >>> df.assign(col = df.str_col1.to_interval())
           id  int_col value_col value_col1         str_col1 str_col2                  col
            2      657    MINUTE      MONTH            PT73H    -P14M     3 01:00:00.000000
            1      240      HOUR       YEAR   P100DT4H23M59S  P100Y4M   100 04:23:59.000000

        # Example 4: Convert string column "str_col2" to INTERVAL_YEAR_TO_MONTH.
        >>> df.assign(col = df.str_col2.to_interval(type_=INTERVAL_YEAR_TO_MONTH))
           id  int_col value_col value_col1         str_col1 str_col2      col
            2      657    MINUTE      MONTH            PT73H    -P14M    -1-02
            1      240      HOUR       YEAR   P100DT4H23M59S  P100Y4M   100-04
    """
    # Validating Arguments
    arg_type_matrix = []
    arg_type_matrix.append(["value", value, True, (str, ColumnExpression), True])
    _Validators._validate_function_arguments(arg_type_matrix)

    if not UtilFuncs._is_valid_td_type(type_):
        raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, 'type_',
                                                       'a valid teradatasqlalchemy type'),
                                  MessageCodes.UNSUPPORTED_DATATYPE)

    # "type_" may be passed either as an instance or as the class itself;
    # both forms select the day-to-second family of functions.
    _is_day_to_second = isinstance(type_, INTERVAL_DAY_TO_SECOND) or \
                        (isinstance(type_, type) and issubclass(type_, INTERVAL_DAY_TO_SECOND))

    # When column type is string, use either to_dsinterval or to_yminterval function based on "type_".
    if isinstance(self._type, (VARCHAR, CHAR)):
        _fun = getattr(func, "to_dsinterval") if _is_day_to_second else getattr(func, "to_yminterval")
        return _SQLColumnExpression(_fun(self.expression), type=type_)

    # When column type is integer or float type, use either numtodsinterval or numtoyminterval
    # function based on "type_".
    _fun = getattr(func, "numtodsinterval") if _is_day_to_second else getattr(func, "numtoyminterval")
    value = value.expression if isinstance(value, _SQLColumnExpression) else value
    return _SQLColumnExpression(_fun(self.expression, value), type=type_)
|
|
10659
|
+
def parse_url(self, url_part):
    """
    DESCRIPTION:
        Extracts a specific part from the URL.

    PARAMETERS:
        url_part:
            Required Argument.
            Specifies which part to be extracted.
            Permitted Values: HOST, PATH, QUERY, REF, PROTOCOL, FILE, AUTHORITY, USERINFO
            Type: str or ColumnExpression

    Returns:
        ColumnExpression

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml", "url_data")

        # Create a DataFrame on 'url_data' table.
        >>> df = DataFrame("url_data")

        # Example 1: Extract components from column 'urls' using column 'part'
        >>> df.assign(col = df.urls.parse_url(df.part))
                                                                   urls       part                col
        id
        3                                      https://www.facebook.com       HOST   www.facebook.com
        6             smtp://user:password@smtp.example.com:21/file.txt   USERINFO      user:password
        2   https://example.net/path4/path5/path6?query4=value4#fragment        REF           fragment
        7                                        https://www.google.com   PROTOCOL              https
        >>>
    """
    # Validating Arguments
    arg_type_matrix = [["url_part", url_part, False, (str, ColumnExpression), True]]
    _Validators._validate_function_arguments(arg_type_matrix)

    # Regex pattern used to extract 'url_part' is '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'.
    # teradataml does not support regex grouping hence in some cases first used 'regex_replace' and
    # then 'regex_substr' or vice-versa.
    _extractors = {
        'HOST': _SQLColumnExpression(
            func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), '(//[^/?#]+@)|(//)|(:\d+)', ''),
            type=VARCHAR()),
        'PATH': _SQLColumnExpression(
            func.regexp_substr(func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
                               '([^?#]*)'), type=VARCHAR()),
        'QUERY': _SQLColumnExpression(
            func.ltrim(func.regexp_substr(
                func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)', ''),
                '\?([^#]*)'), '?'), type=VARCHAR()),
        'REF': _SQLColumnExpression(
            func.ltrim(func.regexp_substr(
                func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?', ''),
                '(#(.*))'), '#'), type=VARCHAR()),
        'PROTOCOL': _SQLColumnExpression(
            func.rtrim(func.regexp_substr(self.expression, '^(([^:/?#]+):)'), ':'),
            type=VARCHAR()),
        'FILE': _SQLColumnExpression(
            func.regexp_substr(func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
                               '([^?#]*)(\?([^#]*))?'), type=VARCHAR()),
        'AUTHORITY': _SQLColumnExpression(
            func.ltrim(func.regexp_substr(self.expression, '//([^/?#]*)'), '//'),
            type=VARCHAR()),
        'USERINFO': _SQLColumnExpression(
            func.rtrim(func.ltrim(
                func.regexp_substr(func.regexp_substr(self.expression, '//([^/?#]*)'), '//[^/?#]+@'),
                '/'), '@'), type=VARCHAR())
    }

    # A string argument selects the extractor directly.
    if isinstance(url_part, str):
        return _extractors[url_part]

    # A column argument is resolved per-row through a CASE expression,
    # one WHEN branch per supported URL part.
    whens = [(url_part == part, expr) for part, expr in _extractors.items()]

    from teradataml.dataframe.sql_functions import case
    return case(whens)
|
|
10757
|
+
def log(self, base):
    """
    DESCRIPTION:
        Returns the logarithm value of the column with respect to 'base'.

    PARAMETERS:
        base:
            Required Argument.
            Specifies base of logarithm.
            Type: int or float or ColumnExpression

    Returns:
        ColumnExpression

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml", "titanic")

        # Create a DataFrame on 'titanic' table.
        >>> titanic = DataFrame.from_table('titanic')
        >>> df = titanic.select(["passenger", "age", "fare"])

        # Example 1: Compute log values for column 'fare' using base as column 'age'.
        >>> log_df = df.assign(fare_log=df.fare.log(df.age))
        >>> print(log_df)
                    age      fare  fare_log
        passenger
        326        36.0  135.6333  1.370149
        183         9.0   31.3875  1.568529
        652        18.0   23.0000  1.084807
        774         NaN    7.2250       NaN
        >>>
    """
    # Validating Arguments
    arg_type_matrix = [["base", base, False, (int, float, ColumnExpression), True]]
    _Validators._validate_function_arguments(arg_type_matrix)

    # Guard against invalid inputs: when the column value is 0, the base
    # is 0, or ln(base) is 0 (base of 1), the CASE expression yields NULL
    # instead of raising a division-by-zero/log-of-zero error.
    from teradataml.dataframe.sql_functions import case

    if isinstance(base, _SQLColumnExpression):
        condition = (self != 0) & (base != 0) & (base.ln() != 0)
        result = (self.ln() / base.ln()).cast(FLOAT)
    else:
        # A constant base is wrapped as a literal expression.
        condition = (self != 0) & (_SQLColumnExpression(literal(base)).ln() != 0)
        result = (self.ln() / _SQLColumnExpression(literal(base)).ln()).cast(FLOAT)

    return case([(condition, result)])
|
|
10827
|
+
def isnan(self):
    """
    DESCRIPTION:
        Function evaluates a variable or expression to determine if the
        floating-point argument is a NaN (Not-a-Number) value. When a database
        table contains a NaN value, the data is undefined and unrepresentable
        in floating-point arithmetic. For example, division by 0, or the square root
        of a negative number would return a NaN result.

    RETURNS:
        ColumnExpression.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml","titanic")

        # Create a DataFrame on 'titanic' table.
        >>> titanic = DataFrame.from_table('titanic')
        >>> df = titanic.select(["passenger", "age", "fare"])

        # Example 1: Find whether 'fare' column contains NaN values or not.
        >>> nan_df = df.assign(nanornot = df.fare.isnan())
        >>> print(nan_df)
                    age      fare  nanornot
        passenger
        326        36.0  135.6333         0
        183         9.0   31.3875         0
        774         NaN    7.2250         0
        >>>
    """
    # Delegate the check to Vantage's TD_ISNAN; result is 0/1 as INTEGER.
    sql_text = "TD_ISNAN({})".format(self.compile())
    return _SQLColumnExpression(literal_column(sql_text), type=INTEGER)
+
|
|
10880
|
+
def isinf(self):
    """
    DESCRIPTION:
        Function evaluates a variable or expression to determine if the
        floating-point argument is an infinite number. This function determines
        if a database table contains positive or negative infinite values.

    RETURNS:
        ColumnExpression.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml","titanic")

        # Create a DataFrame on 'titanic' table.
        >>> titanic = DataFrame.from_table('titanic')
        >>> df = titanic.select(["passenger", "age", "fare"])

        # Example 1: Find whether 'fare' column contains infinity values or not.
        >>> inf_df = df.assign(infornot = df.fare.isinf())
        >>> print(inf_df)
                    age      fare  infornot
        passenger
        326        36.0  135.6333         0
        183         9.0   31.3875         0
        774         NaN    7.2250         0
        >>>
    """
    # Delegate the check to Vantage's TD_ISINF; result is 0/1 as INTEGER.
    sql_text = "TD_ISINF({})".format(self.compile())
    return _SQLColumnExpression(literal_column(sql_text), type=INTEGER)
|
|
10931
|
+
def isfinite(self):
    """
    DESCRIPTION:
        Function evaluates a variable or expression to determine if
        it is a finite floating value. A finite floating value is not
        a NaN (Not a Number) value and is not an infinity value.

    RETURNS:
        ColumnExpression.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml","titanic")

        # Create a DataFrame on 'titanic' table.
        >>> titanic = DataFrame.from_table('titanic')
        >>> df = titanic.select(["passenger", "age", "fare"])

        # Example 1: Find whether 'fare' column contains finite values or not.
        >>> finite_df = df.assign(finiteornot = df.fare.isfinite())
        >>> print(finite_df)
                    age    fare  finiteornot
        passenger
        530        23.0  11.500            1
        591        35.0   7.125            1
        122         NaN   8.050            1
        >>>
    """
    # Delegate the check to Vantage's TD_ISFINITE; result is 0/1 as INTEGER.
    sql_text = "TD_ISFINITE({})".format(self.compile())
    return _SQLColumnExpression(literal_column(sql_text), type=INTEGER)