teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Note: the registry flags this release as potentially problematic.
Files changed (263)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +183 -0
  4. teradataml/__init__.py +6 -3
  5. teradataml/_version.py +2 -2
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +275 -40
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +1 -0
  11. teradataml/analytics/json_parser/utils.py +17 -21
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +10 -2
  15. teradataml/analytics/table_operator/__init__.py +3 -2
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +62 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1553 -319
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +276 -319
  22. teradataml/automl/data_transformation.py +163 -81
  23. teradataml/automl/feature_engineering.py +402 -239
  24. teradataml/automl/feature_exploration.py +9 -2
  25. teradataml/automl/model_evaluation.py +48 -51
  26. teradataml/automl/model_training.py +291 -189
  27. teradataml/catalog/byom.py +8 -8
  28. teradataml/catalog/model_cataloging_utils.py +1 -1
  29. teradataml/clients/auth_client.py +133 -0
  30. teradataml/clients/pkce_client.py +1 -1
  31. teradataml/common/aed_utils.py +3 -2
  32. teradataml/common/constants.py +48 -6
  33. teradataml/common/deprecations.py +13 -7
  34. teradataml/common/garbagecollector.py +156 -120
  35. teradataml/common/messagecodes.py +6 -1
  36. teradataml/common/messages.py +3 -1
  37. teradataml/common/sqlbundle.py +1 -1
  38. teradataml/common/utils.py +103 -11
  39. teradataml/common/wrapper_utils.py +1 -1
  40. teradataml/context/context.py +121 -31
  41. teradataml/data/advertising.csv +201 -0
  42. teradataml/data/bank_marketing.csv +11163 -0
  43. teradataml/data/bike_sharing.csv +732 -0
  44. teradataml/data/boston2cols.csv +721 -0
  45. teradataml/data/breast_cancer.csv +570 -0
  46. teradataml/data/complaints_test_tokenized.csv +353 -0
  47. teradataml/data/complaints_tokens_model.csv +348 -0
  48. teradataml/data/covid_confirm_sd.csv +83 -0
  49. teradataml/data/customer_segmentation_test.csv +2628 -0
  50. teradataml/data/customer_segmentation_train.csv +8069 -0
  51. teradataml/data/dataframe_example.json +10 -0
  52. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
  53. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
  54. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
  55. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
  56. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  57. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
  58. teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
  59. teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
  60. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
  61. teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
  62. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  63. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
  64. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
  65. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
  66. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  67. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  68. teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
  69. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
  70. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
  71. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
  72. teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
  73. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
  74. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  75. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  76. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  77. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
  78. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
  79. teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
  80. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  81. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  82. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  83. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  84. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  85. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  86. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  87. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  88. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  89. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  90. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  91. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  92. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  93. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  94. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  95. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  96. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  97. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  98. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  99. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  100. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  101. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  102. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  103. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  104. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  105. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  106. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  107. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  108. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  109. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  110. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  111. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  112. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  113. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  114. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  115. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  116. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  117. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  118. teradataml/data/dwt2d_dataTable.csv +65 -0
  119. teradataml/data/dwt_dataTable.csv +8 -0
  120. teradataml/data/dwt_filterTable.csv +3 -0
  121. teradataml/data/finance_data4.csv +13 -0
  122. teradataml/data/glm_example.json +28 -1
  123. teradataml/data/grocery_transaction.csv +19 -0
  124. teradataml/data/housing_train_segment.csv +201 -0
  125. teradataml/data/idwt2d_dataTable.csv +5 -0
  126. teradataml/data/idwt_dataTable.csv +8 -0
  127. teradataml/data/idwt_filterTable.csv +3 -0
  128. teradataml/data/insect2Cols.csv +61 -0
  129. teradataml/data/interval_data.csv +5 -0
  130. teradataml/data/jsons/paired_functions.json +14 -0
  131. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
  132. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  133. teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
  134. teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
  135. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
  136. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
  137. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
  138. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  139. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  140. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
  141. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  142. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  143. teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
  144. teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
  145. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
  146. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
  147. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
  148. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  149. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  150. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  151. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
  152. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
  153. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
  154. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  155. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  156. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  157. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  158. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  159. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  160. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  161. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  162. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  163. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  164. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  165. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  166. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  167. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  168. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  169. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  170. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  171. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  172. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  173. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  174. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  175. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  176. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  177. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  178. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  179. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  180. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  181. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  182. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  183. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  184. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  185. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  186. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  187. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  188. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  189. teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
  190. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  191. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  192. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  193. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  194. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  195. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
  196. teradataml/data/kmeans_example.json +5 -0
  197. teradataml/data/kmeans_table.csv +10 -0
  198. teradataml/data/load_example_data.py +8 -2
  199. teradataml/data/naivebayestextclassifier_example.json +1 -1
  200. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  201. teradataml/data/onehot_encoder_train.csv +4 -0
  202. teradataml/data/openml_example.json +29 -0
  203. teradataml/data/peppers.png +0 -0
  204. teradataml/data/real_values.csv +14 -0
  205. teradataml/data/sax_example.json +8 -0
  206. teradataml/data/scale_attributes.csv +3 -0
  207. teradataml/data/scale_example.json +52 -1
  208. teradataml/data/scale_input_part_sparse.csv +31 -0
  209. teradataml/data/scale_input_partitioned.csv +16 -0
  210. teradataml/data/scale_input_sparse.csv +11 -0
  211. teradataml/data/scale_parameters.csv +3 -0
  212. teradataml/data/scripts/deploy_script.py +21 -2
  213. teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
  214. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
  215. teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
  216. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
  217. teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
  218. teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
  219. teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
  220. teradataml/data/star_pivot.csv +8 -0
  221. teradataml/data/templates/open_source_ml.json +2 -1
  222. teradataml/data/teradataml_example.json +97 -1
  223. teradataml/data/timestamp_data.csv +4 -0
  224. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  225. teradataml/data/uaf_example.json +55 -1
  226. teradataml/data/unpivot_example.json +15 -0
  227. teradataml/data/url_data.csv +9 -0
  228. teradataml/data/windowdfft.csv +16 -0
  229. teradataml/data/ztest_example.json +16 -0
  230. teradataml/dataframe/copy_to.py +9 -4
  231. teradataml/dataframe/data_transfer.py +125 -64
  232. teradataml/dataframe/dataframe.py +575 -57
  233. teradataml/dataframe/dataframe_utils.py +47 -9
  234. teradataml/dataframe/fastload.py +273 -90
  235. teradataml/dataframe/functions.py +339 -0
  236. teradataml/dataframe/row.py +160 -0
  237. teradataml/dataframe/setop.py +2 -2
  238. teradataml/dataframe/sql.py +740 -18
  239. teradataml/dataframe/window.py +1 -1
  240. teradataml/dbutils/dbutils.py +324 -18
  241. teradataml/geospatial/geodataframe.py +1 -1
  242. teradataml/geospatial/geodataframecolumn.py +1 -1
  243. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  244. teradataml/lib/aed_0_1.dll +0 -0
  245. teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
  246. teradataml/options/__init__.py +16 -5
  247. teradataml/options/configure.py +39 -6
  248. teradataml/options/display.py +2 -2
  249. teradataml/plot/axis.py +4 -4
  250. teradataml/scriptmgmt/UserEnv.py +26 -19
  251. teradataml/scriptmgmt/lls_utils.py +120 -16
  252. teradataml/table_operators/Script.py +4 -5
  253. teradataml/table_operators/TableOperator.py +160 -26
  254. teradataml/table_operators/table_operator_util.py +88 -41
  255. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  256. teradataml/telemetry_utils/__init__.py +0 -0
  257. teradataml/telemetry_utils/queryband.py +52 -0
  258. teradataml/utils/validators.py +41 -3
  259. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
  260. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
  261. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
  262. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
  263. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
@@ -40,7 +40,7 @@ import sqlalchemy as sqlalc
 
 import re
 
-from teradatasqlalchemy.dialect import dialect as td_dialect, compiler as td_compiler
+from teradatasqlalchemy.dialect import dialect as td_dialect, compiler as td_compiler, TeradataTypeCompiler as td_type_compiler
 from teradatasqlalchemy import (INTEGER, SMALLINT, BIGINT, BYTEINT, DECIMAL, FLOAT, NUMBER)
 from teradatasqlalchemy import (DATE, TIME, TIMESTAMP)
 from teradatasqlalchemy import (BYTE, VARBYTE, BLOB)
@@ -52,7 +52,7 @@ from teradatasqlalchemy import (INTERVAL_DAY, INTERVAL_DAY_TO_HOUR, INTERVAL_DAY
                                 INTERVAL_YEAR_TO_MONTH)
 from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP)
 from teradatasqlalchemy import XML, GEOMETRY
-from teradatasqlalchemy.telemetry.queryband import collect_queryband
+from teradataml.telemetry_utils.queryband import collect_queryband
 import decimal
 import datetime as dt
 from teradataml.dataframe.window import Window
@@ -612,8 +612,9 @@ class _SQLTableExpression(_PandasTableExpression):
                 expression = display_number(c.expression)
             elif isinstance(c.type, tuple(datetime_period_types)):
                 expression = cast_expr(c.expression, 30)
+            # Change the size as INTERVAL_DAY_TO_SECOND(4, 6) is failing.
             elif isinstance(c.type, tuple(interval_types)):
-                expression = cast_expr(c.expression, 20)
+                expression = cast_expr(c.expression, 25)
             elif isinstance(c.type, GEOMETRY):
                 expression = cast_expr(c.expression, display.geometry_column_length) if \
                              display.geometry_column_length is not None else c.expression.label(c.name)
@@ -1618,6 +1619,8 @@ class _ArithmeticColumnExpression(ColumnExpression):
     def __sub__(self, other):
         """
         Compute the difference between two ColumnExpressions using -
+        Note:
+            * Difference between two timestamp columns return value in seconds.
 
         PARAMETERS:
             other:
@@ -1644,6 +1647,15 @@ class _ArithmeticColumnExpression(ColumnExpression):
         2 67/06/30 07/07/10 421.0 465.0 179.0
         3 67/06/30 07/07/10 434.0 485.0 185.0
         5 67/06/30 07/07/10 459.0 509.0 211.0
+        >>> load_example_data("uaf", "Convolve2RealsLeft")
+        >>> timestamp_df = DataFrame("Convolve2RealsLeft")
+        >>> timestamp_df
+           row_seq row_i_time col_seq column_i_time A B C D
+        id
+        1  1 2018-08-08 08:02:00.000000 0 2018-08-08 08:00:00.000000 1.3 10.3 20.3 30.3
+        1  1 2018-08-08 08:02:00.000000 1 2018-08-08 08:02:00.000000 1.4 10.4 20.4 30.4
+        1  0 2018-08-08 08:00:00.000000 1 2018-08-08 08:02:00.000000 1.2 10.2 20.2 30.2
+        1  0 2018-08-08 08:00:00.000000 0 2018-08-08 08:00:00.000000 1.1 10.1 20.1 30.1
 
         # Example 1: Subtract 100 from the income amount and assign the final amount
         # to new column 'remaining_income'.
@@ -1666,7 +1678,26 @@ class _ArithmeticColumnExpression(ColumnExpression):
         1 67/06/30 07/07/10 415.0 451.0 180.0 271.0
         5 67/06/30 07/07/10 459.0 509.0 211.0 298.0
         4 67/06/30 07/07/10 448.0 493.0 192.0 301.0
+
+        # Example 3: Subtract 2 timestamp columns and assign to new column 'seconds'.
+        >>> timestamp_df.assign(seconds = timestamp_df.row_i_time-timestamp_df.column_i_time)
+           row_seq row_i_time col_seq column_i_time A B C D seconds
+        id
+        1  1 2018-08-08 08:02:00.000000 0 2018-08-08 08:00:00.000000 1.3 10.3 20.3 30.3  120.0
+        1  1 2018-08-08 08:02:00.000000 1 2018-08-08 08:02:00.000000 1.4 10.4 20.4 30.4    0.0
+        1  0 2018-08-08 08:00:00.000000 1 2018-08-08 08:02:00.000000 1.2 10.2 20.2 30.2 -120.0
+        1  0 2018-08-08 08:00:00.000000 0 2018-08-08 08:00:00.000000 1.1 10.1 20.1 30.1    0.0
+
         """
+        if isinstance(self._type, TIMESTAMP) and isinstance(other._type, TIMESTAMP):
+            s = """
+                (CAST((CAST({0} AS DATE)-CAST({1} AS DATE)) AS FLOAT) * 86400) +
+                ((EXTRACT(HOUR FROM {0}) - EXTRACT(HOUR FROM {1})) * 3600) +
+                ((EXTRACT(MINUTE FROM {0}) - EXTRACT(MINUTE FROM {1})) * 60) +
+                ((EXTRACT(SECOND FROM {0}) - EXTRACT(SECOND FROM {1})))
+                """.format(self.compile(), other.compile())
+            return _SQLColumnExpression(literal_column(s, type_ = FLOAT))
+
         expr = other.expression if isinstance(other, _SQLColumnExpression) else other
         res = _SQLColumnExpression(self.expression - expr)
         return res
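
For intuition, the seconds value produced by the new timestamp branch of __sub__ can be reproduced with plain Python datetime arithmetic. This is an illustrative sketch of the SQL assembled above, not code from the package:

    from datetime import datetime

    # Mirrors the generated SQL: whole-day difference in seconds, plus the
    # differences of the HOUR, MINUTE and SECOND components.
    def ts_diff_seconds(a, b):
        return ((a.date() - b.date()).days * 86400.0
                + (a.hour - b.hour) * 3600
                + (a.minute - b.minute) * 60
                + (a.second - b.second))

    print(ts_diff_seconds(datetime(2018, 8, 8, 8, 2, 0),
                          datetime(2018, 8, 8, 8, 0, 0)))  # 120.0, as in Example 3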
@@ -5431,12 +5462,18 @@ class _SQLColumnExpression(_LogicalColumnExpression,
             expression = literal_column(expression)
         self.kw = kw
         self.expression = expression
-        self.type = kw.get("type", expression.type)
+        self.type = kw.get("type", expression.type if expression is not None else kw.get("udf_type"))
         # Initial ColumnExpression has only one dataframe and hence
         # __has_multiple_dataframes = False.
         # eg: df1.col1, df2.col2
         self.__has_multiple_dataframes = False
         self.__names = []
+        self._udf = kw.get("udf", None)
+        self._udf_args = kw.get("udf_args", None)
+        self._env_name = kw.get("env_name", None)
+        self._delimiter = kw.get("delimiter", None)
+        self._quotechar = kw.get("quotechar", None)
+        self.alias_name = self.compile() if self._udf is None else None
 
     @property
     def expression(self):
@@ -5801,7 +5838,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
         return _SQLColumnExpression(func.concat(*columns_))
 
     @collect_queryband(queryband="DFC_cast")
-    def cast(self, type_ = None):
+    def cast(self, type_ = None, format = None, timezone = None):
         """
         DESCRIPTION:
             Apply the CAST SQL function to the column with the type specified.
@@ -5817,6 +5854,32 @@ class _SQLColumnExpression(_LogicalColumnExpression,
                 Default value: None
                 Types: teradatasqlalchemy type or object of teradatasqlalchemy type
 
+            format:
+                Optional Argument.
+                Specifies a variable length string containing formatting characters
+                that define the display format for the data type.
+                Formats can be specified for columns that have character, numeric, byte,
+                DateTime, Period or UDT data types.
+                Note:
+                    * Teradata supports different formats. Look at 'Formats' section in
+                      "SQL-Data-Types-and-Literals" in Vantage documentation for additional
+                      details.
+                Default value: None
+                Types: str
+
+            timezone:
+                Optional Argument.
+                Specifies the timezone string.
+                Check "SQL-Date-and-Time-Functions-and-Expressions" in
+                Vantage documentation for supported timezones.
+                Type: ColumnExpression or str.
+
+        RETURNS:
+            ColumnExpression
+
+        RAISES:
+            TeradataMlException
+
         EXAMPLES:
             >>> load_example_data("dataframe","admissions_train")
             >>> df = DataFrame('admissions_train')
@@ -5841,8 +5904,24 @@ class _SQLColumnExpression(_LogicalColumnExpression,
             programming str
             admitted int
 
-            >>> # Let's try creating a new DataFrame casting 'id' column (of type INTEGER) to VARCHAR(5),
-            >>> # an object of a teradatasqlalchemy type.
+            >>> dataframe_dict = {"id": [100, 200,300],
+            >>>                   "timestamp_col": ['1000-01-10 23:00:12-02:00', '2015-01-08 13:00:00+12:00', '2014-12-10 10:00:35-08:00'],
+            >>>                   "timezone_col": ["GMT", "America Pacific", "GMT+10"]}
+            >>> pandas_df = pd.DataFrame(dataframe_dict)
+            >>> copy_to_sql(pandas_df, table_name = 'new_table', if_exists = 'replace')
+            >>> df1 = DataFrame("new_table")
+            >>> df1
+            id timestamp_col timezone_col
+            300 2014-12-10 10:00:35-08:00 GMT+10
+            200 2015-01-08 13:00:00+12:00 America Pacific
+            100 1000-01-10 23:00:12-02:00 GMT
+            >>> df1.dtypes
+            id int
+            timestamp_col str
+            timezone_col str
+
+            # Example 1: Let's try creating a new DataFrame casting 'id' column (of type INTEGER) to VARCHAR(5),
+            # an object of a teradatasqlalchemy type.
             >>> from teradatasqlalchemy import VARCHAR
             >>> new_df = df.assign(char_id = df.id.cast(type_=VARCHAR(5)))
             >>> new_df
@@ -5867,8 +5946,8 @@ class _SQLColumnExpression(_LogicalColumnExpression,
             admitted int
             char_id str
 
-            >>> # Now let's try creating a new DataFrame casting 'id' column (of type INTEGER) to VARCHAR,
-            >>> # a teradatasqlalchemy type.
+            # Example 2: Now let's try creating a new DataFrame casting 'id' column (of type INTEGER) to VARCHAR,
+            # a teradatasqlalchemy type.
             >>> new_df_2 = df.assign(char_id = df.id.cast(type_=VARCHAR))
             >>> new_df_2
             masters gpa stats programming admitted char_id
@@ -5892,25 +5971,65 @@ class _SQLColumnExpression(_LogicalColumnExpression,
             admitted int
             char_id str
 
-            >>> # Let's try filtering some data with a match on a column cast to another type,
-            >>> # an object of a teradatasqlalchemy type.
+            # Example 3: Let's try filtering some data with a match on a column cast to another type,
+            # an object of a teradatasqlalchemy type.
             >>> df[df.id.cast(VARCHAR(5)) == '1']
             masters gpa stats programming admitted
             id
             1 yes 3.95 Beginner Beginner 0
 
-            >>> # Now let's try the same, this time using a teradatasqlalchemy type.
+            # Example 4: Now let's try the same, this time using a teradatasqlalchemy type.
             >>> df[df.id.cast(VARCHAR) == '1']
             masters gpa stats programming admitted
             id
             1 yes 3.95 Beginner Beginner 0
 
-        RETURNS:
-            ColumnExpression
+            # Example 5: Let's try creating a new DataFrame casting 'timestamp_col' column (of type VARCHAR) to TIMESTAMP,
+            # using format.
+            >>> new_df1 = df1.assign(new_col = df1.timestamp_col.cast(TIMESTAMP, format='Y4-MM-DDBHH:MI:SSBZ'))
+            id timestamp_col timezone_col new_col
+            300 2014-12-10 10:00:35-08:00 GMT+10 2014-12-10 18:00:35
+            200 2015-01-08 13:00:00+12:00 America Pacific 2015-01-08 01:00:00
+            100 1000-01-10 23:00:12-02:00 GMT 1000-01-11 01:00:12
+            >>> new_df1.tdtypes
+            id int
+            timestamp_col str
+            timezone_col str
+            new_col datetime.datetime
+
+            # Example 6: Let's try creating a new DataFrame casting 'id' column (of type INTEGER) to VARCHAR,
+            # using format.
+            >>> new_df2 = df1.assign(new_col = df1.id.cast(VARCHAR, format='zzz.zz'))
+            id timestamp_col timezone_col new_col
+            300 2014-12-10 10:00:35-08:00 GMT+10 300.00
+            200 2015-01-08 13:00:00+12:00 America Pacific 200.00
+            100 1000-01-10 23:00:12-02:00 GMT 100.00
+            >>> new_df2.dtypes
+            id int
+            timestamp_col str
+            timezone_col str
+            new_col str
+
+            # Example 7: Let's try creating a new DataFrame casting 'timestamp_with_timezone' column (of type TIMESTAMP) to
+            # TIMESTAMP WITH TIMEZONE, with offset 'GMT+10'.
+            >>> new_df3 = new_df1.assign(timestamp_with_timezone = new_df1.new_col.cast(TIMESTAMP(timezone=True), timezone='GMT+10'))
+            id timestamp_col timezone_col new_col timestamp_with_timezone
+            300 2014-12-10 10:00:35-08:00 GMT+10 2014-12-10 18:00:35 2014-12-11 04:00:35.000000+10:00
+            200 2015-01-08 13:00:00+12:00 America Pacific 2015-01-08 01:00:00 2015-01-08 11:00:00.000000+10:00
+            100 1000-01-10 23:00:12-02:00 GMT 1000-01-11 01:00:12 1000-01-11 11:00:12.000000+10:00
+            >>> new_df3.dtypes
+            id int
+            timestamp_col str
+            timezone_col str
+            new_col datetime.datetime
+            timestamp_with_timezone datetime.datetime
+        """
+        # Validating Arguments
+        arg_type_matrix = []
+        arg_type_matrix.append(["format", format , True, (str), True])
+        arg_type_matrix.append(["timezone", timezone, True, (str, ColumnExpression, int, float), True])
+        _Validators._validate_function_arguments(arg_type_matrix)
 
-        RAISES:
-            TeradataMlException
-        """
         # If type_ is None or not specified, raise an Exception
         if type_ is None:
             raise TeradataMlException(Messages.get_message(MessageCodes.MISSING_ARGS, 'type_'),
@@ -5921,8 +6040,26 @@ class _SQLColumnExpression(_LogicalColumnExpression,
             raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, 'type_',
                                                            'a valid teradatasqlalchemy type'),
                                       MessageCodes.UNSUPPORTED_DATATYPE)
-
         expression = func.cast(self.expression, type_=type_).label(self.name)
+        if format or timezone:
+            # Casting to VARCHAR or CHAR with format require this type of query
+            # CAST((CAST (F1 AS FORMAT 'format_str')) AS [CHAR|VARCHAR])
+            if isinstance(type_, (VARCHAR, CHAR)) or (isinstance(type_, type) and issubclass(type_, (VARCHAR, CHAR))):
+                expression = func.cast(literal_column("""CAST({} AS FORMAT '{}')""".format(self.compile(), format)), type_=type_)
+            else:
+                # Compile _TDType to string
+                type_compiler = td_type_compiler(td_dialect)
+                type_expression = type_compiler.process(type_) if not isinstance(type_, type) else type_compiler.process(type_())
+                # Create a query with format and timezone string
+                # CAST(TIMESTAMP "column_name" AS "_TDType" FORMAT "format" AT TIMEZONE "timezone_str")
+                format = " FORMAT '{}'".format(format) if format else ""
+                if timezone and isinstance(timezone, _SQLColumnExpression):
+                    timezone = _SQLColumnExpression(literal_column(f' AT TIME ZONE {timezone.compile()}')).compile()
+                elif timezone:
+                    timezone = _SQLColumnExpression(literal_column(_SQLColumnExpression._timezone_string(timezone))).compile()
+                else:
+                    timezone = ""
+                expression = literal_column("""CAST({} AS {}{}{})""".format(self.compile(), type_expression, timezone, format), type_=type_)
         return _SQLColumnExpression(expression)
 
     def __hash__(self):
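
The new format/timezone branch assembles one of two CAST shapes, per the inline comments in the hunk above. A hypothetical helper sketching those shapes (cast_sql and its arguments are illustrative, not teradataml API):

    # Hypothetical helper illustrating the two CAST shapes built above.
    def cast_sql(column, target_type, format=None, timezone_sql=""):
        if format and target_type.upper().startswith(("VARCHAR", "CHAR")):
            # CAST((CAST(col AS FORMAT 'fmt')) AS [CHAR|VARCHAR])
            return "CAST(CAST({} AS FORMAT '{}') AS {})".format(column, format, target_type)
        fmt = " FORMAT '{}'".format(format) if format else ""
        # CAST(col AS type [AT TIME ZONE ...][ FORMAT 'fmt'])
        return "CAST({} AS {}{}{})".format(column, target_type, timezone_sql, fmt)

    print(cast_sql("id", "VARCHAR(5)", format="zzz.zz"))
    # CAST(CAST(id AS FORMAT 'zzz.zz') AS VARCHAR(5))
    print(cast_sql("new_col", "TIMESTAMP WITH TIME ZONE", timezone_sql=" AT TIME ZONE 'GMT+10'"))
    # CAST(new_col AS TIMESTAMP WITH TIME ZONE AT TIME ZONE 'GMT+10')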
@@ -10088,3 +10225,588 @@ class _SQLColumnExpression(_LogicalColumnExpression,
             return list(set(result))
 
         return []
+
+    def alias(self, name):
+        """
+        DESCRIPTION:
+            Function to returns this column with aliased name.
+
+        PARAMETERS:
+            name:
+                Required Argument.
+                Specifies the column name.
+                Type: str
+
+        RAISES:
+            TypeError, ValueError
+
+        RETURNS:
+            ColumnExpression
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("dataframe", "admissions_train")
+
+            # Create a DataFrame on 'admissions_train' table.
+            >>> df = DataFrame("admissions_train")
+            >>> df
+               masters gpa stats programming admitted
+            id
+            38 yes 2.65 Advanced Beginner 1
+            7 yes 2.33 Novice Novice 1
+            26 yes 3.57 Advanced Advanced 1
+            5 no 3.44 Novice Novice 0
+            3 no 3.70 Novice Beginner 1
+            22 yes 3.46 Novice Beginner 0
+            24 no 1.87 Advanced Novice 1
+            36 no 3.00 Advanced Novice 0
+            19 yes 1.98 Advanced Advanced 0
+            40 yes 3.95 Novice Beginner 0
+
+            # Example 1: Alias the resultant column after aggregation with "count_program".
+            >>> res = df.agg(df.programming.count().alias("count_program"))
+            >>> res
+               count_program
+            0             40
+
+        """
+
+        # Validate argument types
+        arg_type_matrix = [["name", name , True, (str), True]]
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        self.alias_name = name
+        return self
+
+    @staticmethod
+    def _timezone_string(value):
+        """
+        DESCRIPTION:
+            Function to return timezone string in correct format.
+
+        PARAMETERS:
+            value:
+                Required Argument.
+                Specifies timezone string.
+                Types: str, int , float
+
+        RETURNS:
+            bool
+        """
+        if isinstance(value, (float, int)):
+            return " AT TIME ZONE {}".format(value)
+        if value.upper() not in ['LOCAL']:
+            return " AT TIME ZONE '{}'".format(value)
+        return " AT {}".format(value)
+
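Tracing the three branches of _timezone_string directly (a quick check of the code above, not output from the package's test suite):

    >>> _SQLColumnExpression._timezone_string(10)        # numeric offset
    " AT TIME ZONE 10"
    >>> _SQLColumnExpression._timezone_string("GMT+10")  # named zone
    " AT TIME ZONE 'GMT+10'"
    >>> _SQLColumnExpression._timezone_string("LOCAL")   # session-local time
    " AT LOCAL"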
+    def to_timestamp(self, format=None, type_=TIMESTAMP, timezone=None):
+        """
+        DESCRIPTION:
+            Converts string or integer to a TIMESTAMP data type or TIMESTAMP WITH
+            TIME ZONE data type.
+            Note:
+                * POSIX epoch conversion is implicit in the "to_timestamp" when column
+                  is integer type. POSIX epoch is the number of seconds that have elapsed
+                  since midnight Coordinated Universal Time (UTC) of January 1, 1970.
+
+        PARAMETERS:
+            format:
+                Specifies the format of string column.
+                Argument is not required when column is integer type, Otherwise Required.
+                For valid 'format' values, see documentation on
+                "to_date" or "help(df.col_name.to_date)".
+                Type: ColumnExpression or str
+
+            type_:
+                Optional Argument.
+                Specifies a TIMESTAMP type or an object of a
+                TIMESTAMP type that the column needs to be cast to.
+                Default value: TIMESTAMP
+                Permitted Values: TIMESTAMP data type
+                Types: teradatasqlalchemy type or object of teradatasqlalchemy type
+
+            timezone:
+                Optional Argument.
+                Specifies the timezone string.
+                For valid timezone strings, user should check Vantage documentation.
+                Type: ColumnExpression or str.
+
+        RETURNS:
+            ColumnExpression
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "timestamp_data")
+
+            # Create a DataFrame on 'timestamp_data' table.
+            >>> df = DataFrame("timestamp_data")
+            >>> df
+            id timestamp_col timestamp_col1 format_col timezone_col
+            2 2015-01-08 00:00:12.2+10:00 45678910234 YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM GMT+10
+            1 2015-01-08 13:00 878986 YYYY-MM-DD HH24:MI America Pacific
+            0 2015-01-08 00:00:12.2 123456 YYYY-MM-DD HH24:MI:SS.FF6 GMT
+
+            >>> df.tdtypes
+            id INTEGER()
+            timestamp_col VARCHAR(length=30, charset='LATIN')
+            timestamp_col1 BIGINT()
+            format_col VARCHAR(length=30, charset='LATIN')
+            timezone_col VARCHAR(length=30, charset='LATIN')
+
+            # Example 1: Convert Epoch seconds to timestamp.
+            >>> df.select(['id','timestamp_col1']).assign(col = df.timestamp_col1.to_timestamp())
+            id timestamp_col1 col
+            2 45678910234 3417-07-05 02:10:34.000000
+            1 878986 1970-01-11 04:09:46.000000
+            0 123456 1970-01-02 10:17:36.000000
+
+            # Example 2: Convert timestamp string to timestamp with timezone in
+            # format mentioned in column "format_col".
+            >>> df.select(['id', 'timestamp_col', 'format_col']).assign(col = df.timestamp_col.to_timestamp(df.format_col, TIMESTAMP(timezone=True)))
+            id timestamp_col format_col col
+            2 2015-01-08 00:00:12.2+10:00 YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM 2015-01-08 00:00:12.200000+10:00
+            1 2015-01-08 13:00 YYYY-MM-DD HH24:MI 2015-01-08 13:00:00.000000+00:00
+            0 2015-01-08 00:00:12.2 YYYY-MM-DD HH24:MI:SS.FF6 2015-01-08 00:00:12.200000+00:00
+
+            # Example 3: Convert Epoch seconds to timestamp with timezone in 'GMT+2' location.
+            >>> df.select(['id', 'timestamp_col1', 'format_col']).assign(col = df.timestamp_col1.to_timestamp(df.format_col, TIMESTAMP(timezone=True), 'GMT+2'))
+            id timestamp_col1 format_col col
+            2 45678910234 YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM 3417-07-05 04:10:34.000000+02:00
+            1 878986 YYYY-MM-DD HH24:MI 1970-01-11 06:09:46.000000+02:00
+            0 123456 YYYY-MM-DD HH24:MI:SS.FF6 1970-01-02 12:17:36.000000+02:00
+
+        """
+        # Validating Arguments
+        arg_type_matrix = []
+        arg_type_matrix.append(["format", format , True, (str, ColumnExpression), True])
+        arg_type_matrix.append(["timezone", timezone, True, (str, ColumnExpression, int, float), True])
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        if not UtilFuncs._is_valid_td_type(type_):
+            raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, 'type_',
+                                                           'a valid teradatasqlalchemy type'),
+                                      MessageCodes.UNSUPPORTED_DATATYPE)
+
+        _format = format.expression if isinstance(format, _SQLColumnExpression) else format
+        _params = [self.expression, _format]
+        # format is not required when column is of below types.
+        if isinstance(self._type, (BYTEINT, SMALLINT, INTEGER, BIGINT)):
+            _params.pop()
+        # Use to_timestamp_tz when below 3 conditions are true.
+        # Resultant query will be Example:
+        # TO_TIMESTAMP('2015-10-08 00:00:12.2') or TO_TIMESTAMP_TZ('2015-10-08 00:00:12.2+03:00') based on type_
+        _fun = getattr(func, "to_timestamp_tz") if isinstance(type_, TIMESTAMP) and type_.timezone and len(_params) == 2 \
+            else getattr(func, "to_timestamp")
+        if not timezone:
+            return _SQLColumnExpression(_fun(*_params), type=type_)
+
+        # If user uses timezone generate query with time zone.
+        # Resultant query will be Example:
+        # TO_TIMESTAMP('2015-10-08 00:00:12.2') at time zone 'America Alaska',
+        # TO_TIMESTAMP_TZ('2015-10-08 00:00:12.2+03:00') at time zone 'America Alaska'.
+        if isinstance(timezone, _SQLColumnExpression):
+            _timezone_expr = _SQLColumnExpression(literal_column(f' AT TIME ZONE {timezone.compile()}')).compile()
+        else:
+            _timezone_expr = _SQLColumnExpression(literal_column(_SQLColumnExpression._timezone_string(timezone))).compile()
+        return _SQLColumnExpression(_SQLColumnExpression(_fun(*_params)).compile() + _timezone_expr, type=type_)
+
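The implicit POSIX-epoch conversion documented above can be sanity-checked with plain Python; this reproduces the last row of Example 1:

    from datetime import datetime, timezone

    # 123456 seconds after 1970-01-01 00:00:00 UTC
    print(datetime.fromtimestamp(123456, timezone.utc))  # 1970-01-02 10:17:36+00:00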
+    def extract(self, value, timezone=None):
+        """
+        DESCRIPTION:
+            Extracts a single specified field from any DateTime, Interval or timestamp value,
+            converting it to an exact numeric value.
+
+        PARAMETERS:
+            value:
+                Required Argument.
+                Specifies the field which needs to be extracted.
+                Permitted Values: YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, TIMEZONE_HOUR, TIMEZONE_MINUTE
+                Note:
+                    * Permitted Values are case insensitive.
+                Type: str
+
+            timezone:
+                Optional Argument.
+                Specifies the timezone string.
+                For valid timezone strings, user should check Vantage documentation.
+                Type: ColumnExpression or str.
+
+        RETURNS:
+            ColumnExpression
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("uaf", "Traindata")
+
+            # Create a DataFrame on 'Traindata' table.
+
+            >>> temp_df = DataFrame("Traindata")
+            >>> df = temp_df.select(["seq_no", "schedule_date", "arrivalTime"])
+            >>> df
+                   schedule_date arrivalTime
+            seq_no
+            26 16/03/26 2016-03-26 12:33:05
+            24 16/03/26 2016-03-26 12:25:06
+            3 16/03/26 2016-03-26 10:52:05
+            22 16/03/26 2016-03-26 12:18:01
+            20 16/03/26 2016-03-26 12:10:06
+            18 16/03/26 2016-03-26 12:04:01
+            8 16/03/26 2016-03-26 11:15:06
+            17 16/03/26 2016-03-26 11:56:06
+            15 16/03/26 2016-03-26 11:45:00
+            13 16/03/26 2016-03-26 11:33:00
+            11 16/03/26 2016-03-26 11:26:00
+
+            # Example 1: Extract year from column 'schedule_date'.
+            >>> df.assign(col = df.schedule_date.extract('YEAR'))
+                   schedule_date arrivalTime col
+            seq_no
+            26 16/03/26 2016-03-26 12:33:05 2016
+            24 16/03/26 2016-03-26 12:25:06 2016
+            3 16/03/26 2016-03-26 10:52:05 2016
+            22 16/03/26 2016-03-26 12:18:01 2016
+            20 16/03/26 2016-03-26 12:10:06 2016
+            18 16/03/26 2016-03-26 12:04:01 2016
+            8 16/03/26 2016-03-26 11:15:06 2016
+            17 16/03/26 2016-03-26 11:56:06 2016
+            15 16/03/26 2016-03-26 11:45:00 2016
+            13 16/03/26 2016-03-26 11:33:00 2016
+            11 16/03/26 2016-03-26 11:26:00 2016
+
+            # Example 2: Extract hour from column 'arrivalTime'.
+            >>> df.assign(col = df.arrivalTime.extract('HOUR'))
+                   schedule_date arrivalTime col
+            seq_no
+            26 16/03/26 2016-03-26 12:33:05 12
+            24 16/03/26 2016-03-26 12:25:06 12
+            3 16/03/26 2016-03-26 10:52:05 10
+            22 16/03/26 2016-03-26 12:18:01 12
+            20 16/03/26 2016-03-26 12:10:06 12
+            18 16/03/26 2016-03-26 12:04:01 12
+            8 16/03/26 2016-03-26 11:15:06 11
+            17 16/03/26 2016-03-26 11:56:06 11
+            15 16/03/26 2016-03-26 11:45:00 11
+
+            # Example 3: Extract hour from column 'arrivalTime' with offset '-11:00'.
+            >>> df.assign(col = df.arrivalTime.extract('HOUR', '-11:00'))
+                   schedule_date arrivalTime col
+            seq_no
+            26 16/03/26 2016-03-26 12:33:05 1
+            24 16/03/26 2016-03-26 12:25:06 1
+            3 16/03/26 2016-03-26 10:52:05 23
+            22 16/03/26 2016-03-26 12:18:01 1
+            20 16/03/26 2016-03-26 12:10:06 1
+            18 16/03/26 2016-03-26 12:04:01 1
+            8 16/03/26 2016-03-26 11:15:06 0
+            17 16/03/26 2016-03-26 11:56:06 0
+            15 16/03/26 2016-03-26 11:45:00 0
+
+            # Example 4: Extract hour from column 'arrivalTime' with offset 10.
+            >>> df.assign(col = df.arrivalTime.extract('HOUR', 10))
+                   schedule_date arrivalTime col
+            seq_no
+            26 16/03/26 2016-03-26 12:33:05 22
+            24 16/03/26 2016-03-26 12:25:06 22
+            3 16/03/26 2016-03-26 10:52:05 20
+            22 16/03/26 2016-03-26 12:18:01 22
+            20 16/03/26 2016-03-26 12:10:06 22
+            18 16/03/26 2016-03-26 12:04:01 22
+            8 16/03/26 2016-03-26 11:15:06 21
+            17 16/03/26 2016-03-26 11:56:06 21
+            15 16/03/26 2016-03-26 11:45:00 21
+            13 16/03/26 2016-03-26 11:33:00 21
+            11 16/03/26 2016-03-26 11:26:00 21
+        """
+        # Validating Arguments
+        arg_type_matrix = []
+        arg_type_matrix.append(["value", value , True, (str), True])
+        arg_type_matrix.append(["timezone", timezone, True, (str, ColumnExpression, int, float), True])
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        # If user doesn't provide timezone simply use extract functionality.
+        if not timezone:
+            return _SQLColumnExpression(func.extract(value, self.expression))
+
+        # If user uses timezone generate query with time zone.
+        if isinstance(timezone, _SQLColumnExpression):
+            _timezone_expr = _SQLColumnExpression(literal_column(f' AT TIME ZONE {timezone.compile()}')).compile()
+        else:
+            _timezone_expr = _SQLColumnExpression(literal_column(_SQLColumnExpression._timezone_string(timezone))).compile()
+        return _SQLColumnExpression(func.extract(value, literal_column('({}{})'.format(self.compile(), _timezone_expr))))
+
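Assuming the example timestamps are stored at offset +00:00 (an assumption, not stated in the docstring), the timezone-shifted hours in Examples 3 and 4 follow from simple modular arithmetic; illustrative only:

    # hour 12 at offset -11 -> 1; hour 12 at offset +10 -> 22 (Examples 3 and 4)
    for hour, offset in [(12, -11), (11, -11), (12, 10), (11, 10)]:
        print((hour + offset) % 24)  # 1, 0, 22, 21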
+    def to_interval(self, value=None, type_=INTERVAL_DAY_TO_SECOND):
+        """
+        DESCRIPTION:
+            Converts a numeric value or string value into an INTERVAL_DAY_TO_SECOND or INTERVAL_YEAR_TO_MONTH value.
+
+        PARAMETERS:
+            value:
+                Optional, when column type is VARCHAR or CHAR, otherwise required.
+                Specifies the unit of value for numeric value.
+                when type_ is INTERVAL_DAY_TO_SECOND permitted values:
+                    * DAY, HOUR, MINUTE, SECOND
+                when type_ is INTERVAL_YEAR_TO_MONTH permitted values:
+                    * YEAR, MONTH
+                Note:
+                    * Permitted Values are case insensitive.
+                Type: str or ColumnExpression
+
+            type_:
+                Optional Argument.
+                Specifies a teradatasqlalchemy type or an object of a teradatasqlalchemy type
+                that the column needs to be cast to.
+                Default value: TIMESTAMP
+                Permitted Values: INTERVAL_DAY_TO_SECOND or INTERVAL_YEAR_TO_MONTH type.
+                Types: teradatasqlalchemy type or object of teradatasqlalchemy type
+
+        Returns:
+            ColumnExpression
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "interval_data")
+
+            # Create a DataFrame on 'interval_data' table.
+            >>> df = DataFrame("interval_data")
+            >>> df
+            id int_col value_col value_col1 str_col1 str_col2
+            2 657 MINUTE MONTH PT73H -P14M
+            3 1234 SECOND MONTH 100 04:23:59 06-10
+            1 240 HOUR YEAR P100DT4H23M59S P100Y4M
+            0 20 DAY YEAR 100 04:23:59 04-10
+
+            >>> df.tdtypes
+            id INTEGER()
+            int_col BIGINT()
+            value_col VARCHAR(length=30, charset='LATIN')
+            value_col1 VARCHAR(length=30, charset='LATIN')
+            str_col1 VARCHAR(length=30, charset='LATIN')
+            str_col2 VARCHAR(length=30, charset='LATIN')
+
+
+            # Example 1: Convert "int_col" column to INTERVAL_DAY_TO_SECOND with value
+            # provided in "value_col".
+            >>> df.assign(col = df.int_col.to_interval(df.value_col))
+            id int_col value_col value_col1 str_col1 str_col2 col
+            2 657 MINUTE MONTH PT73H -P14M 0 10:57:00.000000
+            3 1234 SECOND MONTH 100 04:23:59 06-10 0 00:20:34.000000
+            1 240 HOUR YEAR P100DT4H23M59S P100Y4M 10 00:00:00.000000
+            0 20 DAY YEAR 100 04:23:59 04-10 20 00:00:00.000000
+
+            # Example 2: Convert int_col to INTERVAL_YEAR_TO_MONTH when value = 'MONTH'.
+            >>> df.assign(col = df.int_col.to_interval('MONTH', INTERVAL_YEAR_TO_MONTH))
+            id int_col value_col value_col1 str_col1 str_col2 col
+            2 657 MINUTE MONTH PT73H -P14M 54-09
+            3 1234 SECOND MONTH 100 04:23:59 06-10 102-10
+            1 240 HOUR YEAR P100DT4H23M59S P100Y4M 20-00
+            0 20 DAY YEAR 100 04:23:59 04-10 1-08
+
+            # Example 3: Convert string column "str_col1" to INTERVAL_DAY_TO_SECOND.
+            >>> df.assign(col = df.str_col1.to_interval())
+            id int_col value_col value_col1 str_col1 str_col2 col
+            2 657 MINUTE MONTH PT73H -P14M 3 01:00:00.000000
+            3 1234 SECOND MONTH 100 04:23:59 06-10 100 04:23:59.000000
+            1 240 HOUR YEAR P100DT4H23M59S P100Y4M 100 04:23:59.000000
+            0 20 DAY YEAR 100 04:23:59 04-10 100 04:23:59.000000
+
+            # Example 4: Convert string column "str_col2" to INTERVAL_DAY_TO_MONTH.
+            >>> df.assign(col = df.str_col2.to_interval(type_=INTERVAL_YEAR_TO_MONTH))
+            id int_col value_col value_col1 str_col1 str_col2 col
+            2 657 MINUTE MONTH PT73H -P14M -1-02
+            3 1234 SECOND MONTH 100 04:23:59 06-10 6-10
+            1 240 HOUR YEAR P100DT4H23M59S P100Y4M 100-04
+            0 20 DAY YEAR 100 04:23:59 04-10 4-10
+
+        """
+        # Validating Arguments
+        arg_type_matrix = []
+        arg_type_matrix.append(["value", value , True, (str, ColumnExpression), True])
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        if not UtilFuncs._is_valid_td_type(type_):
+            raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, 'type_',
+                                                           'a valid teradatasqlalchemy type'),
+                                      MessageCodes.UNSUPPORTED_DATATYPE)
+
+        # When column type is string, use either to_dsinterval or to_yminterval function based on "type_".
+        if isinstance(self._type, (VARCHAR, CHAR)):
+            _fun = (getattr(func, "to_dsinterval")) if isinstance(type_, INTERVAL_DAY_TO_SECOND) \
+                or (isinstance(type_, type) and issubclass(type_, INTERVAL_DAY_TO_SECOND)) \
+                else (getattr(func, "to_yminterval"))
+            return _SQLColumnExpression(_fun(self.expression), type=type_)
+
+        # When column type is integer or float type, use either numtodsinterval or numtoyminterval
+        # function based on "type_".
+        _fun = (getattr(func, "numtodsinterval")) if isinstance(type_, INTERVAL_DAY_TO_SECOND) \
+            or (isinstance(type_, type) and issubclass(type_, INTERVAL_DAY_TO_SECOND)) \
+            else (getattr(func, "numtoyminterval"))
+        value = value.expression if isinstance(value, _SQLColumnExpression) else value
+        return _SQLColumnExpression(_fun(self.expression, value), type=type_)
+
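The NUMTODSINTERVAL-style results in Example 1 are plain unit conversions, which a one-liner confirms:

    # 657 MINUTE -> 10 h 57 m; 1234 SECOND -> 20 m 34 s; 240 HOUR -> 10 days
    print(divmod(657, 60), divmod(1234, 60), divmod(240, 24))  # (10, 57) (20, 34) (10, 0)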
+    def parse_url(self, url_part):
+        """
+        DESCRIPTION:
+            Extracts a specific part from the URL.
+
+        PARAMETERS:
+            url_part:
+                Required Argument.
+                Specifies which part to be extracted.
+                Permitted Values: HOST, PATH, QUERY, REF, PROTOCOL, FILE, AUTHORITY, USERINFO
+                Type: str or ColumnExpression
+
+        Returns:
+            ColumnExpression
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "url_data")
+
+            # Create a DataFrame on 'url_data' table.
+            >>> df = DataFrame("url_data")
+            >>> df
+               urls part
+            id
+            3 https://www.facebook.com HOST
+            6 smtp://user:password@smtp.example.com:21/file.txt USERINFO
+            4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY
+            2 https://example.net/path4/path5/path6?query4=value4#fragment REF
+            0 http://example.com:8080/path FILE
+            1 ftp://example.net:21/path PATH
+            5 http://pg.example.ml/path150#fragment90 AUTHORITY
+            7 https://www.google.com PROTOCOL
+
+            # Example 1: Extract components from column 'urls' using column 'part'
+            >>> df.assign(col = df.urls.parse_url(df.part))
+               urls part col
+            id
+            3 https://www.facebook.com HOST www.facebook.com
+            6 smtp://user:password@smtp.example.com:21/file.txt USERINFO user:password
+            4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY None
+            2 https://example.net/path4/path5/path6?query4=value4#fragment REF fragment
+            0 http://example.com:8080/path FILE /path
+            1 ftp://example.net:21/path PATH /path
+            5 http://pg.example.ml/path150#fragment90 AUTHORITY pg.example.ml
+            7 https://www.google.com PROTOCOL https
+            >>>
+        """
+
+        # Validating Arguments
+        arg_type_matrix = []
+        arg_type_matrix.append(["url_part", url_part, False, (str, ColumnExpression), True])
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        # Regex pattern used to extract 'url_part' is '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'.
+        # teradataml does not support regex grouping hence in some cases first used 'regex_replace' and
+        # then 'regex_substr' or vice-versa.
+        _part_to_extract_dict = {
+            'HOST': _SQLColumnExpression(
+                func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), '(//[^/?#]+@)|(//)|(:\d+)', ''),
+                type=VARCHAR()),
+            'PATH': _SQLColumnExpression(func.regexp_substr(
+                func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
+                '([^?#]*)'), type=VARCHAR()),
+            'QUERY': _SQLColumnExpression(func.ltrim(func.regexp_substr(
+                func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)', ''),
+                '\?([^#]*)'), '?'), type=VARCHAR()),
+            'REF': _SQLColumnExpression(func.ltrim(func.regexp_substr(
+                func.regexp_replace(self.expression,
+                                    '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?', ''),
+                '(#(.*))'), '#'), type=VARCHAR()),
+            'PROTOCOL': _SQLColumnExpression(
+                func.rtrim(func.regexp_substr(self.expression, '^(([^:/?#]+):)'), ':'),
+                type=VARCHAR()),
+            'FILE': _SQLColumnExpression(func.regexp_substr(
+                func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
+                '([^?#]*)(\?([^#]*))?'), type=VARCHAR()),
+            'AUTHORITY': _SQLColumnExpression(
+                func.ltrim(func.regexp_substr(self.expression, '//([^/?#]*)'), '//'),
+                type=VARCHAR()),
+            'USERINFO': _SQLColumnExpression(func.rtrim(func.ltrim(
+                func.regexp_substr(func.regexp_substr(self.expression, '//([^/?#]*)'),
+                                   '//[^/?#]+@'), '/'), '@'), type=VARCHAR())
+        }
+
+        if isinstance(url_part, str):
+            return _part_to_extract_dict[url_part]
+
+        whens = [(url_part == 'HOST', _part_to_extract_dict['HOST']),
+                 (url_part == 'PATH', _part_to_extract_dict['PATH']),
+                 (url_part == 'QUERY', _part_to_extract_dict['QUERY']),
+                 (url_part == 'REF', _part_to_extract_dict['REF']),
+                 (url_part == 'PROTOCOL', _part_to_extract_dict['PROTOCOL']),
+                 (url_part == 'FILE', _part_to_extract_dict['FILE']),
+                 (url_part == 'AUTHORITY', _part_to_extract_dict['AUTHORITY']),
+                 (url_part == 'USERINFO', _part_to_extract_dict['USERINFO'])]
+
+        from teradataml.dataframe.sql_functions import case
+        return case(whens)
+
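The comment above quotes the RFC 3986-style pattern that the regexp_substr/regexp_replace pipeline emulates. Applying that pattern directly with Python's re module shows which capture group each url_part corresponds to (a local sketch, not how Vantage evaluates it):

    import re

    # Group 2: PROTOCOL, 4: AUTHORITY, 5: PATH (FILE adds the query), 7: QUERY, 9: REF.
    URL_RE = re.compile(r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?')

    m = URL_RE.match('https://example.net/path4/path5/path6?query4=value4#fragment')
    print(m.group(2), m.group(4), m.group(5), m.group(7), m.group(9))
    # https example.net /path4/path5/path6 query4=value4 fragment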
+    def log(self, base):
+        """
+        DESCRIPTION:
+            Returns the logarithm value of the column with respect to 'base'.
+
+        PARAMETERS:
+            base:
+                Required Argument.
+                Specifies base of logarithm.
+                Type: int or float or ColumnExpression
+
+        Returns:
+            ColumnExpression
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "titanic")
+
+            # Create a DataFrame on 'titanic' table.
+            >>> titanic = DataFrame.from_table('titanic')
+            >>> df = titanic.select(["passenger", "age", "fare"])
+            >>> print(df)
+                       age     fare
+            passenger
+            326       36.0 135.6333
+            183        9.0  31.3875
+            652       18.0  23.0000
+            265        NaN   7.7500
+            530       23.0  11.5000
+            122        NaN   8.0500
+            591       35.0   7.1250
+            387        1.0  46.9000
+            734       23.0  13.0000
+            795       25.0   7.8958
+            >>>
+
+            # Example 1: Compute log values for column 'fare' using base as column 'age'.
+            >>> log_df = df.assign(fare_log=df.fare.log(df.age))
+            >>> print(log_df)
+                       age     fare  fare_log
+            passenger
+            326       36.0 135.6333  1.370149
+            183        9.0  31.3875  1.568529
+            652       18.0  23.0000  1.084807
+            40        14.0  11.2417  0.916854
+            774        NaN   7.2250       NaN
+            366       30.0   7.2500  0.582442
+            509       28.0  22.5250  0.934704
+            795       25.0   7.8958  0.641942
+            61        22.0   7.2292  0.639955
+            469        NaN   7.7250       NaN
+            >>>
+        """
+        # Validating Arguments
+        arg_type_matrix = []
+        arg_type_matrix.append(["base", base, False, (int, float, ColumnExpression), True])
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        # Handling cases when 'base' or 'self' column values are zero or when denominator is zero
+        from teradataml.dataframe.sql_functions import case
+
+        if not isinstance(base, _SQLColumnExpression):
+            whens = case([((self != 0) & (_SQLColumnExpression(literal(base)).ln() != 0),
+                           (self.ln() / _SQLColumnExpression(literal(base)).ln()).cast(FLOAT))])
+        else:
+            whens = case([((self != 0) & (base != 0) & (base.ln() != 0),
+                           (self.ln() / base.ln()).cast(FLOAT))])
+
+        return whens
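
The CASE expression implements the change-of-base identity log_b(x) = ln(x) / ln(b), guarded against zero operands. Checking the first row of Example 1 locally:

    import math

    # log base 36 of 135.6333, as in passenger 326's row
    print(round(math.log(135.6333) / math.log(36.0), 6))  # 1.370149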