teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (126) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +315 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +95 -8
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/metadata.py +12 -3
  8. teradataml/analytics/json_parser/utils.py +7 -2
  9. teradataml/analytics/sqle/__init__.py +5 -1
  10. teradataml/analytics/table_operator/__init__.py +1 -1
  11. teradataml/analytics/uaf/__init__.py +1 -1
  12. teradataml/analytics/utils.py +4 -0
  13. teradataml/analytics/valib.py +18 -4
  14. teradataml/automl/__init__.py +51 -6
  15. teradataml/automl/data_preparation.py +59 -35
  16. teradataml/automl/data_transformation.py +58 -33
  17. teradataml/automl/feature_engineering.py +27 -12
  18. teradataml/automl/model_training.py +73 -46
  19. teradataml/common/constants.py +88 -29
  20. teradataml/common/garbagecollector.py +2 -1
  21. teradataml/common/messagecodes.py +19 -3
  22. teradataml/common/messages.py +6 -1
  23. teradataml/common/sqlbundle.py +64 -12
  24. teradataml/common/utils.py +246 -47
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +161 -27
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/byom_example.json +11 -0
  29. teradataml/data/dataframe_example.json +18 -2
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  37. teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
  38. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  39. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  40. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  41. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  42. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  43. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  44. teradataml/data/hnsw_alter_data.csv +5 -0
  45. teradataml/data/hnsw_data.csv +10 -0
  46. teradataml/data/jsons/byom/h2opredict.json +1 -1
  47. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  48. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  49. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  50. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  51. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  52. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  53. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  54. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  55. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  56. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  57. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  58. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  59. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  60. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  61. teradataml/data/medical_readings.csv +101 -0
  62. teradataml/data/patient_profile.csv +101 -0
  63. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  64. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  65. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  66. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  67. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  68. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  69. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  70. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  71. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  72. teradataml/data/target_udt_data.csv +8 -0
  73. teradataml/data/templates/open_source_ml.json +3 -2
  74. teradataml/data/teradataml_example.json +8 -0
  75. teradataml/data/vectordistance_example.json +4 -0
  76. teradataml/dataframe/copy_to.py +8 -3
  77. teradataml/dataframe/data_transfer.py +11 -1
  78. teradataml/dataframe/dataframe.py +1049 -285
  79. teradataml/dataframe/dataframe_utils.py +152 -20
  80. teradataml/dataframe/functions.py +578 -35
  81. teradataml/dataframe/setop.py +11 -6
  82. teradataml/dataframe/sql.py +185 -16
  83. teradataml/dbutils/dbutils.py +1049 -115
  84. teradataml/dbutils/filemgr.py +48 -1
  85. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  86. teradataml/lib/aed_0_1.dll +0 -0
  87. teradataml/opensource/__init__.py +1 -1
  88. teradataml/opensource/_base.py +1466 -0
  89. teradataml/opensource/_class.py +464 -0
  90. teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
  91. teradataml/opensource/_lightgbm.py +949 -0
  92. teradataml/opensource/_sklearn.py +1008 -0
  93. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
  94. teradataml/options/__init__.py +54 -38
  95. teradataml/options/configure.py +131 -27
  96. teradataml/options/display.py +13 -2
  97. teradataml/plot/axis.py +47 -8
  98. teradataml/plot/figure.py +33 -0
  99. teradataml/plot/plot.py +63 -13
  100. teradataml/scriptmgmt/UserEnv.py +5 -5
  101. teradataml/scriptmgmt/lls_utils.py +130 -40
  102. teradataml/store/__init__.py +12 -0
  103. teradataml/store/feature_store/__init__.py +0 -0
  104. teradataml/store/feature_store/constants.py +291 -0
  105. teradataml/store/feature_store/feature_store.py +2318 -0
  106. teradataml/store/feature_store/models.py +1505 -0
  107. teradataml/table_operators/Apply.py +32 -18
  108. teradataml/table_operators/Script.py +3 -1
  109. teradataml/table_operators/TableOperator.py +3 -1
  110. teradataml/table_operators/query_generator.py +3 -0
  111. teradataml/table_operators/table_operator_query_generator.py +3 -1
  112. teradataml/table_operators/table_operator_util.py +37 -38
  113. teradataml/table_operators/templates/dataframe_register.template +69 -0
  114. teradataml/utils/dtypes.py +51 -2
  115. teradataml/utils/internal_buffer.py +18 -0
  116. teradataml/utils/validators.py +99 -8
  117. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
  118. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
  119. teradataml/libaed_0_1.dylib +0 -0
  120. teradataml/libaed_0_1.so +0 -0
  121. teradataml/opensource/sklearn/__init__.py +0 -1
  122. teradataml/opensource/sklearn/_class.py +0 -255
  123. teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
  124. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  125. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  126. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
@@ -1,23 +1,37 @@
1
- from teradatasqlalchemy.types import VARCHAR
1
+ import pandas as pd
2
+ from inspect import getsource
3
+ import re
4
+ from types import FunctionType
5
+ from teradataml.dbutils.filemgr import install_file, list_files, remove_file
6
+ from teradataml.options.configure import configure
7
+ import teradatasqlalchemy as tdsqlalchemy
2
8
  from teradataml.utils.validators import _Validators
3
9
  from teradataml.dataframe.sql import _SQLColumnExpression
4
- from teradatasqlalchemy import (BYTEINT, SMALLINT, INTEGER, BIGINT, DECIMAL, FLOAT,
5
- NUMBER)
6
- from teradatasqlalchemy import (TIMESTAMP, DATE, TIME)
7
- from teradatasqlalchemy import (CHAR, VARCHAR, CLOB)
8
- from teradatasqlalchemy import (BYTE, VARBYTE, BLOB)
9
- from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP)
10
- from teradatasqlalchemy import (INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
11
- INTERVAL_DAY,INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
12
- INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
13
- INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
14
- INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND,
15
- INTERVAL_SECOND)
10
+ from teradatasqlalchemy import VARCHAR, CLOB, CHAR
11
+ from teradataml.common.constants import TeradataTypes
12
+ from teradataml.common.utils import UtilFuncs
13
+ from teradataml.utils.dtypes import _Dtypes
14
+ from teradataml.dataframe.sql_interfaces import ColumnExpression
15
+ from teradataml.table_operators.table_operator_util import _TableOperatorUtils
16
+ from teradataml.utils.internal_buffer import _InternalBuffer
17
+ from teradataml.common.exceptions import TeradataMlException
18
+ from teradataml.common.messages import Messages
19
+ from teradataml.common.messagecodes import MessageCodes
20
+ from teradataml.scriptmgmt.lls_utils import get_env
16
21
 
17
22
  def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',', quotechar=None):
18
23
  """
19
24
  DESCRIPTION:
20
25
  Creates a user defined function (UDF).
26
+
27
+ Notes:
28
+ 1. Date and time data types must be formatted to supported formats.
29
+ (See Prerequisite Input and Output Structures in Open Analytics Framework for more details.)
30
+ 2. Packages required to run the user defined function must be installed in remote user
31
+ environment using install_lib method of UserEnv class. Import statements of these
32
+ packages should be inside the user defined function itself.
33
+ 3. Do not call a regular function defined outside the udf() from the user defined function.
34
+ The function definition and call must be inside the udf(). Look at Example 9 to understand more.
21
35
 
22
36
  PARAMETERS:
23
37
  user_function:
@@ -26,12 +40,12 @@ def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',
26
40
  teradataml DataFrame.
27
41
  Types: function
28
42
  Note:
29
- 1. Lambda Function are not supported.
43
+ Lambda functions are not supported. Re-write the lambda function as regular Python function to use with UDF.
30
44
 
31
45
  returns:
32
46
  Optional Argument.
33
47
  Specifies the output column type.
34
- Types: teradata type
48
+ Types: teradatasqlalchemy types object
35
49
  Default: VARCHAR(1024)
36
50
 
37
51
  env_name:
@@ -77,15 +91,6 @@ def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',
77
91
  RAISES:
78
92
  TeradataMLException
79
93
 
80
- NOTES:
81
- 1. While working on date and time data types one must format these to supported formats.
82
- (See Requisite Input and Output Structures in Open Analytics Framework for more details.)
83
- 2. Required packages to run the user defined function must be installed in remote user
84
- environment using install_lib function Of UserEnv class. Import statements of these
85
- packages should be inside the user defined function itself.
86
- 3. One can't call a regular function defined outside the udf from the user defined function.
87
- The function definition and call must be inside the udf. Look at Example 9 to understand more.
88
-
89
94
  EXAMPLES:
90
95
  # Load the data to run the example.
91
96
  >>> load_example_data("dataframe", "sales")
@@ -309,17 +314,8 @@ def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',
309
314
  Red Inc 200.0 150.0 140.0 NaN 17/01/04 2021-10-06
310
315
  >>>
311
316
  """
312
-
313
- allowed_datatypes = (BYTEINT, SMALLINT, INTEGER, BIGINT, DECIMAL, FLOAT, NUMBER,
314
- TIMESTAMP, DATE, TIME, CHAR, VARCHAR, CLOB, BYTE, VARBYTE,
315
- BLOB, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP,
316
- INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
317
- INTERVAL_DAY, INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
318
- INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
319
- INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
320
- INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND, INTERVAL_SECOND
321
- )
322
317
 
318
+ allowed_datatypes = TeradataTypes.TD_ALL_TYPES.value
323
319
  # Validate datatypes in returns.
324
320
  _Validators._validate_function_arguments([["returns", returns, False, allowed_datatypes]])
325
321
 
@@ -336,4 +332,551 @@ def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',
336
332
  def func_(*args):
337
333
  return _SQLColumnExpression(expression=None, udf=user_function, udf_type=returns, udf_args=args,\
338
334
  env_name=env_name, delimiter=delimiter, quotechar=quotechar)
339
- return func_
335
+ return func_
336
+
337
+
338
+ def register(name, user_function, returns=VARCHAR(1024)):
339
+ """
340
+ DESCRIPTION:
341
+ Registers a user defined function (UDF).
342
+
343
+ Notes:
344
+ 1. Date and time data types must be formatted to supported formats.
345
+ (See Requisite Input and Output Structures in Open Analytics Framework for more details.)
346
+ 2. On VantageCloud Lake, user defined function is registered by default in the 'openml_env' environment.
347
+ User can register it in their own user environment, using the 'openml_user_env' configuration option.
348
+
349
+ PARAMETERS:
350
+ name:
351
+ Required Argument.
352
+ Specifies the name of the user defined function to register.
353
+ Types: str
354
+
355
+ user_function:
356
+ Required Argument.
357
+ Specifies the user defined function to create a column for
358
+ teradataml DataFrame.
359
+ Types: function, udf
360
+ Note:
361
+ Lambda functions are not supported. Re-write the lambda function as regular Python function to use with UDF.
362
+
363
+ returns:
364
+ Optional Argument.
365
+ Specifies the output column type used to register the user defined function.
366
+ Note:
367
+ * If 'user_function' is a udf, then return type of the udf is used as return type
368
+ of the registered user defined function.
369
+ Default Value: VARCHAR(1024)
370
+ Types: teradatasqlalchemy types object
371
+
372
+ RETURNS:
373
+ None
374
+
375
+ RAISES:
376
+ TeradataMLException, TypeError
377
+
378
+ EXAMPLES:
379
+ # Example 1: Register the user defined function to get the values in upper case.
380
+ >>> from teradataml.dataframe.functions import udf, register
381
+ >>> @udf
382
+ ... def to_upper(s):
383
+ ... if s is not None:
384
+ ... return s.upper()
385
+ >>>
386
+ # Register the created user defined function.
387
+ >>> register("upper_val", to_upper)
388
+ >>>
389
+
390
+ # Example 2: Register a user defined function to get factorial of a number and
391
+ # store the result in Integer type column.
392
+ >>> from teradataml.dataframe.functions import udf, register
393
+ >>> from teradatasqlalchemy.types import INTEGER
394
+ >>> @udf
395
+ ... def factorial(n):
396
+ ... import math
397
+ ... return math.factorial(n)
398
+ >>>
399
+ # Register the created user defined function.
400
+ >>> register("fact", factorial, INTEGER())
401
+ >>>
402
+
403
+ # Example 3: Register a Python function to get the values in upper case.
404
+ >>> from teradataml.dataframe.functions import register
405
+ >>> def to_upper(s):
406
+ ... return s.upper()
407
+ >>>
408
+ # Register the created Python function.
409
+ >>> register("upper_val", to_upper)
410
+ >>>
411
+ """
412
+
413
+ # Validate the arguments.
414
+ arg_matrix = []
415
+ allowed_datatypes = TeradataTypes.TD_ALL_TYPES.value
416
+ arg_matrix.append(["returns", returns, True, allowed_datatypes])
417
+ arg_matrix.append(["name", name, False, str])
418
+ _Validators._validate_function_arguments(arg_matrix)
419
+
420
+ function = []
421
+ # Check if the user_function is Python function or
422
+ # a user defined function(udf) or ColumnExpression returned by udf.
423
+ if isinstance(user_function, ColumnExpression):
424
+ function.append(user_function._udf)
425
+ returns = user_function._type
426
+ elif "udf.<locals>" not in user_function.__qualname__:
427
+ function.append(user_function)
428
+ else:
429
+ user_function = user_function.__call__()
430
+ function.append(user_function._udf)
431
+ returns = user_function._type
432
+
433
+ # Create a dictionary of user defined function name to return type.
434
+ returns = {name: _create_return_type(returns)}
435
+
436
+ exec_mode = 'REMOTE' if UtilFuncs._is_lake() else 'IN-DB'
437
+
438
+ tbl_operators = _TableOperatorUtils([],
439
+ None,
440
+ "register",
441
+ function,
442
+ exec_mode,
443
+ chunk_size=None,
444
+ num_rows=1,
445
+ delimiter=None,
446
+ quotechar=None,
447
+ data_partition_column=None,
448
+ data_hash_column=None,
449
+ style = "csv",
450
+ returns = returns,
451
+ )
452
+
453
+ # Install the file on the lake/enterprise environment.
454
+ if exec_mode == 'REMOTE':
455
+ _Validators._check_auth_token("register")
456
+ env_name = UtilFuncs._get_env_name()
457
+ tbl_operators.__env = get_env(env_name)
458
+ tbl_operators.__env.install_file(tbl_operators.script_path, suppress_output=True, replace=True)
459
+ else:
460
+ install_file(file_identifier=tbl_operators.script_base_name,
461
+ file_path=tbl_operators.script_path,
462
+ suppress_output=True, replace=True)
463
+
464
+
465
+ def call_udf(udf_name, func_args = () , **kwargs):
466
+ """
467
+ DESCRIPTION:
468
+ Call a registered user defined function (UDF).
469
+
470
+ Notes:
471
+ 1. Packages required to run the registered user defined function must be installed in remote user
472
+ environment using install_lib method of UserEnv class. Import statements of these
473
+ packages should be inside the user defined function itself.
474
+ 2. On VantageCloud Lake, user defined function runs by default in the 'openml_env' environment.
475
+ User can use their own user environment, using the 'openml_user_env' configuration option.
476
+
477
+ PARAMETERS:
478
+ udf_name:
479
+ Required Argument.
480
+ Specifies the name of the registered user defined function.
481
+ Types: str
482
+
483
+ func_args:
484
+ Optional Argument.
485
+ Specifies the arguments to pass to the registered UDF.
486
+ Default Value: ()
487
+ Types: tuple
488
+
489
+ delimiter:
490
+ Optional Argument.
491
+ Specifies a delimiter to use when reading columns from a row and
492
+ writing result columns.
493
+ Notes:
494
+ * This argument cannot be same as "quotechar" argument.
495
+ * This argument cannot be a newline character.
496
+ * Use a different delimiter if categorical columns in the data contain
497
+ a character same as the delimiter.
498
+ Default Value: ','
499
+ Types: one character string
500
+
501
+ quotechar:
502
+ Optional Argument.
503
+ Specifies a character that forces input of the user function
504
+ to be quoted using this specified character.
505
+ Using this argument enables the Analytics Database to
506
+ distinguish between NULL fields and empty strings.
507
+ A string with length zero is quoted, while NULL fields are not.
508
+ Notes:
509
+ * This argument cannot be same as "delimiter" argument.
510
+ * This argument cannot be a newline character.
511
+ Default Value: None
512
+ Types: one character string
513
+
514
+ RETURNS:
515
+ ColumnExpression
516
+
517
+ RAISES:
518
+ TeradataMLException
519
+
520
+ EXAMPLES:
521
+ # Load the data to run the example.
522
+ >>> load_example_data("dataframe", "sales")
523
+
524
+ # Create a DataFrame on 'sales' table.
525
+ >>> import random
526
+ >>> dfsales = DataFrame("sales")
527
+ >>> df = dfsales.assign(id = case([(df.accounts == 'Alpha Co', random.randrange(1, 9)),
528
+ ... (df.accounts == 'Blue Inc', random.randrange(1, 9)),
529
+ ... (df.accounts == 'Jones LLC', random.randrange(1, 9)),
530
+ ... (df.accounts == 'Orange Inc', random.randrange(1, 9)),
531
+ ... (df.accounts == 'Yellow Inc', random.randrange(1, 9)),
532
+ ... (df.accounts == 'Red Inc', random.randrange(1, 9))]))
533
+
534
+ # Example 1: Register and call the user defined function to get the values in upper case.
535
+ >>> from teradataml.dataframe.functions import udf, register, call_udf
536
+ >>> @udf
537
+ ... def to_upper(s):
538
+ ... if s is not None:
539
+ ... return s.upper()
540
+ >>>
541
+ # Register the created user defined function with name "upper".
542
+ >>> register("upper", to_upper)
543
+ >>>
544
+ # Call the user defined function registered with name "upper" and assign the
545
+ # ColumnExpression returned to the DataFrame.
546
+ >>> res = df.assign(upper_col = call_udf("upper", ('accounts',)))
547
+ >>> res
548
+ Feb Jan Mar Apr datetime id upper_col
549
+ accounts
550
+ Yellow Inc 90.0 NaN NaN NaN 17/01/04 4 YELLOW INC
551
+ Alpha Co 210.0 200.0 215.0 250.0 17/01/04 2 ALPHA CO
552
+ Jones LLC 200.0 150.0 140.0 180.0 17/01/04 5 JONES LLC
553
+ Red Inc 200.0 150.0 140.0 NaN 17/01/04 3 RED INC
554
+ Blue Inc 90.0 50.0 95.0 101.0 17/01/04 1 BLUE INC
555
+ Orange Inc 210.0 NaN NaN 250.0 17/01/04 4 ORANGE INC
556
+ >>>
557
+
558
+ # Example 2: Register and Call user defined function to get factorial of a number
559
+ # and store the result in Integer type column.
560
+ >>> from teradataml.dataframe.functions import udf, register
561
+ >>> @udf(returns = INTEGER())
562
+ ... def factorial(n):
563
+ ... import math
564
+ ... return math.factorial(n)
565
+ >>>
566
+ # Register the created user defined function with name "fact".
567
+ >>> from teradatasqlalchemy.types import INTEGER
568
+ >>> register("fact", factorial)
569
+ >>>
570
+ # Call the user defined function registered with name "fact" and assign the
571
+ # ColumnExpression returned to the DataFrame.
572
+ >>> res = df.assign(fact_col = call_udf("fact", ('id',)))
573
+ >>> res
574
+ Feb Jan Mar Apr datetime id fact_col
575
+ accounts
576
+ Jones LLC 200.0 150.0 140.0 180.0 17/01/04 5 120
577
+ Yellow Inc 90.0 NaN NaN NaN 17/01/04 4 24
578
+ Red Inc 200.0 150.0 140.0 NaN 17/01/04 3 6
579
+ Blue Inc 90.0 50.0 95.0 101.0 17/01/04 1 1
580
+ Alpha Co 210.0 200.0 215.0 250.0 17/01/04 2 2
581
+ Orange Inc 210.0 NaN NaN 250.0 17/01/04 4 24
582
+ >>>
583
+
584
+ # Example 3: Register and call the Python function to get the values in upper case.
585
+ >>> from teradataml.dataframe.functions import register, call_udf
586
+ >>> def to_upper(s):
587
+ ... return s.upper()
588
+ >>>
589
+ # Register the created Python function with name "upper".
590
+ >>> register("upper", to_upper, returns = VARCHAR(1024))
591
+ >>>
592
+ # Call the Python function registered with name "upper" and assign the
593
+ # ColumnExpression returned to the DataFrame.
594
+ >>> res = df.assign(upper_col = call_udf("upper", ('accounts',)))
595
+ >>> res
596
+ Feb Jan Mar Apr datetime id upper_col
597
+ accounts
598
+ Yellow Inc 90.0 NaN NaN NaN 17/01/04 4 YELLOW INC
599
+ Alpha Co 210.0 200.0 215.0 250.0 17/01/04 2 ALPHA CO
600
+ Jones LLC 200.0 150.0 140.0 180.0 17/01/04 5 JONES LLC
601
+ Red Inc 200.0 150.0 140.0 NaN 17/01/04 3 RED INC
602
+ Blue Inc 90.0 50.0 95.0 101.0 17/01/04 1 BLUE INC
603
+ Orange Inc 210.0 NaN NaN 250.0 17/01/04 4 ORANGE INC
604
+ >>>
605
+ """
606
+ env = None
607
+ delimiter = kwargs.pop('delimiter', ',')
608
+ quotechar = kwargs.pop('quotechar', None)
609
+ unknown_args = list(kwargs.keys())
610
+ if len(unknown_args) > 0:
611
+ raise TypeError(Messages.get_message(MessageCodes.UNKNOWN_ARGUMENT,
612
+ "call_udf", unknown_args[0]))
613
+
614
+ if UtilFuncs._is_lake():
615
+ _Validators._check_auth_token("call_udf")
616
+ env = get_env(UtilFuncs._get_env_name())
617
+ file_list = env.files
618
+ if file_list is None:
619
+ raise TeradataMlException(Messages.get_message(
620
+ MessageCodes.FUNC_EXECUTION_FAILED, "'call_udf'", "No UDF is registered with the name '{}'.".format(udf_name)),
621
+ MessageCodes.FUNC_EXECUTION_FAILED)
622
+ file_column = 'File'
623
+ else:
624
+ file_list = list_files().to_pandas()
625
+ file_column = 'Files'
626
+
627
+ # Get the script name from the environment that starts with tdml_udf_name_<udf_name>_udf_type_.
628
+ script_file = [file for file in file_list[file_column] if file.startswith('tdml_udf_name_{}_udf_type_'.format(udf_name))]
629
+ if len(script_file) != 1:
630
+ raise TeradataMlException(Messages.get_message(
631
+ MessageCodes.FUNC_EXECUTION_FAILED, "'call_udf'", "Multiple UDFs or no UDF is registered with the name '{}'.".format(udf_name)),
632
+ MessageCodes.FUNC_EXECUTION_FAILED)
633
+
634
+ script_name = script_file[0]
635
+ # Get the return type from the script name.
636
+ x = re.search(r"tdml_udf_name_{}_udf_type_([A-Z_]+)(\d*)_register".format(udf_name), script_name)
637
+ returns = getattr(tdsqlalchemy, x.group(1))
638
+ # If the return type has length, get the length from the script name.
639
+ returns = returns(x.group(2)) if x.group(2) else returns()
640
+
641
+ return _SQLColumnExpression(expression=None, udf_args = func_args, udf_script = script_name, udf_type=returns,\
642
+ delimiter=delimiter, quotechar=quotechar, env_name=env)
643
+
644
+
645
+ def list_udfs(show_files=False):
646
+ """
647
+ DESCRIPTION:
648
+ List all the UDFs registered using 'register()' function.
649
+
650
+ PARAMETERS:
651
+ show_files:
652
+ Optional Argument.
653
+ Specifies whether to show file names or not.
654
+ Default Value: False
655
+ Types: bool
656
+
657
+ RETURNS:
658
+ Pandas DataFrame containing files and its details or
659
+ None if DataFrame is empty.
660
+
661
+ RAISES:
662
+ TeradataMLException.
663
+
664
+ EXAMPLES:
665
+ # Example 1: Register the user defined function to get the values in lower case,
666
+ # then list all the UDFs registered.
667
+ >>> @udf
668
+ ... def to_lower(s):
669
+ ... if s is not None:
670
+ ... return s.lower()
671
+
672
+ # Register the created user defined function.
673
+ >>> register("lower", to_lower)
674
+
675
+ # List all the UDFs registered
676
+ >>> list_udfs(True)
677
+ id name return_type file_name
678
+ 0 lower VARCHAR1024 tdml_udf_name_lower_udf_type_VARCHAR1024_register.py
679
+ 1 upper VARCHAR1024 tdml_udf_name_upper_udf_type_VARCHAR1024_register.py
680
+ 2 add_date DATE tdml_udf_name_add_date_udf_type_DATE_register.py
681
+ 3 sum_cols INTEGER tdml_udf_name_sum_cols_udf_type_INTEGER_register.py
682
+ >>>
683
+ """
684
+
685
+ if UtilFuncs._is_lake():
686
+ _Validators._check_auth_token("list_udfs")
687
+ env_name = UtilFuncs._get_env_name()
688
+ _df = get_env(env_name).files
689
+ if _df is not None:
690
+ # rename the existing DataFrame Column
691
+ _df.rename(columns={'File': 'Files'}, inplace=True)
692
+ _df = _df[_df['Files'].str.startswith('tdml_udf_') & _df['Files'].str.endswith('_register.py')][['Files']]
693
+ if len(_df) == 0:
694
+ print("No files found in remote user environment {}.".format(env_name))
695
+ else:
696
+ return _create_udf_dataframe(_df, show_files)
697
+
698
+ else:
699
+ _df = list_files()
700
+ _df = _df[_df['Files'].startswith('tdml_udf_') & _df['Files'].endswith('_register.py')].to_pandas()
701
+ if len(_df) == 0:
702
+ print("No files found in Vantage")
703
+ else:
704
+ return _create_udf_dataframe(_df, show_files)
705
+
706
+ def _create_udf_dataframe(pandas_df, show_files=False):
707
+ """
708
+ DESCRIPTION:
709
+ Internal function to return pandas DataFrame with
710
+ column names "id", "name", "return_type", "file_name".
711
+
712
+ PARAMETERS:
713
+ pandas_df:
714
+ Required Argument.
715
+ Specifies the pandas DataFrame containing one column 'Files'.
716
+ Types: pandas DataFrame
717
+
718
+ show_files:
719
+ Optional Argument.
720
+ Specifies whether to show file names or not.
721
+ Types: bool
722
+
723
+ RETURNS:
724
+ pandas DataFrame.
725
+
726
+ EXAMPLES:
727
+ >>> _create_udf_dataframe(pandas_dataframe)
728
+
729
+ """
730
+ _lists = pandas_df.values.tolist()
731
+ _data = {"id": [], "name": [], "return_type": []}
732
+ if show_files:
733
+ _data.update({"file_name": []})
734
+
735
+ for _counter, _list in enumerate(_lists):
736
+ # Extract udf name and type "tdml_udf_name_fact_udf_type_VARCHAR1024_register.py" -> ['fact', 'VARCHAR1024']
737
+ value = _list[0][14:-12].split('_udf_type_')
738
+ _data["id"].append(_counter)
739
+ _data["name"].append(value[0])
740
+ _data["return_type"].append(value[1])
741
+ if show_files:
742
+ _data["file_name"].append(_list[0])
743
+ return pd.DataFrame(_data)
744
+
745
+
746
+ def deregister(name, returns=None):
747
+ """
748
+ DESCRIPTION:
749
+ Deregisters a user defined function (UDF).
750
+
751
+ PARAMETERS:
752
+ name:
753
+ Required Argument.
754
+ Specifies the name of the user defined function to deregister.
755
+ Types: str
756
+
757
+ returns:
758
+ Optional Argument.
759
+ Specifies the type used to deregister the user defined function.
760
+ Types: teradatasqlalchemy types object
761
+
762
+ RETURNS:
763
+ None
764
+
765
+ RAISES:
766
+ TeradataMLException.
767
+
768
+ EXAMPLES:
769
+ # Example 1: Register the user defined function to get the values in lower case,
770
+ # then deregister it.
771
+ >>> @udf
772
+ ... def to_lower(s):
773
+ ... if s is not None:
774
+ ... return s.lower()
775
+
776
+ # Register the created user defined function.
777
+ >>> register("lower", to_lower)
778
+
779
+ # List all the UDFs registered
780
+ >>> list_udfs(True)
781
+ id name return_type file_name
782
+ 0 lower VARCHAR1024 tdml_udf_name_lower_udf_type_VARCHAR1024_register.py
783
+ 1 upper VARCHAR1024 tdml_udf_name_upper_udf_type_VARCHAR1024_register.py
784
+ 2 add_date DATE tdml_udf_name_add_date_udf_type_DATE_register.py
785
+ 3 sum_cols INTEGER tdml_udf_name_sum_cols_udf_type_INTEGER_register.py
786
+ >>>
787
+
788
+ # Deregister the created user defined function.
789
+ >>> deregister("lower")
790
+
791
+ # List all the UDFs registered
792
+ >>> list_udfs(True)
793
+ id name return_type file_name
794
+ 0 upper VARCHAR1024 tdml_udf_name_upper_udf_type_VARCHAR1024_register.py
795
+ 1 add_date DATE tdml_udf_name_add_date_udf_type_DATE_register.py
796
+ 2 sum_cols INTEGER tdml_udf_name_sum_cols_udf_type_INTEGER_register.py
797
+ >>>
798
+
799
+ # Example 2: Deregister only the specified user defined function with its return type.
800
+ >>> @udf(returns=FLOAT())
801
+ ... def sum(x, y):
802
+ ... return len(x) + y
803
+
804
+ # Register the created user defined function.
805
+ >>> register("sum", sum)
806
+
807
+ # List all the UDFs registered
808
+ >>> list_udfs(True)
809
+ id name return_type file_name
810
+ 0 sum FLOAT tdml_udf_name_sum_udf_type_FLOAT_register.py
811
+ 1 sum INTEGER tdml_udf_name_sum_udf_type_INTEGER_register.py
812
+ >>>
813
+
814
+ # Deregister the created user defined function.
815
+ >>> from teradatasqlalchemy import FLOAT
816
+ >>> deregister("sum", FLOAT())
817
+
818
+ # List all the UDFs registered
819
+ >>> list_udfs(True)
820
+ id name return_type file_name
821
+ 0 sum INTEGER tdml_udf_name_sum_udf_type_INTEGER_register.py
822
+ >>>
823
+ """
824
+ _df = list_udfs(show_files=True)
825
+ # Raise an exception when list_udfs() returns None, i.e. no registered UDF was found.
826
+ if _df is None:
827
+ raise TeradataMlException(Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
828
+ "'deregister'",
829
+ f"UDF '{name}' does not exist."),
830
+ MessageCodes.FUNC_EXECUTION_FAILED)
831
+
832
+ if returns is None:
833
+ _df = _df[_df['file_name'].str.startswith(f'tdml_udf_name_{name}_udf_type_')]
834
+ else:
835
+ _df = _df[_df['file_name'].str.startswith(f'tdml_udf_name_{name}_udf_type_{_create_return_type(returns)}_register.py')]
836
+
837
+ if len(_df) == 0:
838
+ raise TeradataMlException(Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
839
+ "'deregister'",
840
+ f"UDF '{name}' does not exist."),
841
+ MessageCodes.FUNC_EXECUTION_FAILED)
842
+
843
+ _df = _df.values.tolist()
844
+
845
+ # Remove the file on the lake/enterprise environment.
846
+ if UtilFuncs._is_lake():
847
+ env = get_env(UtilFuncs._get_env_name())
848
+ for file_name in _df:
849
+ env.remove_file(file_name[3], suppress_output=True)
850
+ else:
851
+ for file_name in _df:
852
+ remove_file(file_name[3][:-3], force_remove = True, suppress_output = True)
853
+
854
+
855
+ def _create_return_type(returns):
856
+ """
857
+ DESCRIPTION:
858
+ Internal function to return string representation of
859
+ type "returns" in such a way it is included in file name.
860
+
861
+ PARAMETERS:
862
+ returns:
863
+ Required Argument.
864
+ Specifies the teradatasqlalchemy types object.
865
+ Types: teradatasqlalchemy types object
866
+
867
+ RETURNS:
868
+ string
869
+
870
+ EXAMPLES:
871
+ >>> _create_return_type(VARCHAR(1024))
872
+ 'VARCHAR1024'
873
+ """
874
+ if isinstance(returns, (VARCHAR, CLOB, CHAR)):
875
+ # If the length is not provided, set it to empty string.
876
+ str_len = str(returns.length) if returns.length else ""
877
+ return_str = str(returns) + str_len
878
+ else:
879
+ return_str = str(returns)
880
+ # Replace the space with underscore in the return type.
881
+ return_str = return_str.replace(" ", "_")
882
+ return return_str