teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +193 -1
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +25 -18
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +20 -2
- teradataml/analytics/utils.py +15 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +341 -112
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +84 -42
- teradataml/automl/data_transformation.py +69 -33
- teradataml/automl/feature_engineering.py +76 -9
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +35 -14
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +122 -63
- teradataml/common/messagecodes.py +14 -3
- teradataml/common/messages.py +8 -4
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +366 -74
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +348 -86
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +45 -29
- teradataml/dataframe/data_transfer.py +72 -46
- teradataml/dataframe/dataframe.py +642 -166
- teradataml/dataframe/dataframe_utils.py +167 -22
- teradataml/dataframe/functions.py +135 -20
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +330 -78
- teradataml/dbutils/dbutils.py +556 -140
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -26
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +307 -40
- teradataml/scriptmgmt/lls_utils.py +428 -145
- teradataml/store/__init__.py +2 -3
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +48 -19
- teradataml/table_operators/Script.py +23 -2
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +49 -1
- teradataml/utils/internal_buffer.py +38 -0
- teradataml/utils/validators.py +377 -62
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
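Before reading the per-file changes below, it can help to confirm which of the two builds is actually installed in a given environment. This is a minimal sketch using only the Python standard library; it assumes the wheel has already been installed under the distribution name `teradataml`.

```python
from importlib.metadata import version

# Prints the installed build, e.g. "20.0.0.3" or "20.0.0.5".
print(version("teradataml"))
```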
teradataml/data/pos_input.csv ADDED

@@ -0,0 +1,40 @@
+ "id","word","pos"
+ 1,"roger","NN"
+ 2,"federer","NN"
+ 3,"born","VBN"
+ 4,"on","IN"
+ 5,"8","CD"
+ 6,"august","NN"
+ 7,"1981","CD"
+ 8,",","O"
+ 9,"is","VBZ"
+ 10,"a","DT"
+ 11,"greatest","JJS"
+ 12,"tennis","NN"
+ 13,"player","NN"
+ 14,",","O"
+ 15,"who","WP"
+ 16,"has","VBZ"
+ 17,"been","VBN"
+ 18,"continuously","RB"
+ 19,"ranked","VBN"
+ 20,"inside","IN"
+ 21,"the","DT"
+ 22,"top","JJ"
+ 23,"10","CD"
+ 24,"since","IN"
+ 25,"october","JJ"
+ 26,"2002","CD"
+ 27,"and","CC"
+ 28,"has","VBZ"
+ 29,"won","VBN"
+ 30,"wimbledon","NN"
+ 31,",","O"
+ 32,"usopen","JJ"
+ 33,",","O"
+ 34,"australian","JJ"
+ 35,"and","CC"
+ 36,"frenchopen","JJ"
+ 37,"titles","NNS"
+ 38,"mutiple","JJ"
+ 39,"times","NNS"
teradataml/data/teradataml_example.json CHANGED

@@ -1356,6 +1356,10 @@
 "format_col": "VARCHAR(50)",
 "timezone_col": "VARCHAR(50)"
 },
+ "to_num_data":{
+ "price": "VARCHAR(20)",
+ "col_format": "VARCHAR(20)"
+ },
 "interval_data":{
 "id": "INTEGER",
 "int_col": "BIGINT",

@@ -1368,5 +1372,22 @@
 "id": "INTEGER",
 "urls": "VARCHAR(60)",
 "part": "VARCHAR(20)"
+ },
+ "hnsw_data": {
+ "id": "INTEGER",
+ "array_col": "Vector"
+ },
+ "hnsw_alter_data": {
+ "id": "INTEGER",
+ "array_col": "Vector"
+ },
+ "tochar_data": {
+ "id": "INTEGER",
+ "int_col": "INTEGER",
+ "float_col": "FLOAT",
+ "date_col": "DATE",
+ "int_format": "VARCHAR(20)",
+ "float_format": "VARCHAR(20)",
+ "date_format": "VARCHAR(20)"
 }
 }
teradataml/data/trans_dense.csv ADDED

@@ -0,0 +1,16 @@
+ "location","tranid","period","storeid","region","item","sku","category"
+ "MiddleEast",999,"20100715",1,"west","milk,butter,eggs,flour,spinach",1,"dairy"
+ "MiddleEast",1000,"20100715",1,"west","milk,eggs,flour,spinach",1,"dairy"
+ "MiddleEast",1001,"20100715",1,"west","milk,butter,eggs",1,"dairy"
+ "MiddleEast",1002,"20100715",1,"west","milk,butter,spinach",1,"dairy"
+ "MiddleEast",1500,"20100715",3,"west","butter,eggs,flour",2,"dairy"
+ "AsiaPacific",999,"20100715",1,"west","milk,butter,eggs,flour,spinach",1,"dairy"
+ "AsiaPacific",1000,"20100715",1,"west","milk,eggs,flour,spinach",1,"dairy"
+ "AsiaPacific",1001,"20100715",1,"west","milk,butter,eggs",1,"dairy"
+ "AsiaPacific",1002,"20100715",1,"west","milk,butter,spinach",1,"dairy"
+ "AsiaPacific",1500,"20100715",3,"west","butter,eggs,flour",2,"dairy"
+ "LatinAmerica",999,"20100715",1,"west","milk,butter,eggs,flour,spinach",1,"dairy"
+ "LatinAmerica",1000,"20100715",1,"west","milk,eggs,flour,spinach",1,"dairy"
+ "LatinAmerica",1001,"20100715",1,"west","milk,butter,eggs",1,"dairy"
+ "LatinAmerica",1002,"20100715",1,"west","milk,butter,spinach",1,"dairy"
+ "LatinAmerica",1500,"20100715",3,"west","butter,eggs,flour",2,"dairy"
teradataml/data/trans_sparse.csv ADDED

@@ -0,0 +1,55 @@
+ "location","tranid","period","storeid","region","item","sku","category"
+ "MiddleEast",999,"20100715",1,"west","milk",1,"dairy"
+ "MiddleEast",999,"20100715",1,"west","butter",2,"dairy"
+ "MiddleEast",999,"20100715",1,"west","eggs",3,"dairy"
+ "MiddleEast",999,"19990715",1,"west","flour",4,"baking"
+ "MiddleEast",999,"19990715",1,"west","spinach",4,"produce"
+ "MiddleEast",1000,"20100715",1,"west","milk",1,"dairy"
+ "MiddleEast",1000,"20100715",1,"west","eggs",3,"dairy"
+ "MiddleEast",1000,"19990715",1,"west","flour",4,"baking"
+ "MiddleEast",1000,"19990715",1,"west","spinach",2,"produce"
+ "MiddleEast",1001,"20100715",1,"west","milk",1,"dairy"
+ "MiddleEast",1001,"20100715",1,"west","butter",2,"dairy"
+ "MiddleEast",1001,"20100715",1,"west","eggs",3,"dairy"
+ "MiddleEast",1002,"20100715",1,"west","milk",1,"dairy"
+ "MiddleEast",1002,"20100715",1,"west","butter",2,"dairy"
+ "MiddleEast",1002,"20100715",1,"west","spinach",3,"produce"
+ "MiddleEast",1500,"20100715",3,"west","butter",2,"dairy"
+ "MiddleEast",1500,"20100715",3,"west","eggs",3,"dairy"
+ "MiddleEast",1500,"20100715",3,"west","flour",4,"baking"
+ "AsiaPacific",999,"20100715",1,"west","milk",1,"dairy"
+ "AsiaPacific",999,"20100715",1,"west","butter",2,"dairy"
+ "AsiaPacific",999,"20100715",1,"west","eggs",3,"dairy"
+ "AsiaPacific",999,"19990715",1,"west","flour",4,"baking"
+ "AsiaPacific",999,"19990715",1,"west","spinach",4,"produce"
+ "AsiaPacific",1000,"20100715",1,"west","milk",1,"dairy"
+ "AsiaPacific",1000,"20100715",1,"west","eggs",3,"dairy"
+ "AsiaPacific",1000,"19990715",1,"west","flour",4,"baking"
+ "AsiaPacific",1000,"19990715",1,"west","spinach",2,"produce"
+ "AsiaPacific",1001,"20100715",1,"west","milk",1,"dairy"
+ "AsiaPacific",1001,"20100715",1,"west","butter",2,"dairy"
+ "AsiaPacific",1001,"20100715",1,"west","eggs",3,"dairy"
+ "AsiaPacific",1002,"20100715",1,"west","milk",1,"dairy"
+ "AsiaPacific",1002,"20100715",1,"west","butter",2,"dairy"
+ "AsiaPacific",1002,"20100715",1,"west","spinach",3,"produce"
+ "AsiaPacific",1500,"20100715",3,"west","butter",2,"dairy"
+ "AsiaPacific",1500,"20100715",3,"west","eggs",3,"dairy"
+ "AsiaPacific",1500,"20100715",3,"west","flour",4,"baking"
+ "LatinAmerica",999,"20100715",1,"west","milk",1,"dairy"
+ "LatinAmerica",999,"20100715",1,"west","butter",2,"dairy"
+ "LatinAmerica",999,"20100715",1,"west","eggs",3,"dairy"
+ "LatinAmerica",999,"19990715",1,"west","flour",4,"baking"
+ "LatinAmerica",999,"19990715",1,"west","spinach",4,"produce"
+ "LatinAmerica",1000,"20100715",1,"west","milk",1,"dairy"
+ "LatinAmerica",1000,"20100715",1,"west","eggs",3,"dairy"
+ "LatinAmerica",1000,"19990715",1,"west","flour",4,"baking"
+ "LatinAmerica",1000,"19990715",1,"west","spinach",2,"produce"
+ "LatinAmerica",1001,"20100715",1,"west","milk",1,"dairy"
+ "LatinAmerica",1001,"20100715",1,"west","butter",2,"dairy"
+ "LatinAmerica",1001,"20100715",1,"west","eggs",3,"dairy"
+ "LatinAmerica",1002,"20100715",1,"west","milk",1,"dairy"
+ "LatinAmerica",1002,"20100715",1,"west","butter",2,"dairy"
+ "LatinAmerica",1002,"20100715",1,"west","spinach",3,"produce"
+ "LatinAmerica",1500,"20100715",3,"west","butter",2,"dairy"
+ "LatinAmerica",1500,"20100715",3,"west","eggs",3,"dairy"
+ "LatinAmerica",1500,"20100715",3,"west","flour",4,"baking"
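The hunks above add new sample-data files under `teradataml/data` and register the corresponding datasets in the bundled example-table definitions. A minimal sketch for previewing one of the new CSVs from an installed 20.0.0.5 wheel is shown here, assuming pandas is available in the environment; the `teradataml.__path__[0]` pattern mirrors the read_csv docstring example later in this diff.

```python
from pathlib import Path

import pandas as pd
import teradataml

# The sample transaction data ships inside the installed package.
csv_path = Path(teradataml.__path__[0]) / "data" / "trans_dense.csv"

# Preview the new market-basket style rows added in 20.0.0.5.
print(pd.read_csv(csv_path).head())
```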
teradataml/dataframe/copy_to.py CHANGED

@@ -297,10 +297,10 @@ def copy_to_sql(df, table_name,
 >>> from teradatasqlalchemy.types import *

 >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
-
-
-
-
+ ... 'emp_sage': [100, 200, 300, 400],
+ ... 'emp_id': [133, 144, 155, 177],
+ ... 'marks': [99.99, 97.32, 94.67, 91.00]
+ ... }

 >>> pandas_df = pd.DataFrame(df)

@@ -313,24 +313,35 @@ def copy_to_sql(df, table_name,

 c) Save a Pandas DataFrame by specifying additional parameters:
 >>> copy_to_sql(df = pandas_df, table_name = 'my_table_2', schema_name = 'alice',
-
-
-
-
+ ... index = True, index_label = 'my_index_label', temporary = False,
+ ... primary_index = ['emp_id'], if_exists = 'append',
+ ... types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
+ ... 'emp_id': BIGINT, 'marks': DECIMAL})

 d) Saving with additional parameters as a SET table
 >>> copy_to_sql(df = pandas_df, table_name = 'my_table_3', schema_name = 'alice',
-
-
-
-
-
+ ... index = True, index_label = 'my_index_label', temporary = False,
+ ... primary_index = ['emp_id'], if_exists = 'append',
+ ... types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
+ ... 'emp_id': BIGINT, 'marks': DECIMAL},
+ ... set_table=True)

 e) Saving levels in index of type MultiIndex
 >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
 >>> copy_to_sql(df = pandas_df, table_name = 'my_table_4', schema_name = 'alice',
-
-
+ ... index = True, index_label = ['index1', 'index2'], temporary = False,
+ ... primary_index = ['index1'], if_exists = 'replace')
+
+ f) Save a Pandas DataFrame with VECTOR datatype:
+ >>> import pandas as pd
+ >>> VECTOR_data = {
+ ... 'id': [10, 11, 12, 13],
+ ... 'array_col': ['1,1', '2,2', '3,3', '4,4']
+ ... }
+ >>> df = pd.DataFrame(VECTOR_data)
+
+ >>> from teradatasqlalchemy import VECTOR
+ >>> copy_to_sql(df=df, table_name='my_vector_table', types={'array_col': VECTOR})

 2. Saving a teradataml DataFrame:

@@ -358,13 +369,13 @@ def copy_to_sql(df, table_name,

 d) Save a teradataml DataFrame by using copy_to_sql with additional parameters:
 >>> copy_to_sql(df = df2, table_name = 'my_tdml_table_3', schema_name = 'alice',
-
-
+ ... temporary = False, primary_index = None, if_exists = 'append',
+ ... types = {'masters': VARCHAR, 'gpa':INTEGER})

 e) Saving as a SET table
 >>> copy_to_sql(df = df2, table_name = 'my_tdml_set_table', schema_name = 'alice',
-
-
+ ... temporary = False, primary_index = ['gpa'], if_exists = 'append',
+ ... types = {'masters': VARCHAR, 'gpa':INTEGER}, set_table = True)

 3. Saving a teradataml DataFrame as a PTI table:

@@ -377,19 +388,19 @@ def copy_to_sql(df, table_name,

 a) Using copy_to_sql
 >>> copy_to_sql(df3, "test_copyto_pti",
-
-
+ ... timecode_column='clicktime',
+ ... columns_list='event')

 b) Alternatively, using DataFrame.to_sql
 >>> df3.to_sql(table_name = "test_copyto_pti_1",
-
-
+ ... timecode_column='clicktime',
+ ... columns_list='event')

 c) Saving as a SET table
 >>> copy_to_sql(df3, "test_copyto_pti_2",
-
-
-
+ ... timecode_column='clicktime',
+ ... columns_list='event',
+ ... set_table=True)

 """
 # Deriving global connection using get_connection().

@@ -487,6 +498,11 @@ def copy_to_sql(df, table_name,
 table_type=TeradataConstants.TERADATA_TABLE,
 quote=False)

+ # If configure.temp_object_type="VT", _generate_temp_table_name() retruns the
+ # table name in fully qualified format. Because of this , test cases started
+ # failing with Blank name in quotation mark. Hence, extracted only the table name.
+ table_name = UtilFuncs._extract_table_name(table_name)
+
 # Let's create the SQLAlchemy table object to recreate the table
 if not table_exists or if_exists.lower() == 'replace':
 if not is_pti:

@@ -1459,9 +1475,9 @@ def _extract_column_info(df, types = None, index = False, index_label = None):
 # If the datatype is not specified then check if the datatype is datetime64 and timezone is present then map it to
 # TIMESTAMP(timezone=True) else map it according to default value.
 col_types = [types.get(col_name) if types and col_name in types else
- TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes[key])
+ TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes.iloc[key])
 and (df[col_name].dt.tz is not None)
- else _get_sqlalchemy_mapping_types(str(df.dtypes[key]))
+ else _get_sqlalchemy_mapping_types(str(df.dtypes.iloc[key]))
 for key, col_name in enumerate(list(df.columns))]

 ind_names = []

@@ -1469,7 +1485,7 @@ def _extract_column_info(df, types = None, index = False, index_label = None):
 if index:
 ind_names, ind_types = _get_index_labels(df, index_label)
 ind_types = [types.get(ind_name) if types and ind_name in types
- else TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes[key])
+ else TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes.iloc[key])
 and (df[ind_name].dt.tz is not None)
 else _get_sqlalchemy_mapping_types(str(ind_types[key]))
 for key, ind_name in enumerate(ind_names)]
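The two `_extract_column_info` hunks above replace `df.dtypes[key]` with `df.dtypes.iloc[key]`. `df.dtypes` is a Series indexed by column labels, and recent pandas releases deprecate treating an integer key on a label-indexed Series as a position, so positional access has to go through `.iloc`. The sketch below only illustrates that pandas behaviour; it is not teradataml code.

```python
import pandas as pd

df = pd.DataFrame({"emp_name": ["A1"], "marks": [99.99]})

# df.dtypes is indexed by column names, not positions.
print(df.dtypes.iloc[0])    # positional lookup: dtype of the first column
print(df.dtypes["marks"])   # label lookup: dtype of the 'marks' column

# df.dtypes[0] relied on the old positional fallback, which warns or fails
# on newer pandas; switching to .iloc avoids that.
```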
teradataml/dataframe/data_transfer.py CHANGED

@@ -328,6 +328,7 @@ def fastexport(df, export_to="pandas", index_column=None,
 # using fastexport datatransfer protocol.
 # "require" is always True, because with this function user requires
 # fastexport.
+
 return dt_obj._fastexport_get_pandas_df(require=True, **kwargs)

 # Convert teradataml DataFrame to CSV file.

@@ -689,77 +690,77 @@ def read_csv(filepath,
 # while doing so catch all errors and warnings as well as store those in the table.
 >>> types = OrderedDict(id=BIGINT, fname=VARCHAR, lname=VARCHAR, marks=FLOAT)
 >>> read_csv(filepath='test_file.csv',
-
-
+ ... table_name='my_first_table1', types=types,
+ ... save_errors=True, catch_errors_warnings=True)

 # Example 3: Load the data from CSV file into a table using fastload CSV protocol.
 # If table exists, then replace the same. Catch all errors and warnings as well as
 # store those in the table.
 >>> types = OrderedDict(id=BIGINT, fname=VARCHAR, lname=VARCHAR, marks=FLOAT)
 >>> read_csv(filepath='test_file.csv',
-
-
-
+ ... table_name='my_first_table',
+ ... types=types, if_exists='replace',
+ ... save_errors=True, catch_errors_warnings=True)

 # Example 4: Load the data from CSV file into a table using fastload CSV protocol.
 # If table exists in specified schema, then append the same. Catch all
 # errors and warnings as well as store those in the table.
 >>> types = OrderedDict(id=BIGINT, fname=VARCHAR, lname=VARCHAR, marks=FLOAT)
 >>> read_csv(filepath='test_file.csv',
-
-
-
+ ... table_name='my_first_table',
+ ... types=types, if_exists='fail',
+ ... save_errors=True, catch_errors_warnings=True)
 >>> read_csv(filepath='test_file.csv',
-
-
-
+ ... table_name='my_first_table',
+ ... if_exists='append',
+ ... save_errors=True, catch_errors_warnings=True)

 # Example 5: Load the data from CSV file into a SET table using fastload CSV protocol.
 # Catch all errors and warnings as well as store those in the table.
 >>> types = OrderedDict(id=BIGINT, fname=VARCHAR, lname=VARCHAR, marks=FLOAT)
 >>> read_csv(filepath='test_file.csv',
-
-
-
-
+ ... table_name='my_first_table',
+ ... types=types, if_exists='replace',
+ ... set_table=True, primary_index='id',
+ ... save_errors=True, catch_errors_warnings=True)

 # Example 6: Load the data from CSV file into a temporary table without fastloadCSV protocol.
 # If table exists, then append to the same.
 >>> types = OrderedDict(id=BIGINT, fname=VARCHAR, lname=VARCHAR, marks=FLOAT)
 >>> read_csv(filepath='test_file.csv',
-
-
-
+ ... table_name='my_first_table',
+ ... types=types, if_exists='replace',
+ ... temporary=True)
 >>> read_csv(filepath='test_file.csv',
-
-
-
+ ... table_name='my_first_table',
+ ... if_exists='append',
+ ... temporary=True)

 # Example 7: Load the data from CSV file with DATE and TIMESTAMP columns into
 # a table without Fastload protocol. If table exists in specified
 # schema, then append to the table.
 >>> types = OrderedDict(id=BIGINT, fname=VARCHAR, lname=VARCHAR, marks=FLOAT,
- admission_date=DATE, admission_time=TIMESTAMP)
+ ... admission_date=DATE, admission_time=TIMESTAMP)
 >>> read_csv(filepath='test_file.csv',
-
-
-
+ ... table_name='my_first_table',
+ ... types=types, if_exists='fail',
+ ... use_fastload=False)
 >>> read_csv(filepath='test_file.csv',
-
-
-
+ ... table_name='my_first_table',
+ ... if_exists='append',
+ ... use_fastload=False)

 # Example 8: Load the data from CSV file with TIMESTAMP columns into
 # a PTI table. If specified table exists then append to the table,
 # otherwise creates new table.
 >>> types = OrderedDict(partition_id=INTEGER, adid=INTEGER, productid=INTEGER,
-
+ ... event=VARCHAR, clicktime=TIMESTAMP)
 >>> read_csv(filepath='test_file.csv',
-
-
-
-
-
+ ... table_name='my_first_read_csv_pti_table',
+ ... types=types, if_exists='append',
+ ... timecode_column='clicktime',
+ ... columns_list='event',
+ ... use_fastload=False)

 # Example 9: Load the data from CSV file with TIMESTAMP columns into
 # a SET PTI table. If specified table exists then append to the table,

@@ -767,11 +768,11 @@ def read_csv(filepath,
 >>> types = OrderedDict(partition_id=INTEGER, adid=INTEGER, productid=INTEGER,
 event=VARCHAR, clicktime=TIMESTAMP)
 >>> read_csv(filepath='test_file.csv',
-
-
-
-
-
+ ... table_name='my_first_read_csv_pti_table',
+ ... types=types, if_exists='append',
+ ... timecode_column='clicktime',
+ ... columns_list='event',
+ ... set_table=True)

 # Example 10: Load the data from CSV file with TIMESTAMP columns into
 # a temporary PTI table. If specified table exists then append to the table,

@@ -779,11 +780,11 @@ def read_csv(filepath,
 >>> types = OrderedDict(partition_id=INTEGER, adid=INTEGER, productid=INTEGER,
 event=VARCHAR, clicktime=TIMESTAMP)
 >>> read_csv(filepath='test_file.csv',
-
-
-
-
-
+ ... table_name='my_first_read_csv_pti_table',
+ ... types=types, if_exists='append',
+ ... timecode_column='clicktime',
+ ... columns_list='event',
+ ... temporary=True)

 # Example 11: Load the data from CSV file into Vantage table by opening specified
 # number of Teradata data transfer sesions.

@@ -795,8 +796,24 @@ def read_csv(filepath,
 # through primary_index argument.
 >>> types = OrderedDict(id=BIGINT, fname=VARCHAR, lname=VARCHAR, marks=FLOAT)
 >>> read_csv(filepath='test_file.csv', table_name='my_first_table_with_primary_index',
-
+ ... types=types, primary_index = ['fname'])
+
+ # Example 13: Load the data from CSV file into VECTOR datatype in Vantage table.
+ >>> from teradatasqlalchemy import VECTOR
+ >>> from pathlib import Path
+ >>> types = OrderedDict(id=BIGINT, array_col=VECTOR)
+
+ # Get the absolute path of the teradataml module
+ >>> import teradataml
+ >>> base_path = Path(teradataml.__path__[0])

+ # Append the relative path to the CSV file
+ >>> csv_path = os.path.join(base_path, "data", "hnsw_alter_data.csv")
+
+ >>> read_csv(filepath=csv_path,
+ ... table_name='my_first_table_with_vector',
+ ... types=types,
+ ... use_fastload=False)
 """
 # Deriving global connection using context.get_context()
 con = get_context()

@@ -2553,7 +2570,6 @@ class _DataTransferUtils():
 _create_staging_table_and_load_csv_data(column_info={"id": INTEGER}, primary_index = ['id'])

 """
- stag_table_name = ""
 stage_table_created = False
 try:
 # Generate the temporary table.

@@ -2562,6 +2578,11 @@ class _DataTransferUtils():
 quote=False,
 table_type=TeradataConstants.TERADATA_TABLE)

+ # If configure.temp_object_type="VT", _generate_temp_table_name() retruns the
+ # table name in fully qualified format. Because of this , test cases started
+ # failing with Blank name in quotation mark. Hence, extracted only the table name.
+ stag_table_name = UtilFuncs._extract_table_name(stag_table_name)
+
 # Information about uniqueness of primary index and
 # SET/MULTISET property of existing table is not available,
 # so over-assuming to be False.

@@ -2707,6 +2728,11 @@ class _DataTransferUtils():
 quote=False,
 table_type=TeradataConstants.TERADATA_TABLE)

+ # If configure.temp_object_type="VT", _generate_temp_table_name() retruns the
+ # table name in fully qualified format. Because of this , test cases started
+ # failing with Blank name in quotation mark. Hence, extracted only the table name.
+ stag_table_name = UtilFuncs._extract_table_name(stag_table_name)
+
 # Get the teradataml dataframe from staging table using read_csv()
 read_csv_output = read_csv(filepath=self.df, table_name=stag_table_name,
 types=self.types, sep=self.sep,