wcp-library 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,112 @@
1
+ import requests
2
+ import json
3
+ import sys
4
+
5
+
6
+ # Taken from https://www.mydatahack.com/running-jobs-with-informatica-cloud-rest-api/
7
def get_session_id(username, password, logging):
    """Authenticate with username and password and retrieve the session credentials.

    Posts the credentials to the Informatica Cloud v2 login endpoint. On HTTP 200
    the ``icSessionId`` and ``serverUrl`` fields of the JSON response are logged and
    returned; they are used for subsequent API calls. On any other status the
    response headers and body are logged and the process exits with status 1.

    :param username: login user name
    :param password: login password
    :param logging: object exposing ``info(message)`` (the ``logging`` module or a logger)
    :return: tuple ``(session_id, server_url)``
    """
    data = {'@type': 'login', 'username': username, 'password': password}
    url = "https://dm-us.informaticacloud.com/ma/api/v2/user/login"
    headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
    # We need to pass data in string instead of dict so that the data gets posted directly.
    r = requests.post(url, data=json.dumps(data), headers=headers)

    logging.info('\tAPI Login Response Status Code: ' + str(r.status_code))

    if r.status_code != 200:
        logging.info('API Login call failed:')
        logging.info(r.headers)
        try:
            logging.info(r.json())
        except ValueError:
            # An error body is not guaranteed to be JSON; fall back to the raw
            # text so the decode error does not mask the login failure.
            logging.info(r.text)
        sys.exit(1)

    # Parse the body once instead of once per field.
    payload = r.json()
    session_id = payload["icSessionId"]
    server_url = payload["serverUrl"]
    # NOTE(review): the session id is a credential and is written to the log here.
    logging.info('\tSession Id: ' + session_id)
    logging.info('\tServer URL: ' + server_url)

    return session_id, server_url
31
+
32
+
33
def get_tasks(session_id, server_url, taskType, logging):
    """Get the list of tasks of a specified type. This may be used to determine the TaskID of a task.

    Task Types: https://jsapi.apiary.io/apis/cloudrestapi/reference/job/list-of-tasks/login.html
        AVS-Contact validation task
        DMASK-Data masking task
        DQA-Data assessment task
        DRS-Data replication task
        DSS-Data synchronization task
        MTT-Mapping configuration task
        PCS-PowerCenter task

    :param session_id: value sent as the ``icSessionId`` header
    :param server_url: base server URL the ``/api/v2/task`` path is appended to
    :param taskType: task type code sent as the ``type`` query parameter
    :param logging: object exposing ``info(message)``
    :return: decoded JSON response on HTTP 200, otherwise an empty dict
    """
    task_list_url = server_url + "/api/v2/task"
    headers = {'icSessionId': session_id}
    # Let requests build the query string so the task type is URL-encoded.
    r = requests.get(task_list_url, headers=headers, params={'type': taskType})

    if r.status_code != 200:
        logging.info('\tFailed to get list of Tasks: ' + str(r.status_code))
        return {}

    logging.info('\tRetrieved list of all Tasks')
    return json.loads(r.content)
55
+
56
+
57
def get_task_id(response_dict, taskName, logging):
    """Return the id of the first task whose name equals ``taskName``.

    :param response_dict: iterable of task dictionaries, each with ``name`` and ``id`` keys
    :param taskName: task name to look for
    :param logging: object exposing ``info(message)``
    :return: the matching task's ``id`` value, or an empty string when no task matches
    """
    for task in response_dict:
        if task['name'] == taskName:
            # Named task_id so the builtin id() is not shadowed.
            task_id = task['id']
            # str() keeps the log call from failing when the id is not a string.
            logging.info('\tTaskID: ' + str(task_id))
            return task_id

    logging.info('\tCould not find TaskID for the Task Name specified')
    return ""
66
+
67
+
68
def get_all_mapping_details(session_id, server_url, logging):
    """Fetch the mapping list from the ``/api/v2/mapping`` endpoint.

    :param session_id: value sent as the ``icSessionId`` header
    :param server_url: base server URL the endpoint path is appended to
    :param logging: object exposing ``info(message)``
    :return: decoded JSON response on HTTP 200, otherwise an empty dict
    """
    response = requests.get(
        server_url + "/api/v2/mapping",
        headers={'icSessionId': session_id, 'HTTP': '1.0', 'Accept': 'application/json'},
    )

    # Guard clause: report the failing status and hand back an empty result.
    if response.status_code != 200:
        logging.info('\tFailed to get Mappings: ' + str(response.status_code))
        return {}

    return json.loads(response.content)
80
+
81
+
82
def get_singular_mapping_details(session_id, server_url, logging, mappingID):
    """Fetch the details of one mapping from ``/api/v2/mapping/<mappingID>``.

    :param session_id: value sent as the ``icSessionId`` header
    :param server_url: base server URL the endpoint path is appended to
    :param logging: object exposing ``info(message)``
    :param mappingID: mapping identifier appended to the endpoint path
    :return: decoded JSON response on HTTP 200, otherwise an empty dict
    """
    endpoint = server_url + "/api/v2/mapping/" + mappingID
    response = requests.get(
        endpoint,
        headers={'icSessionId': session_id, 'Accept': 'application/json'},
    )

    if response.status_code != 200:
        status_text = str(response.status_code)
        logging.info('\tFailed to get Mapping details for mapping ' + mappingID + ': ' + status_text)
        return {}

    return json.loads(response.content)
94
+
95
+
96
def get_connection_details(session_id, server_url, logging):
    """Fetch the connection list from the ``/api/v2/connection`` endpoint.

    :param session_id: value sent as the ``icSessionId`` header
    :param server_url: base server URL the endpoint path is appended to
    :param logging: object exposing ``info(message)``
    :return: decoded JSON response on HTTP 200, otherwise an empty dict
    """
    connections_url = server_url + "/api/v2/connection"
    headers = {'icSessionId': session_id, 'content-type': 'application/json'}
    r = requests.get(connections_url, headers=headers)

    if r.status_code != 200:
        # This endpoint returns connections; the message previously said "Mappings".
        logging.info('\tFailed to get Connections: ' + str(r.status_code))
        return {}

    return json.loads(r.content)
wcp_library/logging.py ADDED
@@ -0,0 +1,51 @@
1
+ import logging
2
+ import sys
3
+
4
+ from WCP_Library import application_path
5
+
6
+
7
def create_log(level: int, iterations: int, project_name: str, mode: str = "w",
               format: str = "%(asctime)s:%(levelname)s:%(module)s:%(filename)s:%(lineno)d:%(message)s"):
    """
    Create log file.

    Existing log files are rotated first: ``<project_name>_<i>.log`` becomes
    ``<project_name>_<i+1>.log``, the current ``<project_name>.log`` becomes
    ``<project_name>_1.log``, and ``<project_name>_<iterations+1>.log`` is deleted.
    The root logger is then configured to write to ``<project_name>.log`` and two
    stream handlers (stdout and stderr) are attached to it.

    Log levels: CRITICAL, ERROR, WARNING, INFO, DEBUG, NOTSET

    format help: https://docs.python.org/3/library/logging.html#logrecord-attributes

    :param level: Logging level to output to log file.
    :param iterations: Number of log files to keep.
    :param project_name: Name of the project. (Used as the log file name)
    :param mode: Mode to open the log file. (Default: "w")
    :param format: Log Format (Default: "%(asctime)s:%(levelname)s:%(module)s:%(filename)s:%(lineno)d:%(message)s")
    :return:
    """

    # NOTE(review): application_path is assumed to be a pathlib.Path (it is used
    # with "/", exists() and unlink()) - confirm against the package __init__.
    def log_path(suffix: str = ""):
        return application_path / (project_name + f"{suffix}.log")

    # Shift the numbered files from oldest to newest. replace() overwrites an
    # existing target on every platform, whereas rename() raises FileExistsError
    # on Windows (e.g. when "iterations" was lowered between runs).
    for i in range(iterations, 0, -1):
        numbered = log_path(f"_{i}")
        if numbered.exists():
            numbered.replace(log_path(f"_{i + 1}"))

    current = log_path()
    if current.exists():
        current.replace(log_path("_1"))

    # Drop the file that was shifted past the retention limit.
    overflow = log_path(f"_{iterations + 1}")
    if overflow.exists():
        overflow.unlink()

    logging.basicConfig(
        filename=current,
        level=level,
        format=format,
        filemode=mode
    )

    MIN_LEVEL = logging.DEBUG
    stdout_hdlr = logging.StreamHandler(sys.stdout)
    stderr_hdlr = logging.StreamHandler(sys.stderr)
    stdout_hdlr.setLevel(MIN_LEVEL)
    stderr_hdlr.setLevel(max(MIN_LEVEL, logging.WARNING))

    # NOTE(review): both handlers accept WARNING and above, so those records are
    # emitted on stdout and on stderr - confirm that this is intended.
    rootLogger = logging.getLogger()
    rootLogger.addHandler(stdout_hdlr)
    rootLogger.addHandler(stderr_hdlr)
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
@@ -0,0 +1,35 @@
1
+ import logging
2
+ from functools import wraps
3
+ from time import sleep
4
+
5
+ import oracledb
6
+ import psycopg
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
def _error_details(exc):
    """Return ``(code, message)`` for a driver error, tolerating both driver shapes."""
    first_arg = exc.args[0] if exc.args else None
    # oracledb puts an error object with full_code/message in args[0];
    # psycopg puts a plain string there and exposes the code as exc.sqlstate.
    code = getattr(first_arg, "full_code", None)
    if code is None:
        code = getattr(exc, "sqlstate", None)
    message = getattr(first_arg, "message", None)
    if message is None:
        message = str(exc)
    return code, message


def retry(f: callable) -> callable:
    """
    Decorator to retry a method after a retryable database connection error

    The wrapped method's instance must provide ``retry_error_codes`` and
    ``retry_limit``. ``_retry_count`` is reset to 0 on every call. When an
    ``OperationalError`` from oracledb or psycopg carries a code listed in
    ``retry_error_codes`` and the limit is not reached, the wrapper sleeps
    5 minutes and calls the method again; otherwise the error is re-raised.

    :param f: function
    :return: function
    """

    @wraps(f)
    def wrapper(self, *args, **kwargs):
        self._retry_count = 0
        while True:
            try:
                return f(self, *args, **kwargs)
            except (oracledb.OperationalError, psycopg.OperationalError) as e:
                error_code, error_message = _error_details(e)
                if error_code in self.retry_error_codes and self._retry_count < self.retry_limit:
                    self._retry_count += 1
                    logger.debug("Oracle connection error")
                    logger.debug(error_message)
                    logger.info("Waiting 5 minutes before retrying Oracle connection")
                    sleep(300)
                else:
                    # Bare raise keeps the original traceback intact.
                    raise
    return wrapper
@@ -0,0 +1,249 @@
1
+ import logging
2
+ from typing import Optional
3
+
4
+ import pandas as pd
5
+ import oracledb
6
+ from oracledb import ConnectionPool
7
+
8
+ from WCP_Library.sql import retry
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def connect_warehouse(username: str, password: str, hostname: str, port: int, database: str) -> ConnectionPool:
    """
    Create Warehouse Connection

    Builds a DSN from the host, port and ``database`` (passed to ``makedsn`` as
    the SID) and opens an oracledb connection pool with it.

    :param username: username
    :param password: password
    :param hostname: hostname
    :param port: port
    :param database: database
    :return: session_pool
    """

    pool_options = {
        "user": username,
        "password": password,
        "dsn": oracledb.makedsn(hostname, port, sid=database),
        "min": 2,
        "max": 5,
        "increment": 1,
        "threaded": True,
        "encoding": "UTF-8",
    }
    return oracledb.create_pool(**pool_options)
37
+
38
+
39
class SQLConnection(object):
    """
    SQL Connection Class

    Wraps an oracledb connection pool. Call set_user() with a credentials
    dictionary to open the pool, then use the execute/fetch helpers. Methods
    decorated with @retry read ``retry_limit`` and ``retry_error_codes`` from
    the instance.

    :return: None
    """

    def __init__(self):
        self._username: Optional[str] = None
        self._password: Optional[str] = None
        self._hostname: Optional[str] = None
        self._port: Optional[int] = None
        self._database: Optional[str] = None
        self._sid: Optional[str] = None
        self._session_pool: Optional[ConnectionPool] = None

        self._retry_count = 0
        self.retry_limit = 50
        self.retry_error_codes = ['ORA-01033', 'DPY-6005', 'DPY-4011']

    @retry
    def _connect(self) -> None:
        """
        Connect to the warehouse

        :return: None
        """

        # NOTE(review): connect_warehouse forwards this value to makedsn as a SID
        # even when it came from the 'Service' credential - confirm that is intended.
        sid_or_service = self._database if self._database else self._sid

        self._session_pool = connect_warehouse(self._username, self._password, self._hostname, self._port, sid_or_service)

    def set_user(self, credentials_dict: dict) -> None:
        """
        Set the user credentials and connect

        :param credentials_dict: dictionary of connection details; reads the keys
            'UserName', 'Password', 'Host', 'Port' and at least one of 'Service' / 'SID'
        :return: None
        :raises ValueError: when neither 'Service' nor 'SID' is provided
        """

        service = credentials_dict.get('Service')
        sid = credentials_dict.get('SID')
        # Validate the incoming credentials. The previous check looked at the
        # instance attributes before they were assigned, so it always failed on
        # a fresh instance.
        if not any([service, sid]):
            raise ValueError("Either Service or SID must be provided")

        self._username = credentials_dict['UserName']
        self._password = credentials_dict['Password']
        self._hostname = credentials_dict['Host']
        self._port = int(credentials_dict['Port'])
        self._database = service
        self._sid = sid

        self._connect()

    def close_connection(self) -> None:
        """
        Close the connection

        Does nothing when no pool is open, so it is safe to call more than once.

        :return: None
        """

        pool = getattr(self, "_session_pool", None)
        if pool is not None:
            pool.close()
            self._session_pool = None

    @retry
    def execute(self, query: str) -> None:
        """
        Execute the query

        :param query: query
        :return: None
        """

        connection = self._session_pool.acquire()
        try:
            cursor = connection.cursor()
            cursor.execute(query)
            connection.commit()
        finally:
            # Always hand the connection back, even when the statement fails,
            # so retries cannot exhaust the pool.
            self._session_pool.release(connection)

    @retry
    def safe_execute(self, query: str, packed_values: dict) -> None:
        """
        Execute the query without SQL Injection possibility, to be used with external facing projects.

        :param query: query
        :param packed_values: dictionary of values
        :return: None
        """

        connection = self._session_pool.acquire()
        try:
            cursor = connection.cursor()
            cursor.execute(query, packed_values)
            connection.commit()
        finally:
            self._session_pool.release(connection)

    @retry
    def execute_multiple(self, queries: list[list[str, dict]]) -> None:
        """
        Execute multiple queries

        All queries run on one connection and are committed together at the end.

        :param queries: list of [query, packed_values] pairs; a falsy packed_values
            runs the query without bind values
        :return: None
        """

        connection = self._session_pool.acquire()
        try:
            cursor = connection.cursor()
            for item in queries:
                query = item[0]
                packed_values = item[1]
                if packed_values:
                    cursor.execute(query, packed_values)
                else:
                    cursor.execute(query)
            connection.commit()
        finally:
            self._session_pool.release(connection)

    @retry
    def execute_many(self, query: str, dictionary: list[dict]) -> None:
        """
        Execute many queries

        :param query: query
        :param dictionary: list of bind-value dictionaries, one per execution
        :return: None
        """

        connection = self._session_pool.acquire()
        try:
            cursor = connection.cursor()
            cursor.executemany(query, dictionary)
            connection.commit()
        finally:
            self._session_pool.release(connection)

    @retry
    def fetch_data(self, query: str, packed_data=None):
        """
        Fetch the data from the query

        :param query: query
        :param packed_data: packed data
        :return: rows
        """

        connection = self._session_pool.acquire()
        try:
            cursor = connection.cursor()
            if packed_data:
                cursor.execute(query, packed_data)
            else:
                cursor.execute(query)
            return cursor.fetchall()
        finally:
            self._session_pool.release(connection)

    @retry
    def export_DF_to_warehouse(self, dfObj: pd.DataFrame, outputTableName: str, columns: list, remove_nan=False) -> None:
        """
        Export the DataFrame to the warehouse

        Builds an ``INSERT INTO <table> (<columns>) VALUES (:<column>, ...)``
        statement and runs it once per DataFrame row via execute_many().

        :param dfObj: DataFrame
        :param outputTableName: output table name
        :param columns: list of columns
        :param remove_nan: remove NaN values
        :return: None
        """

        col = ', '.join(columns)
        bind = ', '.join(':' + column for column in columns)

        main_dict = dfObj.to_dict('records')
        if remove_nan:
            # Replace missing values with None so they are bound as NULL.
            main_dict = [
                {key: (None if pd.isna(value) else value) for key, value in record.items()}
                for record in main_dict
            ]

        # Table and column names are formatted into the statement text; only the
        # row values are bound.
        query = """INSERT INTO {} ({}) VALUES ({})""".format(outputTableName, col, bind)
        self.execute_many(query, main_dict)

    @retry
    def truncate_table(self, tableName: str) -> None:
        """
        Truncate the table

        :param tableName: table name
        :return: None
        """

        truncateQuery = """TRUNCATE TABLE {}""".format(tableName)
        self.execute(truncateQuery)

    @retry
    def empty_table(self, tableName: str) -> None:
        """
        Empty the table

        :param tableName: table name
        :return: None
        """

        deleteQuery = """DELETE FROM {}""".format(tableName)
        self.execute(deleteQuery)

    def __del__(self) -> None:
        """
        Destructor

        Closes the pool if one is still open; an instance that never connected
        (or was already closed) is left alone.

        :return: None
        """

        self.close_connection()
@@ -0,0 +1,226 @@
1
+ import logging
2
+ from typing import Optional
3
+
4
+ import pandas as pd
5
+ from psycopg.sql import SQL
6
+ from psycopg_pool import ConnectionPool
7
+
8
+ from WCP_Library.sql import retry
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def connect_warehouse(username: str, password: str, hostname: str, port: int, database: str) -> ConnectionPool:
    """
    Create Warehouse Connection

    Builds a ``postgres://`` connection URI and opens a psycopg connection pool
    (2 to 5 connections) with it.

    :param username: username
    :param password: password
    :param hostname: hostname
    :param port: port
    :param database: database
    :return: session_pool
    """

    from urllib.parse import quote

    # Percent-encode the user-supplied parts so characters such as "@", ":" or
    # "/" in a password cannot change how the URI is parsed.
    user_part = quote(str(username), safe='')
    password_part = quote(str(password), safe='')
    database_part = quote(str(database), safe='')
    url = f"postgres://{user_part}:{password_part}@{hostname}:{port}/{database_part}"

    session_pool = ConnectionPool(
        conninfo=url,
        min_size=2,
        max_size=5,
        open=True
    )
    return session_pool
34
+
35
+
36
class SQLConnection(object):
    """
    SQL Connection Class

    Wraps a psycopg connection pool. Call set_user() with a credentials
    dictionary to open the pool, then use the execute/fetch helpers. Methods
    decorated with @retry read ``retry_limit`` and ``retry_error_codes`` from
    the instance.

    :return: None
    """

    def __init__(self):
        self._username: Optional[str] = None
        self._password: Optional[str] = None
        self._hostname: Optional[str] = None
        self._port: Optional[int] = None
        self._database: Optional[str] = None
        self._session_pool: Optional[ConnectionPool] = None

        self._retry_count = 0
        self.retry_limit = 50
        self.retry_error_codes = ['08001', '08004']

    @retry
    def _connect(self) -> None:
        """
        Connect to the warehouse

        :return: None
        """

        self._session_pool = connect_warehouse(self._username, self._password, self._hostname, self._port, self._database)

    def set_user(self, credentials_dict: dict) -> None:
        """
        Set the user credentials and connect

        :param credentials_dict: dictionary of connection details; reads the keys
            'UserName', 'Password', 'Host', 'Port' and 'Database'
        :return: None
        """

        self._username = credentials_dict['UserName']
        self._password = credentials_dict['Password']
        self._hostname = credentials_dict['Host']
        self._port = int(credentials_dict['Port'])
        self._database = credentials_dict['Database']

        self._connect()

    def close_connection(self) -> None:
        """
        Close the connection

        Does nothing when no pool is open, so it is safe to call more than once.

        :return: None
        """

        pool = getattr(self, "_session_pool", None)
        if pool is not None:
            pool.close()
            self._session_pool = None

    @retry
    def execute(self, query: SQL | str) -> None:
        """
        Execute the query

        :param query: query
        :return: None
        """

        with self._session_pool.connection() as connection:
            connection.execute(query)

    @retry
    def safe_execute(self, query: SQL | str, packed_values: dict) -> None:
        """
        Execute the query without SQL Injection possibility, to be used with external facing projects.

        :param query: query
        :param packed_values: dictionary of values
        :return: None
        """

        with self._session_pool.connection() as connection:
            connection.execute(query, packed_values)

    @retry
    def execute_multiple(self, queries: list[list[SQL | str, dict]]) -> None:
        """
        Execute multiple queries

        All queries run on one pooled connection.

        :param queries: list of [query, packed_values] pairs; a falsy packed_values
            runs the query without parameters
        :return: None
        """

        with self._session_pool.connection() as connection:
            for item in queries:
                query = item[0]
                packed_values = item[1]
                if packed_values:
                    connection.execute(query, packed_values)
                else:
                    connection.execute(query)

    @retry
    def execute_many(self, query: SQL | str, dictionary: list[dict]) -> None:
        """
        Execute many queries

        :param query: query
        :param dictionary: list of parameter dictionaries, one per execution
        :return: None
        """

        with self._session_pool.connection() as connection:
            connection.executemany(query, dictionary)

    @retry
    def fetch_data(self, query: SQL | str, packed_data=None):
        """
        Fetch the data from the query

        :param query: query
        :param packed_data: packed data
        :return: rows
        """

        with self._session_pool.connection() as connection:
            cursor = connection.cursor()
            if packed_data:
                cursor.execute(query, packed_data)
            else:
                cursor.execute(query)
            rows = cursor.fetchall()
        return rows

    @retry
    def export_DF_to_warehouse(self, dfObj: pd.DataFrame, outputTableName: str, columns: list, remove_nan=False) -> None:
        """
        Export the DataFrame to the warehouse

        Builds an ``INSERT INTO <table> (<columns>) VALUES (%(<column>)s, ...)``
        statement and runs it once per DataFrame row via execute_many().

        :param dfObj: DataFrame
        :param outputTableName: output table name
        :param columns: list of columns
        :param remove_nan: remove NaN values
        :return: None
        """

        col = ', '.join(columns)
        params = ', '.join(f"%({column})s" for column in columns)

        main_dict = dfObj.to_dict('records')
        if remove_nan:
            # Replace missing values with None so they are sent as NULL.
            main_dict = [
                {key: (None if pd.isna(value) else value) for key, value in record.items()}
                for record in main_dict
            ]

        # Table and column names are formatted into the statement text; only the
        # row values are passed as parameters.
        query = """INSERT INTO {} ({}) VALUES ({})""".format(outputTableName, col, params)
        self.execute_many(query, main_dict)

    @retry
    def truncate_table(self, tableName: str) -> None:
        """
        Truncate the table

        :param tableName: table name
        :return: None
        """

        truncateQuery = """TRUNCATE TABLE {}""".format(tableName)
        self.execute(truncateQuery)

    @retry
    def empty_table(self, tableName: str) -> None:
        """
        Empty the table

        :param tableName: table name
        :return: None
        """

        deleteQuery = """DELETE FROM {}""".format(tableName)
        self.execute(deleteQuery)

    def __del__(self) -> None:
        """
        Destructor

        Closes the pool if one is still open; an instance that never connected
        (or was already closed) is left alone.

        :return: None
        """

        self.close_connection()