webengage-migration 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- historical_data_migration/__init__.py +0 -0
- historical_data_migration/migration.py +196 -0
- webengage_migration-0.1.0.dist-info/METADATA +123 -0
- webengage_migration-0.1.0.dist-info/RECORD +7 -0
- webengage_migration-0.1.0.dist-info/WHEEL +5 -0
- webengage_migration-0.1.0.dist-info/entry_points.txt +2 -0
- webengage_migration-0.1.0.dist-info/top_level.txt +1 -0
File without changes
|
@@ -0,0 +1,196 @@
|
|
1
|
+
import csv
|
2
|
+
import json
|
3
|
+
import tempfile
|
4
|
+
import os
|
5
|
+
import requests
|
6
|
+
import time
|
7
|
+
import argparse
|
8
|
+
from datetime import datetime
|
9
|
+
|
10
|
+
def process_csv_and_send_batches(type, filename, datacenter, license_code):
    """Convert each CSV row into a WebEngage JSON payload, then upload in batches.

    Rewrites `filename` in place, prepending a "converted JSON" column that
    holds the per-row payload, then hands the augmented file to
    send_json_batches_from_csv() for the actual upload.

    Args:
        type: "events" or "users" (shadows the builtin `type`; name kept for
            backward compatibility with existing callers).
        filename: path to the input CSV file (rewritten in place).
        datacenter: WebEngage datacenter id, or "default" for the global one.
        license_code: WebEngage account license code.
    """
    print("🚀 Step 1: Preparing JSON to the file...!")

    # Step 1: process the CSV and write the JSON payloads into a new first
    # column of a temp file, then atomically swap it over the original.
    with tempfile.NamedTemporaryFile(mode='w', delete=False, newline='', encoding='utf-8') as temp_output:
        with open(filename, mode='r', encoding='utf-8-sig', newline='') as original_file:
            reader = csv.reader(original_file)
            writer = csv.writer(temp_output)

            header = next(reader)
            writer.writerow(["converted JSON"] + header)

            for row in reader:
                # Robustness fix: a row shorter than the header would raise
                # IndexError on the fixed-column reads below; pad it instead.
                if len(row) < len(header):
                    row = row + [""] * (len(header) - len(row))

                # Fresh payload per row. (The original initialized json_obj
                # once outside the loop, so a row matching neither branch
                # would silently re-emit the previous row's payload.)
                json_obj = {}

                if type == "events":
                    col_length = 3              # fixed columns: userId, eventName, eventTime
                    custom_name = "eventData"
                    json_obj = {
                        header[0]: row[0],
                        header[1]: row[1],
                        header[2]: row[2],
                        "eventData": {},
                    }
                    # Columns 4+ become typed custom event attributes.
                    for i in range(col_length, len(header)):
                        json_obj[custom_name][header[i]] = datatype(row[i])

                if type == "users":
                    col_length = 11             # fixed columns 1-11: userId .. country
                    custom_name = "attributes"
                    json_obj = {
                        header[0]: row[0],
                        header[1]: row[1],
                        header[2]: row[2],
                        header[3]: birthDate(row[3]),   # column 4 is reserved for birthDate
                        header[4]: row[4],
                        header[5]: row[5],
                        header[6]: row[6],
                        header[7]: row[7],
                        header[8]: row[8],
                        header[9]: row[9],
                        header[10]: row[10],
                    }
                    # Columns 12+ become typed custom user attributes.
                    if len(header) > 11:
                        json_obj["attributes"] = {}
                        for i in range(col_length, len(header)):
                            json_obj[custom_name][header[i]] = datatype(row[i])

                writer.writerow([json.dumps(json_obj, indent=2)] + row)

    # Updating the CSV with the JSON data (atomic on POSIX).
    os.replace(temp_output.name, filename)
    # NOTE(review): the published source shows "(unknown)" here — presumably a
    # redacted f-string placeholder; restored as the filename. Confirm intent.
    print(f"✅ JSON creation complete: {filename}")

    print("\n🚀 Step 2: Sending data in batches to WebEngage...!")

    # Step 2: read the augmented file back and send payloads in batches of 25.
    send_json_batches_from_csv(type, filename, datacenter, license_code)
|
72
|
+
def send_json_batches_from_csv(type, csv_filename, datacenter, licence_code):
    """Read the "converted JSON" column of `csv_filename` and upload payloads
    to the WebEngage bulk API in batches of 25, throttled to stay under the
    documented 500-requests/minute rate limit.

    Prompts interactively for the REST API key (used as a Bearer token).

    Args:
        type: "events" or "users" — selects the bulk-{type} endpoint and the
            payload key (shadows the builtin `type`; kept for compatibility).
        csv_filename: CSV whose FIRST column holds the JSON payload per row
            (as written by process_csv_and_send_batches).
        datacenter: "default" for api.webengage.com, otherwise used as the
            subdomain, e.g. api.in.webengage.com.
        licence_code: WebEngage account license code (note spelling differs
            from the caller's `license_code`; parameter is positional-safe).
    """
    apikey = input("Please enter your API Key: ")

    # Route to the global endpoint or a regional datacenter subdomain.
    if datacenter=="default":
        url = f"https://api.webengage.com/v1/accounts/{licence_code}/bulk-{type}"
    else:
        url = f"https://api.{datacenter}.webengage.com/v1/accounts/{licence_code}/bulk-{type}"

    headers = {
        'Authorization': f'Bearer {apikey}',
        'Content-Type': 'application/json'
    }

    batch_size = 25          # WebEngage bulk API maximum per request
    current_batch = []       # parsed JSON payloads awaiting send
    row_numbers = []         # 1-based CSV row numbers parallel to current_batch
    total_batches = 0

    with open(csv_filename, mode='r', encoding='utf-8-sig') as file:
        reader = csv.reader(file)

        next(reader) # skipping the header row
        row_number = 1       # header is row 1; first data row becomes 2 below

        for row in reader:
            row_number += 1
            json_str = row[0].strip()

            if not json_str:
                print(f"â ď¸ Skipping row {row_number} - empty JSON")
                continue

            try:
                event = json.loads(json_str)
                current_batch.append(event)
                row_numbers.append(row_number)
            except json.JSONDecodeError as e:
                # Malformed payloads are reported and skipped, never fatal.
                print(f"â ď¸ Skipping row {row_number} due to JSON decode error: {e}")
                continue

            if len(current_batch) == batch_size:
                total_batches += 1
                send_batch(type, current_batch, row_numbers, url, headers, total_batches)
                current_batch = []
                row_numbers = []
                time.sleep(0.15) # ~6.6 requests/sec = 396 req/min (safe)

    # Final leftover batch
    if current_batch:
        total_batches += 1
        send_batch(type, current_batch, row_numbers, url, headers, total_batches)

    print(f"\nâ All batches processed successfully. Total sent: {total_batches}")
128
|
+
def send_batch(type, batch, row_numbers, url, headers, batch_number):
    """POST one batch of payloads to the WebEngage bulk endpoint.

    If the whole-batch request fails (non-200/201 status), falls back to
    sending each payload individually so one bad record does not discard the
    rest of the batch. All outcomes are reported via print; nothing is raised
    to the caller.

    Args:
        type: "events" or "users" — used as the JSON payload key
            (shadows the builtin `type`; kept for compatibility).
        batch: list of parsed JSON payloads (max 25).
        row_numbers: CSV row numbers parallel to `batch`, for reporting.
        url: fully-built bulk API endpoint.
        headers: request headers incl. Bearer authorization.
        batch_number: 1-based ordinal of this batch, for reporting.
    """
    payload = {f"{type}": batch}

    try:
        response = requests.post(url, headers=headers, data=json.dumps(payload))
        if response.status_code == 200 or response.status_code == 201:
            print(f"â Batch {batch_number} with rows {row_numbers} sent successfully.")
        else:
            print(f"â Batch {batch_number} failed with status code {response.status_code}. Trying individual rows...")
            # Try sending each row individually
            for i, single_event in enumerate(batch):
                row_num = row_numbers[i]
                single_payload = {f"{type}": [single_event]}
                try:
                    r = requests.post(url, headers=headers, data=json.dumps(single_payload))
                    if r.status_code == 200 or r.status_code == 201:
                        print(f"   â Row {row_num} sent successfully.")
                    else:
                        print(f"   â Row {row_num} failed with status {r.status_code}: {r.text}")
                except Exception as e:
                    # Per-row network failure: report and continue with the rest.
                    print(f"   â Row {row_num} failed due to error: {e}")

    except Exception as e:
        # Whole-batch network failure (connection error, timeout, ...).
        print(f"â Batch {batch_number} (rows {row_numbers}) failed due to error: {e}")
|
+
|
154
|
+
def datatype(value):
    """Best-effort conversion of a CSV cell to a typed value.

    Recognizes (case-insensitively) "true"/"false" as booleans, then tries
    int, then float; anything else is returned as the stripped string.
    """
    text = value.strip()

    lowered = text.lower()
    if lowered in ("true", "false"):
        return lowered == "true"

    for caster in (int, float):
        try:
            return caster(text)
        except ValueError:
            continue

    return text  # keep as string
169
|
+
|
170
|
+
def birthDate(value):
    """Normalize a CSV birth-date cell to WebEngage's timestamp format.

    Accepts dd-mm-yyyy (what the original code parsed) and yyyy-mm-dd (what
    the package README documents), returning
    "YYYY-MM-DDT11:11:00-0800", or "" for an empty cell.

    Fixes: the original returned
    f"{date_obj.strftime("%Y-%m-%d")}T11:11:00-0800", whose nested double
    quotes are a SyntaxError on every Python < 3.12 despite the package
    declaring Requires-Python >= 3.6.

    Raises:
        ValueError: if the value matches neither supported format
            (the original's strptime raised ValueError likewise).
    """
    value = value.strip()
    if value == "":
        return ""

    # Try the legacy dd-mm-yyyy first, then the README's yyyy-mm-dd.
    for fmt in ("%d-%m-%Y", "%Y-%m-%d"):
        try:
            date_obj = datetime.strptime(value, fmt)
            break
        except ValueError:
            continue
    else:
        raise ValueError(f"Unrecognized birthDate format: {value!r}")

    return date_obj.strftime("%Y-%m-%d") + "T11:11:00-0800"
179
|
+
|
180
|
+
def main():
    """Command-line entry point: parse arguments and start the migration."""
    parser = argparse.ArgumentParser(
        description="Migrate historical data to WebEngage for the provided license code and datacenter."
    )
    parser.add_argument("-f", "--filename", required=True,
                        help="CSV filename with extension.")
    parser.add_argument("-d", "--datacenter", default="default",
                        help="(optional) provide datacenter, default set to global.")
    parser.add_argument("-lc", "--license_code", required=True,
                        help="WebEngage account license code.")
    parser.add_argument("--migrate", choices=["users", "events"], required=True,
                        help="use this flag to start the migration.")

    args = parser.parse_args()

    # argparse's `choices` already restricts --migrate; guard kept for parity.
    if args.migrate not in ("users", "events"):
        print("â Migration not started. Use --migrate to initiate the migration.")
        return

    process_csv_and_send_batches(args.migrate, args.filename, args.datacenter, args.license_code)


if __name__ == "__main__":
    main()
|
@@ -0,0 +1,123 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: webengage-migration
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Webengage internal tool to migrate other party historical data into webengage ecosystem
|
5
|
+
Home-page: https://www.webengage.com/
|
6
|
+
Author: Nipun Patel
|
7
|
+
Author-email: nipunp27@gmail.com
|
8
|
+
License: MIT
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
11
|
+
Classifier: Operating System :: OS Independent
|
12
|
+
Requires-Python: >=3.6
|
13
|
+
Description-Content-Type: text/markdown
|
14
|
+
Requires-Dist: requests
|
15
|
+
|
16
|
+
# WebEngage Migration Tool
|
17
|
+
|
18
|
+
A lightweight tool to migrate historical user or event data into the WebEngage ecosystem. It supports large datasets (millions of rows), performs automatic batching, and adheres to WebEngage API rate limits.
|
19
|
+
|
20
|
+
---
|
21
|
+
|
22
|
+
## Features
|
23
|
+
|
24
|
+
- Handles large files efficiently (2M+ rows)
|
25
|
+
- Batches events or users into groups of 25 adhering API rate limits
|
26
|
+
- Supports multiple datacenter routing (in, ksa, us)
|
27
|
+
- Built-in logger for tracking batch status - (success / failure)
|
28
|
+
|
29
|
+
---
|
30
|
+
|
31
|
+
## Instructions
|
32
|
+
|
33
|
+
- Data should always be in **CSV** format for both **events** and **users**.
|
34
|
+
|
35
|
+
- **Delete the first column ("converted JSON")** before saving the file if you plan to **resend** the data to WebEngage.
|
36
|
+
|
37
|
+
#### Sending Users
|
38
|
+
|
39
|
+
- **Column 4**: Reserve this column for Date of Birth. The header name should be **"birthDate"**
|
40
|
+
|
41
|
+
- **Columns 1 to 11**: should contain the following fields: **userId**, **firstName**, **lastName**, **birthDate**, **gender**, **email**, **phone**, **company**, **city**, **region**, **country** You can shuffle these columns except for **birthDate**, which will always remain in **column 4**
|
42
|
+
|
43
|
+
- **birthDate** values should follow this format: `dd-mm-yyyy` (eg: 25-12-1999)
|
44
|
+
|
45
|
+
- **Columns 12 onwards** will contain **custom user attributes**.
|
46
|
+
|
47
|
+
|
48
|
+
#### Sending Events
|
49
|
+
|
50
|
+
- **Columns 1 to 3**: should contain the following fields: **userId**, **eventName**, **eventTime**. Shuffling of the columns is allowed within this range.
|
51
|
+
|
52
|
+
- **eventTime** should follow this format: `yyyy-mm-ddTHH:MM:SS-0800` (eg: 1986-08-19T15:45:00-0800).
|
53
|
+
|
54
|
+
- **Columns 4 onwards** will contain **event data**.
|
55
|
+
|
56
|
+
---
|
57
|
+
|
58
|
+
## Installation
|
59
|
+
|
60
|
+
|
61
|
+
To install the package run:
|
62
|
+
|
63
|
+
<pre style="font-size: 16px;">
|
64
|
+
pip install webengage-migration
|
65
|
+
</pre>
|
66
|
+
|
67
|
+
---
|
68
|
+
|
69
|
+
## Usage
|
70
|
+
|
71
|
+
To initiate the migration process run the below command:
|
72
|
+
|
73
|
+
<pre style="font-size: 16px;">
|
74
|
+
we --migrate {users/events} -f "{filename.csv}" -d {datacenter} -lc {license_code}
|
75
|
+
</pre>
|
76
|
+
|
77
|
+
<br>
|
78
|
+
|
79
|
+
| Argument | Description | Type |
|
80
|
+
|--------------------|-------------------------------------------------------|------------------------------------|
|
81
|
+
| `{users/events}` | Type of data you're migrating (`users` or `events`) | â ď¸ Required |
|
82
|
+
| `-f "filename.csv"`| Path to the input CSV file | â ď¸ Required |
|
83
|
+
| `-d {datacenter}` | WebEngage datacenter (`in`, `ksa`, default is `us`) | âď¸ Optional |
|
84
|
+
| `-lc {license_code}`| Your WebEngage license code | â ď¸ Required |
|
85
|
+
|
86
|
+
<br>
|
87
|
+
|
88
|
+
**Example:**
|
89
|
+
|
90
|
+
|
91
|
+
**Send users**: `we --migrate users -f "datafile.csv" -d in -lc in~~1234c456`
|
92
|
+
|
93
|
+
**Send events**: `we --migrate events -f "datafile.csv" -d in -lc in~~1234c456`
|
94
|
+
|
95
|
+
<br>
|
96
|
+
|
97
|
+
Once migration is initiated, provide the **API key for authorization**, **API key** will be found in **webengage dashboard for provided license code** under:
|
98
|
+
|
99
|
+
***Data Platforms â Integrations â Rest API â `API KEY`***
|
100
|
+
|
101
|
+

|
102
|
+
|
103
|
+
---
|
104
|
+
|
105
|
+
|
106
|
+
## WebEngage API Rate Limits
|
107
|
+
|
108
|
+
WebEngage limits bulk event / users API usage to:
|
109
|
+
|
110
|
+
- **25 events / users** per API request
|
111
|
+
- **500 requests** per minute (Total **12,500 events** per minute)
|
112
|
+
|
113
|
+
This tool automatically respects the above limits by batching and throttling requests.
|
114
|
+
|
115
|
+
---
|
116
|
+
|
117
|
+
## Legal Notice
|
118
|
+
|
119
|
+
This tool is an **internal property** of **WebEngage** and is strictly for **migration purposes**. It is owned by **Nipun Patel (Copyright)** and any misuse, unauthorized distribution, or external sharing will lead to **legal consequences**.
|
120
|
+
|
121
|
+
---
|
122
|
+
|
123
|
+
Š WebEngage. All rights reserved.
|
@@ -0,0 +1,7 @@
|
|
1
|
+
historical_data_migration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
historical_data_migration/migration.py,sha256=kMEMzJkzIy5ByxdIzl8NUWaZ2xVLpoOWceGUEPljxfw,7606
|
3
|
+
webengage_migration-0.1.0.dist-info/METADATA,sha256=ITKobxmAppLvZyDUI1ZQ1szE8ddHSu_fmNmWyHFhloY,4310
|
4
|
+
webengage_migration-0.1.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
5
|
+
webengage_migration-0.1.0.dist-info/entry_points.txt,sha256=G8Cemkeiyux-EdOQm3dwgld2cJJWAamJrlshhvcyzVo,64
|
6
|
+
webengage_migration-0.1.0.dist-info/top_level.txt,sha256=WcrA3bD35D8SpxbNNnAy0T1ztVO2L2Esk7yPjViNCcE,26
|
7
|
+
webengage_migration-0.1.0.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
historical_data_migration
|