sortinghat-eclipse-foundation 0.1.2rc1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/PKG-INFO +2 -2
  2. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/README.md +1 -1
  3. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/pyproject.toml +1 -1
  4. sortinghat_eclipse_foundation-0.2.0/sortinghat/core/importer/backends/_version.py +2 -0
  5. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/sortinghat/core/importer/backends/eclipse.py +101 -52
  6. sortinghat_eclipse_foundation-0.1.2rc1/sortinghat/core/importer/backends/_version.py +0 -2
  7. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/AUTHORS +0 -0
  8. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/LICENSE.txt +0 -0
  9. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/tests/__init__.py +0 -0
  10. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/tests/data/eclipse_accounts_page_1.json +0 -0
  11. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/tests/data/eclipse_accounts_page_2.json +0 -0
  12. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/tests/data/eclipse_accounts_page_3.json +0 -0
  13. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/tests/data/eclipse_jdoe.json +0 -0
  14. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/tests/data/eclipse_jdoe_employment.json +0 -0
  15. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/tests/data/eclipse_jrae.json +0 -0
  16. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/tests/data/eclipse_jrae_employment.json +0 -0
  17. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/tests/data/eclipse_jsmith.json +0 -0
  18. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/tests/data/eclipse_jsmith_employment.json +0 -0
  19. {sortinghat_eclipse_foundation-0.1.2rc1 → sortinghat_eclipse_foundation-0.2.0}/tests/test_eclipse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sortinghat-eclipse-foundation
3
- Version: 0.1.2rc1
3
+ Version: 0.2.0
4
4
  Summary: SortingHat backend to import identities from the Eclipse Foundation
5
5
  License-File: AUTHORS
6
6
  License-File: LICENSE.txt
@@ -22,7 +22,7 @@ SortingHat backend to import identities from Eclipse Foundation
22
22
 
23
23
  ## Requirements
24
24
 
25
- - Python >= 3.9
25
+ - Python >= 3.10
26
26
 
27
27
  You will also need some other libraries for running the tool, you can find the
28
28
  whole list of dependencies in [pyproject.toml](pyproject.toml) file.
@@ -4,7 +4,7 @@ SortingHat backend to import identities from Eclipse Foundation
4
4
 
5
5
  ## Requirements
6
6
 
7
- - Python >= 3.9
7
+ - Python >= 3.10
8
8
 
9
9
  You will also need some other libraries for running the tool, you can find the
10
10
  whole list of dependencies in [pyproject.toml](pyproject.toml) file.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sortinghat-eclipse-foundation"
3
- version = "0.1.2-rc.1"
3
+ version = "0.2.0"
4
4
  description = "SortingHat backend to import identities from the Eclipse Foundation"
5
5
  authors = [
6
6
  {name = "Bitergia Developers"}
@@ -0,0 +1,2 @@
1
+ # File auto-generated by semverup on 2025-11-11 14:58:49.929536
2
+ __version__ = "0.2.0"
@@ -16,10 +16,13 @@
16
16
  #
17
17
 
18
18
  import logging
19
+ import time
19
20
 
20
21
  import dateutil.relativedelta
21
22
  import requests
22
23
 
24
+ from concurrent.futures import ThreadPoolExecutor, as_completed, wait, FIRST_COMPLETED
25
+
23
26
  from django.conf import settings
24
27
  from django.db.models import (Q, Subquery)
25
28
 
@@ -46,6 +49,11 @@ from sortinghat.core import models as sh_models
46
49
  ECLIPSE_SOURCE = "eclipsefdn"
47
50
  GITHUB_SOURCE = "github"
48
51
 
52
+ # Parallel processing
53
+ MAX_WORKERS = 8
54
+ MAX_QUEUE_SIZE = 100
55
+
56
+ REQUEST_TIMEOUT = 30
49
57
 
50
58
  logger = logging.getLogger(__name__)
51
59
 
@@ -73,7 +81,7 @@ class EclipseFoundationAccountsImporter(IdentitiesImporter):
73
81
  """
74
82
  NAME = "EclipseFoundation"
75
83
 
76
- def __init__(self, ctx, url, from_date=None):
84
+ def __init__(self, ctx, url, from_date=None, from_page=1):
77
85
  super().__init__(ctx, url)
78
86
 
79
87
  min_date = datetime_utcnow() - dateutil.relativedelta.relativedelta(years=1)
@@ -85,6 +93,11 @@ class EclipseFoundationAccountsImporter(IdentitiesImporter):
85
93
  else:
86
94
  self.from_date = from_date
87
95
 
96
+ if not from_page:
97
+ self.from_page = 1
98
+ else:
99
+ self.from_page = int(from_page)
100
+
88
101
  if self.from_date < min_date:
89
102
  msg = (
90
103
  "Invalid 'from_date' value. It can only import identities updated since a year ago."
@@ -104,68 +117,99 @@ class EclipseFoundationAccountsImporter(IdentitiesImporter):
104
117
 
105
118
  epoch = int(self.from_date.timestamp())
106
119
 
107
- # Fetch accounts pages
108
- for account in client.fetch_accounts(epoch=epoch):
109
- ef_profile = client.fetch_account_profile(account['name'])
120
+ with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
121
+ pending = set()
122
+
123
+ for account in client.fetch_accounts(epoch=epoch, from_page=self.from_page):
124
+ future = executor.submit(self.process_account, client, account)
125
+ pending.add(future)
126
+
127
+ # Wait to complete when reaching max queue size
128
+ if len(pending) >= MAX_QUEUE_SIZE:
129
+ done, pending = wait(pending, return_when=FIRST_COMPLETED, timeout=120)
130
+ if not done:
131
+ raise TimeoutError("Timeout waiting for Eclipse account processing")
132
+ for future in done:
133
+ try:
134
+ individual = future.result()
135
+ if individual:
136
+ yield individual
137
+ except Exception as exc:
138
+ logger.error(f"Error processing Eclipse account {account}; error={exc}")
139
+
140
+ # Process remaining futures
141
+ for future in as_completed(pending, timeout=600):
142
+ try:
143
+ individual = future.result()
144
+ if individual:
145
+ yield individual
146
+ except Exception as exc:
147
+ logger.error(f"Error processing Eclipse account; error={exc}")
110
148
 
111
- if not ef_profile:
112
- continue
149
+ @staticmethod
150
+ def process_account(client, account):
151
+ """Process a single Eclipse account to create an Individual."""
113
152
 
114
- individual = Individual(uuid=ef_profile['uid'])
153
+ ef_profile = client.fetch_account_profile(account['name'])
115
154
 
116
- name = ef_profile['first_name'] + ' ' + ef_profile['last_name']
117
- email = ef_profile['mail']
155
+ if not ef_profile:
156
+ return None
118
157
 
119
- prf = Profile()
120
- prf.name = name
121
- prf.email = email
158
+ individual = Individual(uuid=ef_profile['uid'])
122
159
 
123
- individual.profile = prf
160
+ name = ef_profile['first_name'] + ' ' + ef_profile['last_name']
161
+ email = ef_profile['mail']
124
162
 
125
- eclipse_id = Identity(
126
- source=ECLIPSE_SOURCE,
163
+ prf = Profile()
164
+ prf.name = name
165
+ prf.email = email
166
+
167
+ individual.profile = prf
168
+
169
+ eclipse_id = Identity(
170
+ source=ECLIPSE_SOURCE,
171
+ name=name,
172
+ email=email,
173
+ username=ef_profile['name'],
174
+ )
175
+ individual.identities.append(eclipse_id)
176
+
177
+ if ef_profile['github_handle']:
178
+ idt = Identity(
179
+ source=GITHUB_SOURCE,
127
180
  name=name,
181
+ username=ef_profile['github_handle'],
128
182
  email=email,
129
- username=ef_profile['name'],
130
183
  )
131
- individual.identities.append(eclipse_id)
132
-
133
- if ef_profile['github_handle']:
134
- idt = Identity(
135
- source=GITHUB_SOURCE,
136
- name=name,
137
- username=ef_profile['github_handle'],
138
- email=email,
139
- )
140
- individual.identities.append(idt)
184
+ individual.identities.append(idt)
141
185
 
142
- # Fetch enrollments for the identity. If no enrollment is set
143
- # use the organization field from the profile, if set.
144
- employment_history = client.fetch_employment_history(account['name'])
186
+ # Fetch enrollments for the identity. If no enrollment is set
187
+ # use the organization field from the profile, if set.
188
+ employment_history = client.fetch_employment_history(account['name'])
145
189
 
146
- if employment_history:
147
- for employment in employment_history:
148
- org = Organization(name=employment['organization_name'])
149
- start, end = None, None
190
+ if employment_history:
191
+ for employment in employment_history:
192
+ org = Organization(name=employment['organization_name'])
193
+ start, end = None, None
150
194
 
151
- if employment['start']:
152
- start = str_to_datetime(employment['start'])
153
- if employment['end']:
154
- end = str_to_datetime(employment['end'])
195
+ if employment['start']:
196
+ start = str_to_datetime(employment['start'])
197
+ if employment['end']:
198
+ end = str_to_datetime(employment['end'])
155
199
 
156
- enr = Enrollment(org, start=start, end=end)
157
- individual.enrollments.append(enr)
200
+ enr = Enrollment(org, start=start, end=end)
201
+ individual.enrollments.append(enr)
158
202
 
159
- if not individual.enrollments:
160
- company = ef_profile.get('org', None)
161
- if company:
162
- org = Organization(name=company)
163
- enr = Enrollment(org)
164
- individual.enrollments.append(enr)
203
+ if not individual.enrollments:
204
+ company = ef_profile.get('org', None)
205
+ if company:
206
+ org = Organization(name=company)
207
+ enr = Enrollment(org)
208
+ individual.enrollments.append(enr)
165
209
 
166
- logger.info(f"Eclipse account processed; account={account['name']}; changed={account['changed']}")
210
+ logger.info(f"Eclipse account processed; account={account['name']}; changed={account['changed']}")
167
211
 
168
- yield individual
212
+ return individual
169
213
 
170
214
  def post_process_individual(self, individual, uuid):
171
215
  """Post processing for Eclipse identities.
@@ -235,10 +279,10 @@ class EclipseFoundationAPIClient:
235
279
 
236
280
  self.token = None
237
281
 
238
- def fetch_accounts(self, epoch):
282
+ def fetch_accounts(self, epoch, from_page=1):
239
283
  """Fetch accounts updated from a given UNIX time."""
240
284
 
241
- page = 1
285
+ page = from_page
242
286
  total_accounts = 0
243
287
 
244
288
  logger.info(f"Fetching accounts from API; url={self.ECLIPSE_ACCOUNTS_URL}, epoch={epoch}")
@@ -249,10 +293,14 @@ class EclipseFoundationAPIClient:
249
293
  'since': epoch,
250
294
  'page': page,
251
295
  'pagesize': 100,
296
+ 'sortby': 'uid',
252
297
  }
253
298
 
254
299
  logger.debug(f"Fetching accounts from API; url={url}, params={params}")
255
300
  data = self._fetch(url, params=params)
301
+ if not data:
302
+ logger.error(f"No data returned from API; url={url}, params={params}")
303
+ continue
256
304
 
257
305
  for account in data['result']:
258
306
  yield account
@@ -316,7 +364,7 @@ class EclipseFoundationAPIClient:
316
364
 
317
365
  while retries < max_retries:
318
366
  try:
319
- response = requests.get(url, params=params, auth=self.token)
367
+ response = requests.get(url, params=params, auth=self.token, timeout=REQUEST_TIMEOUT)
320
368
  except ExpiredAccessToken:
321
369
  # Refresh token and try again
322
370
  self.login(self.user_id, self.password)
@@ -332,13 +380,14 @@ class EclipseFoundationAPIClient:
332
380
  elif 500 <= response.status_code < 600:
333
381
  # Errors could have been related to server overloading
334
382
  retries += 1
383
+ time.sleep(2 ** retries)
335
384
  else:
336
385
  response.raise_for_status()
337
386
 
338
- response = requests.get(url, params=params, auth=self.token)
387
+ response = requests.get(url, params=params, auth=self.token, timeout=REQUEST_TIMEOUT)
339
388
  response.raise_for_status()
340
389
 
341
- return response
390
+ return response.json()
342
391
 
343
392
  def _authenticate(self, client_id, client_secret, scope):
344
393
  """Authenticate using OAuth2.
@@ -1,2 +0,0 @@
1
- # File auto-generated by semverup on 2025-10-31 13:33:01.052904
2
- __version__ = "0.1.2-rc.1"