sortition-algorithms 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ """Sortition algorithms for democratic lotteries."""
2
+
3
+ from sortition_algorithms.adapters import CSVAdapter, GSheetAdapter
4
+ from sortition_algorithms.core import (
5
+ find_random_sample,
6
+ run_stratification,
7
+ selected_remaining_tables,
8
+ )
9
+ from sortition_algorithms.features import read_in_features
10
+ from sortition_algorithms.people import read_in_people
11
+ from sortition_algorithms.settings import Settings
12
+
13
+ __all__ = [
14
+ "CSVAdapter",
15
+ "GSheetAdapter",
16
+ "Settings",
17
+ "find_random_sample",
18
+ "read_in_features",
19
+ "read_in_people",
20
+ "run_stratification",
21
+ "selected_remaining_tables",
22
+ ]
@@ -0,0 +1,229 @@
1
+ from pathlib import Path
2
+
3
+ import click
4
+
5
+ from sortition_algorithms import adapters, core, people_features
6
+ from sortition_algorithms.settings import Settings
7
+
8
+
9
def echo_all(msgs: list[str]) -> None:
    """Emit each entry of ``msgs`` through ``click.echo``, preserving order."""
    for line in msgs:
        click.echo(line)
12
+
13
+
14
# Root command group; the sub-commands below attach themselves via ``@cli.command()``.
@click.group()
def cli() -> None:
    """A command line tool to exercise the sortition algorithms."""
18
+
19
+
20
@cli.command()
@click.option(
    "-S",
    "--settings",
    envvar="SORTITION_SETTINGS",
    type=click.Path(dir_okay=False),
    required=True,
    help="Settings for the sortition run. Will auto-create if not present.",
)
@click.option(
    "-f",
    "--features-csv",
    type=click.Path(exists=True, dir_okay=False),
    required=True,
    help="Path to CSV with features defined.",
)
@click.option(
    "-p",
    "--people-csv",
    type=click.Path(exists=True, dir_okay=False),
    required=True,
    help="Path to CSV with people defined.",
)
@click.option(
    "-s",
    "--selected-csv",
    type=click.Path(dir_okay=False, writable=True),
    required=True,
    help="Path to CSV file to write selected people to.",
)
@click.option(
    "-r",
    "--remaining-csv",
    type=click.Path(dir_okay=False, writable=True),
    required=True,
    help="Path to CSV file to write remaining people to.",
)
@click.option(
    "-n",
    "--number-wanted",
    type=click.IntRange(min=1),
    required=True,
    help="Number of people to select.",
)
def csv(
    settings: str,
    features_csv: str,
    people_csv: str,
    selected_csv: str,
    remaining_csv: str,
    number_wanted: int,
) -> None:
    """Do sortition with CSV files."""
    # NOTE: the docstring above doubles as the click help text, so it is kept short.
    csv_adapter = adapters.CSVAdapter()

    # Loading the settings yields the settings object plus one status message.
    settings_obj, settings_msg = Settings.load_from_file(settings_file_path=Path(settings))
    click.echo(settings_msg)

    features, feature_msgs = csv_adapter.load_features_from_file(Path(features_csv))
    echo_all(feature_msgs)

    people, people_msgs = csv_adapter.load_people_from_file(Path(people_csv), settings_obj, features)
    echo_all(people_msgs)

    success, people_selected, run_msgs = core.run_stratification(features, people, number_wanted, settings_obj)
    echo_all(run_msgs)
    if not success:
        # Bail out before any output file is opened, so nothing gets truncated.
        raise click.ClickException("Selection not successful, no files written.")

    # Only the first selection returned by the run is written out.
    first_selection = people_selected[0]
    selected_rows, remaining_rows, _ = core.selected_remaining_tables(people, first_selection, features, settings_obj)

    with (
        Path(selected_csv).open("w", newline="") as selected_f,
        Path(remaining_csv).open("w", newline="") as remaining_f,
    ):
        # The adapter writes to whatever file objects are attached to it.
        csv_adapter.selected_file = selected_f
        csv_adapter.remaining_file = remaining_f
        csv_adapter.output_selected_remaining(selected_rows, remaining_rows)
97
+
98
+
99
@cli.command()
@click.option(
    "-S",
    "--settings",
    envvar="SORTITION_SETTINGS",
    # No ``exists=True`` here: the help text promises the file is auto-created,
    # and the ``csv`` command declares this option the same way.
    type=click.Path(dir_okay=False),
    required=True,
    help="Settings for the sortition run. Will auto-create if not present.",
)
@click.option(
    "--auth-json-file",
    envvar="SORTITION_GDOC_AUTH",
    type=click.Path(exists=True, dir_okay=False),
    required=True,
    help="Path to file with OAuth2 details to access google account.",
)
@click.option("--gen-rem-tab/--no-gen-rem-tab", default=True, help="Generate a 'Remaining' tab.")
@click.option("-g", "--gsheet-name", required=True, help="Name of GDoc Spreadsheet to use.")
@click.option(
    "-f",
    "--feature-tab-name",
    default="Categories",
    required=True,
    help="Name of tab containing features/categories.",
)
@click.option(
    "-p",
    "--people-tab-name",
    # Previously defaulted to "Categories" (copy-paste of the feature tab default),
    # which made the default invocation read people from the features tab.
    default="Respondents",
    required=True,
    help="Name of tab containing people/respondents.",
)
@click.option(
    "-s",
    "--selected-tab-name",
    default="Selected",
    required=True,
    help="Name of tab to write selected people to.",
)
@click.option(
    "-r",
    "--remaining-tab-name",
    default="Remaining",
    help="Name of tab to write remaining people to.",
)
@click.option(
    "-n",
    "--number-wanted",
    type=click.IntRange(min=1),
    required=True,
    help="Number of people to select.",
)
def gsheet(
    settings: str,
    auth_json_file: str,
    gen_rem_tab: bool,
    gsheet_name: str,
    feature_tab_name: str,
    people_tab_name: str,
    selected_tab_name: str,
    remaining_tab_name: str,
    number_wanted: int,
) -> None:
    """Do sortition with Google Spreadsheets."""
    # The adapter takes the "generate remaining tab" switch as the strings "on"/"off".
    gen_rem_tab_value = "on" if gen_rem_tab else "off"
    adapter = adapters.GSheetAdapter(Path(auth_json_file), gen_rem_tab_value)
    settings_obj, msg = Settings.load_from_file(settings_file_path=Path(settings))
    echo_all([msg])

    # Both loaders signal failure by returning None alongside their messages.
    features, msgs = adapter.load_features(gsheet_name, feature_tab_name)
    echo_all(msgs)
    if features is None:
        raise click.ClickException("Could not load features, exiting.")

    people, msgs = adapter.load_people(people_tab_name, settings_obj, features)
    echo_all(msgs)
    if people is None:
        raise click.ClickException("Could not load people, exiting.")

    success, people_selected, msgs = core.run_stratification(features, people, number_wanted, settings_obj)
    echo_all(msgs)
    if not success:
        raise click.ClickException("Selection not successful, no files written.")

    # Only the first selection returned by the run is written out.
    selected_rows, remaining_rows, _ = core.selected_remaining_tables(
        people, people_selected[0], features, settings_obj
    )
    # Output tab names are configured on the adapter before writing.
    adapter.selected_tab_name = selected_tab_name
    adapter.remaining_tab_name = remaining_tab_name
    adapter.output_selected_remaining(selected_rows, remaining_rows, settings_obj)
189
+
190
+
191
@cli.command()
@click.option(
    "-S",
    "--settings",
    envvar="SORTITION_SETTINGS",
    # No ``exists=True`` here: the help text promises the file is auto-created,
    # and the ``csv`` command declares this option the same way.
    type=click.Path(dir_okay=False),
    required=True,
    help="Settings for the sortition run. Will auto-create if not present.",
)
@click.option(
    "-f",
    "--features-csv",
    type=click.Path(exists=True, dir_okay=False),
    required=True,
    help="Path to CSV with features defined.",
)
@click.option(
    "-p",
    "--people-csv",
    type=click.Path(dir_okay=False, writable=True),
    required=True,
    help="Path to CSV to write sample people to.",
)
@click.option(
    "-n",
    "--number-wanted",
    type=click.IntRange(min=1),
    required=True,
    help="Number of people to write.",
)
def gen_sample(settings: str, features_csv: str, people_csv: str, number_wanted: int) -> None:
    """Generate a sample CSV file of people compatible with features and settings."""
    adapter = adapters.CSVAdapter()
    settings_obj, msg = Settings.load_from_file(settings_file_path=Path(settings))
    echo_all([msg])
    features, msgs = adapter.load_features_from_file(Path(features_csv))
    echo_all(msgs)
    # The output file is opened only after settings and features loaded without raising.
    with open(people_csv, "w", newline="") as people_f:
        people_features.create_readable_sample_file(features, people_f, number_wanted, settings_obj)
@@ -0,0 +1,308 @@
1
+ """
2
+ Adapters for loading and saving data.
3
+
4
+ Initially we have CSV files locally, and Google Docs Spreadsheets.
5
+ """
6
+
7
+ import csv
8
+ from collections.abc import Iterable
9
+ from io import StringIO
10
+ from pathlib import Path
11
+ from typing import ClassVar, TextIO
12
+
13
+ import gspread
14
+ from oauth2client.service_account import ServiceAccountCredentials
15
+
16
+ from sortition_algorithms.features import FeatureCollection, read_in_features
17
+ from sortition_algorithms.people import People, read_in_people
18
+ from sortition_algorithms.settings import Settings
19
+
20
+ # TODO: should features_loaded and people_loaded be removed?
21
+ # have external thing keep track of it instead?
22
+
23
+
24
+ def _stringify_records(
25
+ records: Iterable[dict[str, str | int | float]],
26
+ ) -> list[dict[str, str]]:
27
+ new_records: list[dict[str, str]] = []
28
+ for record in records:
29
+ new_records.append({k: str(v) for k, v in record.items()})
30
+ return new_records
31
+
32
+
33
class CSVAdapter:
    """Read features and people from CSV text and write selection results as CSV.

    Output goes to ``selected_file`` / ``remaining_file``; both start out as
    in-memory ``StringIO`` buffers and may be replaced with real file objects.
    """

    def __init__(self) -> None:
        self.features_loaded = False
        self.people_loaded = False
        # Flipped to True once the corresponding output has been written.
        self.enable_selected_file_download = False
        self.enable_remaining_file_download = False
        self.selected_file: TextIO = StringIO()
        self.remaining_file: TextIO = StringIO()

    def load_features_from_file(
        self,
        features_file: Path,
    ) -> tuple[FeatureCollection, list[str]]:
        """Open ``features_file`` and parse it as a features CSV."""
        with open(features_file, newline="") as handle:
            return self._load_features(handle)

    def load_features_from_str(self, file_contents: str) -> tuple[FeatureCollection, list[str]]:
        """Parse ``file_contents`` (CSV text already in memory) as a features CSV."""
        buffer = StringIO(file_contents)
        return self._load_features(buffer)

    def _load_features(self, file_obj: TextIO) -> tuple[FeatureCollection, list[str]]:
        """Hand the header and the row reader to ``read_in_features``; mark features loaded."""
        reader = csv.DictReader(file_obj)
        header = reader.fieldnames
        assert header is not None
        result = read_in_features(list(header), reader)
        self.features_loaded = True
        return result[0], result[1]

    def load_people_from_file(
        self,
        people_file: Path,
        settings: Settings,
        features: FeatureCollection,
    ) -> tuple[People, list[str]]:
        """Open ``people_file`` and parse it as a people CSV."""
        with open(people_file, newline="") as handle:
            return self._load_people(handle, settings, features)

    def load_people_from_str(
        self,
        file_contents: str,
        settings: Settings,
        features: FeatureCollection,
    ) -> tuple[People, list[str]]:
        """Parse ``file_contents`` (CSV text already in memory) as a people CSV."""
        buffer = StringIO(file_contents)
        return self._load_people(buffer, settings, features)

    def _load_people(
        self,
        file_obj: TextIO,
        settings: Settings,
        features: FeatureCollection,
    ) -> tuple[People, list[str]]:
        """Stringify all rows, pass them to ``read_in_people``; mark people loaded."""
        reader = csv.DictReader(file_obj)
        # Consume the rows first; the header is available on the reader afterwards.
        rows = _stringify_records(reader)
        header = reader.fieldnames
        assert header is not None
        result = read_in_people(list(header), rows, features, settings)
        self.people_loaded = True
        return result[0], result[1]

    def _write_rows(self, out_file: TextIO, rows: list[list[str]]) -> None:
        """Write ``rows`` to ``out_file`` as comma-separated, minimally quoted CSV."""
        csv.writer(
            out_file,
            delimiter=",",
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
        ).writerows(rows)

    # Writing out everyone who was NOT selected is useful for later top-up
    # selections if people pull out. BUT that list should not include people at
    # the same address as someone who has been selected!
    def output_selected_remaining(
        self,
        people_selected_rows: list[list[str]],
        people_remaining_rows: list[list[str]],
    ) -> None:
        """Write the selected and remaining tables to their respective files."""
        for target, table in (
            (self.selected_file, people_selected_rows),
            (self.remaining_file, people_remaining_rows),
        ):
            self._write_rows(target, table)
        # Both CSVs written, so the GUI download buttons can be activated.
        self.enable_selected_file_download = True
        self.enable_remaining_file_download = True

    def output_multi_selections(
        self,
        multi_selections: list[list[str]],
    ) -> None:
        """Write a multi-selection table to ``selected_file`` only."""
        self._write_rows(self.selected_file, multi_selections)
        # CSV written, so the GUI download button can be activated.
        self.enable_selected_file_download = True
120
+
121
class GSheetAdapter:
    """Read features and people from a Google Sheet and write results to new tabs.

    The gspread client and the spreadsheet handle are created lazily on first
    use. Status messages accumulate in ``_messages`` and are handed back (and
    reset) by ``messages()``.
    """

    scope: ClassVar = [
        "https://spreadsheets.google.com/feeds",
        "https://www.googleapis.com/auth/drive",
    ]
    hl_light_blue: ClassVar = {
        "backgroundColor": {
            "red": 153 / 255,
            "green": 204 / 255,
            "blue": 255 / 255,
        }
    }
    # NOTE(review): Sheets colour channels are documented as 0..1; these values
    # exceed that range - confirm the colour that actually gets rendered.
    hl_orange: ClassVar = {"backgroundColor": {"red": 5, "green": 2.5, "blue": 0}}

    def __init__(self, auth_json_path: Path, gen_rem_tab: str = "on") -> None:
        """Store configuration only; no network access happens here.

        Args:
            auth_json_path: service-account JSON key file used to authorise.
            gen_rem_tab: "on" to also write a remaining tab, "off" otherwise.
        """
        self.auth_json_path = auth_json_path
        self._client: gspread.client.Client | None = None
        self._spreadsheet: gspread.Spreadsheet | None = None
        self.original_selected_tab_name = "Original Selected - output - "
        self.selected_tab_name = "Selected"
        self.columns_selected_first = "C"
        self.column_selected_blank_num = 6
        self.remaining_tab_name = "Remaining - output - "
        self.new_tab_default_size_rows = 2
        self.new_tab_default_size_cols = 40
        self.g_sheet_name = ""
        self._messages: list[str] = []
        self.features_loaded = False
        self.people_loaded = False
        self.gen_rem_tab = gen_rem_tab  # Added for checkbox.

    def messages(self) -> list[str]:
        """Return accumulated messages and reset"""
        messages = self._messages
        self._messages = []
        return messages

    @property
    def client(self) -> gspread.client.Client:
        """The authorised gspread client, created on first access."""
        if self._client is None:
            creds = ServiceAccountCredentials.from_json_keyfile_name(
                str(self.auth_json_path),
                self.scope,
            )
            self._client = gspread.authorize(creds)
        return self._client

    @property
    def spreadsheet(self) -> gspread.Spreadsheet:
        """The spreadsheet named ``g_sheet_name``, opened on first access and cached."""
        if self._spreadsheet is None:
            self._spreadsheet = self.client.open(self.g_sheet_name)
            self._messages.append(f"Opened Google Sheet: '{self.g_sheet_name}'. ")
        return self._spreadsheet

    def _tab_exists(self, tab_name: str) -> bool:
        """Return True if the spreadsheet has a worksheet titled exactly ``tab_name``."""
        # The ``spreadsheet`` property either returns a handle or raises, so no
        # None check is needed before using it.
        return any(tab.title == tab_name for tab in self.spreadsheet.worksheets())

    def _clear_or_create_tab(self, tab_name: str, other_tab_name: str, inc: int) -> gspread.Worksheet:
        """Create and return a new, numbered worksheet.

        Despite the name, nothing is cleared. The numeric suffix starts at 0 and
        is incremented until neither ``{tab_name}{num}`` nor
        ``{other_tab_name}{num}`` exists. The new tab is ``{tab_name}{num}``,
        or ``{tab_name}{num - 1}`` when ``inc`` is -1.
        """
        # this now does not clear data but increments the sheet number...
        num = 0
        tab_ready: gspread.Worksheet | None = None
        tab_name_new = f"{tab_name}{num}"
        other_tab_name_new = f"{other_tab_name}{num}"
        while tab_ready is None:
            if self._tab_exists(tab_name_new) or self._tab_exists(other_tab_name_new):
                num += 1
                tab_name_new = f"{tab_name}{num}"
                other_tab_name_new = f"{other_tab_name}{num}"
            else:
                if inc == -1:
                    # Step back one so this tab shares its number with the
                    # ``other_tab_name`` tab that caused the last increment.
                    tab_name_new = f"{tab_name}{num - 1}"
                tab_ready = self.spreadsheet.add_worksheet(
                    title=tab_name_new,
                    rows=self.new_tab_default_size_rows,
                    cols=self.new_tab_default_size_cols,
                )
        return tab_ready

    def load_features(self, g_sheet_name: str, feature_tab_name: str) -> tuple[FeatureCollection | None, list[str]]:
        """Read the features tab of the named spreadsheet.

        Returns:
            ``(features, messages)``. ``features`` is None when the spreadsheet
            or the tab cannot be found; the messages then explain why.
        """
        if g_sheet_name != self.g_sheet_name:
            # A different document was requested: drop the cached handle so the
            # ``spreadsheet`` property opens the new one instead of the old.
            self._spreadsheet = None
        self.g_sheet_name = g_sheet_name
        try:
            if not self._tab_exists(feature_tab_name):
                self._messages.append(f"Error in Google sheet: no tab called '{feature_tab_name}' found. ")
                return None, self.messages()
        except gspread.SpreadsheetNotFound:
            self._messages.append(f"Google spreadsheet not found: {self.g_sheet_name}. ")
            return None, self.messages()
        tab_features = self.spreadsheet.worksheet(feature_tab_name)
        feature_head = tab_features.row_values(1)
        feature_body = _stringify_records(tab_features.get_all_records(expected_headers=[]))
        features, msgs = read_in_features(feature_head, feature_body)
        self.features_loaded = True
        self._messages += msgs
        return features, self.messages()

    def load_people(
        self,
        respondents_tab_name: str,
        settings: Settings,
        features: FeatureCollection,
    ) -> tuple[People | None, list[str]]:
        """Read the respondents tab of the spreadsheet named by ``g_sheet_name``.

        Returns:
            ``(people, messages)``. ``people`` is None when the spreadsheet or
            the tab cannot be found; the messages then explain why.
        """
        self._messages = []
        try:
            if not self._tab_exists(respondents_tab_name):
                self._messages.append(
                    f"Error in Google sheet: no tab called '{respondents_tab_name}' found. ",
                )
                return None, self.messages()
        except gspread.SpreadsheetNotFound:
            self._messages.append(f"Google spreadsheet not found: {self.g_sheet_name}. ")
            return None, self.messages()

        tab_people = self.spreadsheet.worksheet(respondents_tab_name)
        # if we don't read this in here we can't check if there are 2 columns with the same name
        people_head = tab_people.row_values(1)
        # the numericise_ignore doesn't convert the phone numbers to ints...
        # 1 Oct 2024: the final argument with expected_headers is to deal with the fact that
        # updated versions of gspread can't cope with duplicate headers
        people_body = _stringify_records(
            tab_people.get_all_records(
                numericise_ignore=["all"],
                expected_headers=[],
            )
        )
        self._messages.append(f"Reading in '{respondents_tab_name}' tab in above Google sheet.")
        people, msgs = read_in_people(people_head, people_body, features, settings)
        self._messages += msgs
        self.people_loaded = True
        return people, self.messages()

    def output_selected_remaining(
        self,
        people_selected_rows: list[list[str]],
        people_remaining_rows: list[list[str]],
        settings: Settings,
    ) -> list[int]:
        """Write the selected table, and optionally the remaining table, to new tabs.

        The remaining tab is written only when ``gen_rem_tab`` is "on". If
        ``settings.check_same_address`` is set, remaining rows that differ but
        agree on every address column are collected and the first 30 are
        highlighted.

        Returns:
            The sorted 1-based sheet row numbers of the same-address rows found
            (empty when none were looked for or found).
        """
        tab_original_selected = self._clear_or_create_tab(
            self.original_selected_tab_name,
            self.remaining_tab_name,
            0,
        )
        tab_original_selected.update(people_selected_rows)
        tab_original_selected.format("A1:U1", self.hl_light_blue)
        dupes: list[int] = []
        if self.gen_rem_tab == "on":
            tab_remaining = self._clear_or_create_tab(
                self.remaining_tab_name,
                self.original_selected_tab_name,
                -1,
            )
            tab_remaining.update(people_remaining_rows)
            tab_remaining.format("A1:U1", self.hl_light_blue)
            # highlight any people in remaining tab at the same address
            # TODO: do we ever actually hit this code? We should have deleted
            # all the people who might have been duplicates in selected_remaining_tables()
            if settings.check_same_address:
                # gspread reports 1-based column numbers, while the row lists
                # are indexed from 0 - hence the ``- 1``.
                address_cols: list[int] = [
                    tab_remaining.find(csa).col - 1  # type: ignore[union-attr]
                    for csa in settings.check_same_address_columns
                ]
                dupes_set: set[int] = set()
                n = len(people_remaining_rows)
                for i in range(n):
                    rowrem1 = people_remaining_rows[i]
                    for j in range(i + 1, n):
                        rowrem2 = people_remaining_rows[j]
                        if rowrem1 != rowrem2 and all(rowrem1[col] == rowrem2[col] for col in address_cols):
                            # List index + 1 is the 1-based row number on the sheet.
                            dupes_set.add(i + 1)
                            dupes_set.add(j + 1)
                dupes = sorted(dupes_set)
                # Highlighting is capped at the first 30 rows.
                for row_number in dupes[:30]:
                    tab_remaining.format(str(row_number), self.hl_orange)
        return dupes

    def output_multi_selections(
        self,
        multi_selections: list[list[str]],
    ) -> None:
        """Write a multi-selection table to a new selected tab; needs ``gen_rem_tab == "off"``."""
        assert self.gen_rem_tab == "off"
        tab_original_selected = self._clear_or_create_tab(
            self.original_selected_tab_name,
            "ignoreme",
            0,
        )
        tab_original_selected.update(multi_selections)
        tab_original_selected.format("A1:U1", self.hl_light_blue)