PyPI - sortition-algorithms - Versions diffs - 0.9.0__py3-none-any.whl - Mend

sortition-algorithms 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

sortition_algorithms/__init__.py +22 -0
sortition_algorithms/__main__.py +229 -0
sortition_algorithms/adapters.py +308 -0
sortition_algorithms/committee_generation.py +1377 -0
sortition_algorithms/core.py +591 -0
sortition_algorithms/errors.py +26 -0
sortition_algorithms/features.py +351 -0
sortition_algorithms/find_sample.py +93 -0
sortition_algorithms/people.py +175 -0
sortition_algorithms/people_features.py +288 -0
sortition_algorithms/settings.py +110 -0
sortition_algorithms/utils.py +107 -0
sortition_algorithms-0.9.0.dist-info/METADATA +191 -0
sortition_algorithms-0.9.0.dist-info/RECORD +17 -0
sortition_algorithms-0.9.0.dist-info/WHEEL +4 -0
sortition_algorithms-0.9.0.dist-info/entry_points.txt +2 -0
sortition_algorithms-0.9.0.dist-info/licenses/LICENSE +674 -0

sortition_algorithms/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""Sortition algorithms for democratic lotteries."""
+from sortition_algorithms.adapters import CSVAdapter, GSheetAdapter
+from sortition_algorithms.core import (
+    find_random_sample,
+    run_stratification,
+    selected_remaining_tables,
+)
+from sortition_algorithms.features import read_in_features
+from sortition_algorithms.people import read_in_people
+from sortition_algorithms.settings import Settings
+# Names re-exported as the package's public API; this list is what
+# ``from sortition_algorithms import *`` exposes.
+__all__ = [
+    "CSVAdapter",
+    "GSheetAdapter",
+    "Settings",
+    "find_random_sample",
+    "read_in_features",
+    "read_in_people",
+    "run_stratification",
+    "selected_remaining_tables",
+]

sortition_algorithms/__main__.py ADDED Viewed

@@ -0,0 +1,229 @@
+from pathlib import Path
+import click
+from sortition_algorithms import adapters, core, people_features
+from sortition_algorithms.settings import Settings
+def echo_all(msgs: list[str]) -> None:
+    """Print every message in ``msgs``, one ``click.echo`` call per message."""
+    for line in msgs:
+        click.echo(line)
+# Root command group; the sub-commands below attach themselves via ``@cli.command()``.
+@click.group()
+def cli() -> None:
+    """A command line tool to exercise the sortition algorithms."""
+@cli.command()
+@click.option(
+    "-S",
+    "--settings",
+    envvar="SORTITION_SETTINGS",
+    type=click.Path(dir_okay=False),
+    required=True,
+    help="Settings for the sortition run. Will auto-create if not present.",
+)
+@click.option(
+    "-f",
+    "--features-csv",
+    type=click.Path(exists=True, dir_okay=False),
+    required=True,
+    help="Path to CSV with features defined.",
+)
+@click.option(
+    "-p",
+    "--people-csv",
+    type=click.Path(exists=True, dir_okay=False),
+    required=True,
+    help="Path to CSV with people defined.",
+)
+@click.option(
+    "-s",
+    "--selected-csv",
+    type=click.Path(dir_okay=False, writable=True),
+    required=True,
+    help="Path to CSV file to write selected people to.",
+)
+@click.option(
+    "-r",
+    "--remaining-csv",
+    type=click.Path(dir_okay=False, writable=True),
+    required=True,
+    help="Path to CSV file to write remaining people to.",
+)
+@click.option(
+    "-n",
+    "--number-wanted",
+    type=click.IntRange(min=1),
+    required=True,
+    help="Number of people to select.",
+)
+def csv(
+    settings: str,
+    features_csv: str,
+    people_csv: str,
+    selected_csv: str,
+    remaining_csv: str,
+    number_wanted: int,
+) -> None:
+    """Do sortition with CSV files."""
+    # Pipeline: settings -> features -> people -> stratified selection -> output.
+    # Each loading step returns messages, which are echoed before the next step runs.
+    csv_adapter = adapters.CSVAdapter()
+    loaded_settings, settings_msg = Settings.load_from_file(settings_file_path=Path(settings))
+    click.echo(settings_msg)
+    feature_collection, feature_msgs = csv_adapter.load_features_from_file(Path(features_csv))
+    echo_all(feature_msgs)
+    candidates, people_msgs = csv_adapter.load_people_from_file(
+        Path(people_csv), loaded_settings, feature_collection
+    )
+    echo_all(people_msgs)
+    ok, committees, run_msgs = core.run_stratification(
+        feature_collection, candidates, number_wanted, loaded_settings
+    )
+    echo_all(run_msgs)
+    if not ok:
+        # Bail out before either output file is opened, so nothing is truncated on failure.
+        raise click.ClickException("Selection not successful, no files written.")
+    # Only the first entry of the returned selections is written out.
+    tables = core.selected_remaining_tables(candidates, committees[0], feature_collection, loaded_settings)
+    selected_table, remaining_table = tables[0], tables[1]
+    with open(selected_csv, "w", newline="") as selected_out:
+        with open(remaining_csv, "w", newline="") as remaining_out:
+            # Point the adapter at the real files instead of its in-memory buffers.
+            csv_adapter.selected_file = selected_out
+            csv_adapter.remaining_file = remaining_out
+            csv_adapter.output_selected_remaining(selected_table, remaining_table)
+@cli.command()
+@click.option(
+    "-S",
+    "--settings",
+    envvar="SORTITION_SETTINGS",
+    # No ``exists=True``: the help text promises auto-creation, matching the ``csv`` command.
+    type=click.Path(dir_okay=False),
+    required=True,
+    help="Settings for the sortition run. Will auto-create if not present.",
+)
+@click.option(
+    "--auth-json-file",
+    envvar="SORTITION_GDOC_AUTH",
+    type=click.Path(exists=True, dir_okay=False),
+    required=True,
+    help="Path to file with OAuth2 details to access google account.",
+)
+@click.option("--gen-rem-tab/--no-gen-rem-tab", default=True, help="Generate a 'Remaining' tab.")
+@click.option("-g", "--gsheet-name", required=True, help="Name of GDoc Spreadsheet to use.")
+@click.option(
+    "-f",
+    "--feature-tab-name",
+    default="Categories",
+    required=True,
+    help="Name of tab containing features/categories.",
+)
+@click.option(
+    "-p",
+    "--people-tab-name",
+    # Was "Categories" (copy-pasted from --feature-tab-name), which made the
+    # default read people from the features tab.
+    default="Respondents",
+    required=True,
+    help="Name of tab containing people/respondents.",
+)
+@click.option(
+    "-s",
+    "--selected-tab-name",
+    default="Selected",
+    required=True,
+    help="Name of tab to write selected people to.",
+)
+@click.option(
+    "-r",
+    "--remaining-tab-name",
+    default="Remaining",
+    help="Name of tab to write remaining people to.",
+)
+@click.option(
+    "-n",
+    "--number-wanted",
+    type=click.IntRange(min=1),
+    required=True,
+    help="Number of people to select.",
+)
+def gsheet(
+    settings: str,
+    auth_json_file: str,
+    gen_rem_tab: bool,
+    gsheet_name: str,
+    feature_tab_name: str,
+    people_tab_name: str,
+    selected_tab_name: str,
+    remaining_tab_name: str,
+    number_wanted: int,
+) -> None:
+    """Do sortition with Google Spreadsheets."""
+    # The adapter takes the remaining-tab switch as the strings "on"/"off", not a bool.
+    gen_rem_tab_value = "on" if gen_rem_tab else "off"
+    adapter = adapters.GSheetAdapter(Path(auth_json_file), gen_rem_tab_value)
+    settings_obj, msg = Settings.load_from_file(settings_file_path=Path(settings))
+    echo_all([msg])
+    # Loading features also tells the adapter which spreadsheet to open.
+    features, msgs = adapter.load_features(gsheet_name, feature_tab_name)
+    echo_all(msgs)
+    if features is None:
+        raise click.ClickException("Could not load features, exiting.")
+    people, msgs = adapter.load_people(people_tab_name, settings_obj, features)
+    echo_all(msgs)
+    if people is None:
+        raise click.ClickException("Could not load people, exiting.")
+    success, people_selected, msgs = core.run_stratification(features, people, number_wanted, settings_obj)
+    echo_all(msgs)
+    if not success:
+        raise click.ClickException("Selection not successful, no files written.")
+    # Only the first entry of the returned selections is written out.
+    selected_rows, remaining_rows, _ = core.selected_remaining_tables(
+        people, people_selected[0], features, settings_obj
+    )
+    # NOTE(review): GSheetAdapter.output_selected_remaining names the selected tab from
+    # ``original_selected_tab_name``, not ``selected_tab_name``, so --selected-tab-name
+    # appears to have no effect on the output tab title - confirm intended behaviour.
+    adapter.selected_tab_name = selected_tab_name
+    adapter.remaining_tab_name = remaining_tab_name
+    adapter.output_selected_remaining(selected_rows, remaining_rows, settings_obj)
+@cli.command()
+@click.option(
+    "-S",
+    "--settings",
+    envvar="SORTITION_SETTINGS",
+    # No ``exists=True``: the help text promises auto-creation, matching the ``csv`` command.
+    type=click.Path(dir_okay=False),
+    required=True,
+    help="Settings for the sortition run. Will auto-create if not present.",
+)
+@click.option(
+    "-f",
+    "--features-csv",
+    type=click.Path(exists=True, dir_okay=False),
+    required=True,
+    help="Path to CSV with features defined.",
+)
+@click.option(
+    "-p",
+    "--people-csv",
+    type=click.Path(dir_okay=False, writable=True),
+    required=True,
+    help="Path to CSV to write sample people to.",
+)
+@click.option(
+    "-n",
+    "--number-wanted",
+    type=click.IntRange(min=1),
+    required=True,
+    help="Number of people to write.",
+)
+def gen_sample(settings: str, features_csv: str, people_csv: str, number_wanted: int) -> None:
+    """Generate a sample CSV file of people compatible with features and settings."""
+    adapter = adapters.CSVAdapter()
+    settings_obj, msg = Settings.load_from_file(settings_file_path=Path(settings))
+    echo_all([msg])
+    features, msgs = adapter.load_features_from_file(Path(features_csv))
+    echo_all(msgs)
+    # Here --people-csv is an OUTPUT path: the generated sample is written to it.
+    with open(people_csv, "w", newline="") as people_f:
+        people_features.create_readable_sample_file(features, people_f, number_wanted, settings_obj)

sortition_algorithms/adapters.py ADDED Viewed

@@ -0,0 +1,308 @@
+"""
+Adapters for loading and saving data.
+Initially we have CSV files locally, and Google Docs Spreadsheets.
+"""
+import csv
+from collections.abc import Iterable
+from io import StringIO
+from pathlib import Path
+from typing import ClassVar, TextIO
+import gspread
+from oauth2client.service_account import ServiceAccountCredentials
+from sortition_algorithms.features import FeatureCollection, read_in_features
+from sortition_algorithms.people import People, read_in_people
+from sortition_algorithms.settings import Settings
+# TODO: should features_loaded and people_loaded be removed?
+# have external thing keep track of it instead?
+def _stringify_records(
+    records: Iterable[dict[str, str | int | float]],
+) -> list[dict[str, str]]:
+    """Return new dicts mirroring ``records`` with every value converted via ``str``."""
+    return [{key: str(value) for key, value in row.items()} for row in records]
+class CSVAdapter:
+    """Read features and people from CSV input and write selection results as CSV."""
+    def __init__(self) -> None:
+        # Status flags flipped by the load/output methods below.
+        self.features_loaded = False
+        self.people_loaded = False
+        self.enable_selected_file_download = False
+        self.enable_remaining_file_download = False
+        # Output targets default to in-memory buffers; callers may swap in real files.
+        self.selected_file: TextIO = StringIO()
+        self.remaining_file: TextIO = StringIO()
+    def load_features_from_file(
+        self,
+        features_file: Path,
+    ) -> tuple[FeatureCollection, list[str]]:
+        """Open ``features_file`` and parse it as a features CSV."""
+        with open(features_file, newline="") as handle:
+            return self._load_features(handle)
+    def load_features_from_str(self, file_contents: str) -> tuple[FeatureCollection, list[str]]:
+        """Parse features from CSV text held in a string."""
+        buffer = StringIO(file_contents)
+        return self._load_features(buffer)
+    def _load_features(self, file_obj: TextIO) -> tuple[FeatureCollection, list[str]]:
+        """Parse features from an open CSV stream and mark features as loaded."""
+        reader = csv.DictReader(file_obj)
+        header = reader.fieldnames
+        assert header is not None
+        # The reader itself is handed on as the iterable of row dicts.
+        features, msgs = read_in_features(list(header), reader)
+        self.features_loaded = True
+        return features, msgs
+    def load_people_from_file(
+        self,
+        people_file: Path,
+        settings: Settings,
+        features: FeatureCollection,
+    ) -> tuple[People, list[str]]:
+        """Open ``people_file`` and parse it as a people CSV."""
+        with open(people_file, newline="") as handle:
+            return self._load_people(handle, settings, features)
+    def load_people_from_str(
+        self,
+        file_contents: str,
+        settings: Settings,
+        features: FeatureCollection,
+    ) -> tuple[People, list[str]]:
+        """Parse people from CSV text held in a string."""
+        buffer = StringIO(file_contents)
+        return self._load_people(buffer, settings, features)
+    def _load_people(
+        self,
+        file_obj: TextIO,
+        settings: Settings,
+        features: FeatureCollection,
+    ) -> tuple[People, list[str]]:
+        """Parse people from an open CSV stream and mark people as loaded."""
+        reader = csv.DictReader(file_obj)
+        # All rows are consumed here, before the header is read back off the reader.
+        rows = _stringify_records(reader)
+        header = reader.fieldnames
+        assert header is not None
+        people, msgs = read_in_people(list(header), rows, features, settings)
+        self.people_loaded = True
+        return people, msgs
+    def _write_rows(self, out_file: TextIO, rows: list[list[str]]) -> None:
+        """Write ``rows`` to ``out_file`` as comma-separated, minimally quoted CSV."""
+        csv.writer(
+            out_file,
+            delimiter=",",
+            quotechar='"',
+            quoting=csv.QUOTE_MINIMAL,
+        ).writerows(rows)
+    # Actually useful to also write to a file all those who are NOT selected for later selection if people pull out etc
+    # BUT, we should not include in this people from the same address as someone who has been selected!
+    def output_selected_remaining(
+        self,
+        people_selected_rows: list[list[str]],
+        people_remaining_rows: list[list[str]],
+    ) -> None:
+        """Write the selected and remaining tables to their respective output files."""
+        for target, table in (
+            (self.selected_file, people_selected_rows),
+            (self.remaining_file, people_remaining_rows),
+        ):
+            self._write_rows(target, table)
+        # we have succeeded in CSV so can activate buttons in GUI...
+        self.enable_selected_file_download = True
+        self.enable_remaining_file_download = True
+    def output_multi_selections(
+        self,
+        multi_selections: list[list[str]],
+    ) -> None:
+        """Write a multi-selection table to the selected output file."""
+        self._write_rows(self.selected_file, multi_selections)
+        # we have succeeded in CSV so can activate buttons in GUI...
+        self.enable_selected_file_download = True
+class GSheetAdapter:
+    """Read features and people from a Google Sheet and write results to new tabs."""
+    # OAuth scopes requested when authorising the service account.
+    scope: ClassVar = [
+        "https://spreadsheets.google.com/feeds",
+        "https://www.googleapis.com/auth/drive",
+    ]
+    # Cell format applied to the first row (A1:U1) of output tabs.
+    hl_light_blue: ClassVar = {
+        "backgroundColor": {
+            "red": 153 / 255,
+            "green": 204 / 255,
+            "blue": 255 / 255,
+        }
+    }
+    # Sheets API colour channels are fractions in [0, 1]; the previous 5 / 2.5 / 0
+    # were out of range. 1.0 / 0.5 / 0.0 keeps the same 2:1 red:green ratio (orange).
+    hl_orange: ClassVar = {"backgroundColor": {"red": 1.0, "green": 0.5, "blue": 0.0}}
+    def __init__(self, auth_json_path: Path, gen_rem_tab: str = "on") -> None:
+        """Store configuration; the client and spreadsheet are created lazily on first use."""
+        self.auth_json_path = auth_json_path
+        self._client: gspread.client.Client | None = None
+        self._spreadsheet: gspread.Spreadsheet | None = None
+        # Output tab names are prefixes: _clear_or_create_tab appends a run number.
+        self.original_selected_tab_name = "Original Selected - output - "
+        self.selected_tab_name = "Selected"
+        self.columns_selected_first = "C"
+        self.column_selected_blank_num = 6
+        self.remaining_tab_name = "Remaining - output - "
+        self.new_tab_default_size_rows = 2
+        self.new_tab_default_size_cols = 40
+        self.g_sheet_name = ""
+        self._messages: list[str] = []
+        self.features_loaded = False
+        self.people_loaded = False
+        # "on" / "off" string rather than a bool.
+        self.gen_rem_tab = gen_rem_tab  # Added for checkbox.
+    def messages(self) -> list[str]:
+        """Return accumulated messages and reset"""
+        messages = self._messages
+        self._messages = []
+        return messages
+    @property
+    def client(self) -> gspread.client.Client:
+        """Authorised gspread client, built from the JSON key file on first access."""
+        if self._client is None:
+            creds = ServiceAccountCredentials.from_json_keyfile_name(
+                str(self.auth_json_path),
+                self.scope,
+            )
+            self._client = gspread.authorize(creds)
+        return self._client
+    @property
+    def spreadsheet(self) -> gspread.Spreadsheet:
+        """Spreadsheet named ``g_sheet_name``, opened on first access and then cached."""
+        # NOTE(review): the cache is not invalidated if ``g_sheet_name`` changes later.
+        if self._spreadsheet is None:
+            self._spreadsheet = self.client.open(self.g_sheet_name)
+            self._messages.append(f"Opened Google Sheet: '{self.g_sheet_name}'. ")
+        return self._spreadsheet
+    def _tab_exists(self, tab_name: str) -> bool:
+        """Return True if the spreadsheet has a worksheet titled exactly ``tab_name``."""
+        # The ``spreadsheet`` property never returns None, so no None check is needed.
+        return any(tab.title == tab_name for tab in self.spreadsheet.worksheets())
+    def _clear_or_create_tab(self, tab_name: str, other_tab_name: str, inc: int) -> gspread.Worksheet:
+        """Create and return a new, numbered worksheet.
+        Finds the lowest number N (from 0) for which neither ``tab_name``N nor
+        ``other_tab_name``N exists, then adds a worksheet titled ``tab_name``N,
+        or ``tab_name``(N - 1) when ``inc`` is -1.
+        """
+        # this now does not clear data but increments the sheet number...
+        num = 0
+        tab_ready: gspread.Worksheet | None = None
+        tab_name_new = f"{tab_name}{num}"
+        other_tab_name_new = f"{other_tab_name}{num}"
+        while tab_ready is None:
+            if self._tab_exists(tab_name_new) or self._tab_exists(other_tab_name_new):
+                num += 1
+                tab_name_new = f"{tab_name}{num}"
+                other_tab_name_new = f"{other_tab_name}{num}"
+            else:
+                if inc == -1:
+                    # Used for the second tab of a pair: the partner tab was just created
+                    # with the free number, so step back one to share that number.
+                    tab_name_new = f"{tab_name}{num - 1}"
+                tab_ready = self.spreadsheet.add_worksheet(
+                    title=tab_name_new,
+                    rows=self.new_tab_default_size_rows,
+                    cols=self.new_tab_default_size_cols,
+                )
+        return tab_ready
+    def load_features(self, g_sheet_name: str, feature_tab_name: str) -> tuple[FeatureCollection | None, list[str]]:
+        """Load features from tab ``feature_tab_name`` of spreadsheet ``g_sheet_name``.
+        Returns ``(features, messages)``; ``features`` is None when the spreadsheet
+        or the tab cannot be found.
+        """
+        self.g_sheet_name = g_sheet_name
+        features: FeatureCollection | None = None
+        try:
+            if not self._tab_exists(feature_tab_name):
+                self._messages.append(f"Error in Google sheet: no tab called '{feature_tab_name}' found. ")
+                return None, self.messages()
+        except gspread.SpreadsheetNotFound:
+            self._messages.append(f"Google spreadsheet not found: {self.g_sheet_name}. ")
+            return None, self.messages()
+        tab_features = self.spreadsheet.worksheet(feature_tab_name)
+        feature_head = tab_features.row_values(1)
+        feature_body = _stringify_records(tab_features.get_all_records(expected_headers=[]))
+        features, msgs = read_in_features(feature_head, feature_body)
+        self.features_loaded = True
+        self._messages += msgs
+        return features, self.messages()
+    def load_people(
+        self,
+        respondents_tab_name: str,
+        settings: Settings,
+        features: FeatureCollection,
+    ) -> tuple[People | None, list[str]]:
+        """Load people from tab ``respondents_tab_name`` of the current spreadsheet.
+        Returns ``(people, messages)``; ``people`` is None when the spreadsheet
+        or the tab cannot be found. Any previously accumulated messages are discarded.
+        """
+        self._messages = []
+        people: People | None = None
+        try:
+            if not self._tab_exists(respondents_tab_name):
+                self._messages.append(
+                    f"Error in Google sheet: no tab called '{respondents_tab_name}' found. ",
+                )
+                return None, self.messages()
+        except gspread.SpreadsheetNotFound:
+            self._messages.append(f"Google spreadsheet not found: {self.g_sheet_name}. ")
+            return None, self.messages()
+        tab_people = self.spreadsheet.worksheet(respondents_tab_name)
+        # if we don't read this in here we can't check if there are 2 columns with the same name
+        people_head = tab_people.row_values(1)
+        # the numericise_ignore doesn't convert the phone numbers to ints...
+        # 1 Oct 2024: the final argument with expected_headers is to deal with the fact that
+        # updated versions of gspread can't cope with duplicate headers
+        people_body = _stringify_records(
+            tab_people.get_all_records(
+                numericise_ignore=["all"],
+                expected_headers=[],
+            )
+        )
+        self._messages.append(f"Reading in '{respondents_tab_name}' tab in above Google sheet.")
+        people, msgs = read_in_people(people_head, people_body, features, settings)
+        self._messages += msgs
+        self.people_loaded = True
+        return people, self.messages()
+    def output_selected_remaining(
+        self,
+        people_selected_rows: list[list[str]],
+        people_remaining_rows: list[list[str]],
+        settings: Settings,
+    ) -> list[int]:
+        """Write the selected (and optionally remaining) tables to new tabs.
+        Returns the sorted 1-based sheet row numbers in the remaining tab that share
+        an address with another, non-identical row. The list is empty when no
+        remaining tab is generated or address checking is disabled.
+        """
+        tab_original_selected = self._clear_or_create_tab(
+            self.original_selected_tab_name,
+            self.remaining_tab_name,
+            0,
+        )
+        tab_original_selected.update(people_selected_rows)
+        tab_original_selected.format("A1:U1", self.hl_light_blue)
+        dupes: list[int] = []
+        if self.gen_rem_tab == "on":
+            tab_remaining = self._clear_or_create_tab(
+                self.remaining_tab_name,
+                self.original_selected_tab_name,
+                -1,
+            )
+            tab_remaining.update(people_remaining_rows)
+            tab_remaining.format("A1:U1", self.hl_light_blue)
+            # highlight any people in remaining tab at the same address
+            # TODO: do we ever actually hit this code? We should have deleted
+            # all the people who might have been duplicates in selected_remaining_tables()
+            if settings.check_same_address:
+                address_cols: list[int] = []
+                for csa in settings.check_same_address_columns:
+                    cell = tab_remaining.find(csa)
+                    if cell is None:
+                        # The tabs are already written; highlighting is cosmetic, so
+                        # report the problem and skip it rather than crash here.
+                        self._messages.append(
+                            f"Could not find address column '{csa}' in the remaining tab, "
+                            "so same-address rows were not highlighted. "
+                        )
+                        return dupes
+                    # gspread's Cell.col is 1-based but the row lists are 0-based;
+                    # without the shift the column AFTER the address one is compared.
+                    address_cols.append(cell.col - 1)
+                dupes_set: set[int] = set()
+                n = len(people_remaining_rows)
+                for i in range(n):
+                    rowrem1 = people_remaining_rows[i]
+                    for j in range(i + 1, n):
+                        rowrem2 = people_remaining_rows[j]
+                        # Fully identical rows are deliberately not counted as a pair.
+                        if rowrem1 != rowrem2 and all(rowrem1[col] == rowrem2[col] for col in address_cols):
+                            # +1 converts the 0-based list index to a 1-based sheet row.
+                            dupes_set.add(i + 1)
+                            dupes_set.add(j + 1)
+                dupes = sorted(dupes_set)
+                # Only the first 30 rows are highlighted (one format call per row);
+                # the full list is still returned.
+                for row_number in dupes[:30]:
+                    tab_remaining.format(str(row_number), self.hl_orange)
+        return dupes
+    def output_multi_selections(
+        self,
+        multi_selections: list[list[str]],
+    ) -> None:
+        """Write a multi-selection table to a new selected tab; no remaining tab is made."""
+        # Callers must have constructed the adapter with gen_rem_tab="off".
+        assert self.gen_rem_tab == "off"
+        tab_original_selected = self._clear_or_create_tab(
+            self.original_selected_tab_name,
+            "ignoreme",
+            0,
+        )
+        tab_original_selected.update(multi_selections)
+        tab_original_selected.format("A1:U1", self.hl_light_blue)