PyPI - tunned-geobr - Versions diffs - 0.1.0__py3-none-any.whl - Mend

tunned-geobr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

tunned_geobr/__init__.py +38 -0
tunned_geobr/constants.py +13 -0
tunned_geobr/data/grid_state_correspondence_table.csv +140 -0
tunned_geobr/list_geobr.py +39 -0
tunned_geobr/lookup_muni.py +111 -0
tunned_geobr/read_amazon.py +42 -0
tunned_geobr/read_amazon_ibas.py +92 -0
tunned_geobr/read_atlantic_forest_ibas.py +93 -0
tunned_geobr/read_biomes.py +43 -0
tunned_geobr/read_census_tract.py +97 -0
tunned_geobr/read_climate_aggressiveness.py +74 -0
tunned_geobr/read_comparable_areas.py +75 -0
tunned_geobr/read_conservation_units.py +43 -0
tunned_geobr/read_country.py +43 -0
tunned_geobr/read_disaster_risk_area.py +47 -0
tunned_geobr/read_geology.py +77 -0
tunned_geobr/read_geomorphology.py +77 -0
tunned_geobr/read_health_facilities.py +49 -0
tunned_geobr/read_health_region.py +52 -0
tunned_geobr/read_immediate_region.py +81 -0
tunned_geobr/read_indigenous_land.py +44 -0
tunned_geobr/read_intermediate_region.py +61 -0
tunned_geobr/read_meso_region.py +78 -0
tunned_geobr/read_metro_area.py +44 -0
tunned_geobr/read_micro_region.py +78 -0
tunned_geobr/read_mining_processes.py +76 -0
tunned_geobr/read_municipal_seat.py +41 -0
tunned_geobr/read_municipality.py +83 -0
tunned_geobr/read_neighborhood.py +39 -0
tunned_geobr/read_pedology.py +77 -0
tunned_geobr/read_pop_arrangements.py +45 -0
tunned_geobr/read_region.py +41 -0
tunned_geobr/read_schools.py +44 -0
tunned_geobr/read_semiarid.py +42 -0
tunned_geobr/read_settlements.py +85 -0
tunned_geobr/read_state.py +88 -0
tunned_geobr/read_statistical_grid.py +127 -0
tunned_geobr/read_urban_area.py +44 -0
tunned_geobr/read_urban_concentrations.py +46 -0
tunned_geobr/read_weighting_area.py +74 -0
tunned_geobr/utils.py +326 -0
tunned_geobr-0.1.0.dist-info/METADATA +103 -0
tunned_geobr-0.1.0.dist-info/RECORD +46 -0
tunned_geobr-0.1.0.dist-info/WHEEL +4 -0
tunned_geobr-0.1.0.dist-info/entry_points.txt +4 -0
tunned_geobr-0.1.0.dist-info/licenses/LICENSE.txt +2 -0

tunned_geobr/read_pedology.py ADDED Viewed

@@ -0,0 +1,77 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+from zipfile import ZipFile
+from io import BytesIO
+def read_pedology(simplified=False):
+    """Download official pedology (soil) data from IBGE.
+
+    Downloads the ``pedo_area.zip`` archive (scale 1:250,000, 2023 version)
+    from the IBGE geoftp server, extracts it into a temporary directory and
+    reads the first shapefile found inside it.
+
+    Original source: IBGE
+
+    Parameters
+    ----------
+    simplified : boolean, by default False
+        If True, keep only ``geometry`` plus whichever of the columns
+        CLASSE1, CLASSE2, TEXTURA, RELEVO, FASE, ORDEM and SUBORDEM exist
+        in the downloaded dataset.
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Geodataframe with pedological data
+
+    Raises
+    ------
+    Exception
+        If the server does not answer with HTTP 200, if the archive holds
+        no shapefile, or if any other step fails. The underlying error is
+        chained as ``__cause__``.
+
+    Example
+    -------
+    >>> from cursed_geobr import read_pedology
+    # Read pedology data
+    >>> pedology = read_pedology()
+    """
+    url = "https://geoftp.ibge.gov.br/informacoes_ambientais/pedologia/vetores/escala_250_mil/versao_2023/pedo_area.zip"
+    try:
+        # Download the zip file (held fully in memory)
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise Exception("Failed to download data from IBGE")
+        # Everything extracted is removed when the context manager exits
+        with tempfile.TemporaryDirectory() as temp_dir:
+            with ZipFile(BytesIO(response.content)) as zip_ref:
+                zip_ref.extractall(temp_dir)
+            # Walk the whole tree (top-down, so root-level files come first)
+            # because the archive may nest the shapefile in a sub-folder or
+            # use an upper-case extension.
+            shp_files = [
+                os.path.join(root, name)
+                for root, _dirs, names in os.walk(temp_dir)
+                for name in sorted(names)
+                if name.lower().endswith('.shp')
+            ]
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+            # Read while the temporary directory still exists
+            gdf = gpd.read_file(shp_files[0])
+            if simplified:
+                # Note: these column names are based on typical soil data
+                # structure and may need adjusting to the actual data.
+                candidate_columns = [
+                    'CLASSE1',    # Main soil class
+                    'CLASSE2',    # Secondary soil class
+                    'TEXTURA',    # Soil texture
+                    'RELEVO',     # Relief
+                    'FASE',       # Phase
+                    'ORDEM',      # Order
+                    'SUBORDEM',   # Suborder
+                ]
+                # Keep geometry plus the candidates that actually exist
+                existing_columns = ['geometry'] + [col for col in candidate_columns if col in gdf.columns]
+                gdf = gdf[existing_columns]
+    except Exception as e:
+        # Same exception type and message as before; "from e" keeps the
+        # original traceback available for debugging.
+        raise Exception(f"Error downloading pedology data: {str(e)}") from e
+    return gdf

tunned_geobr/read_pop_arrangements.py ADDED Viewed

@@ -0,0 +1,45 @@
+from cursed_geobr.utils import select_metadata, download_gpkg
+def read_pop_arrangements(year=2015, simplified=True, verbose=False):
+    """Download population arrangements in Brazil.
+
+    Reads the official data on population arrangements (Arranjos
+    Populacionais) of Brazil, originally generated by the Institute of
+    Geography and Statistics (IBGE). For more information about the
+    methodology, see
+    https://www.ibge.gov.br/apps/arranjos_populacionais/2015/pdf/publicacao.pdf
+
+    Parameters
+    ----------
+    year : int, optional
+        Year of the data, by default 2015
+    simplified : boolean, by default True
+        Forwarded to ``select_metadata``; selects the dataset with
+        'simplified' borders (True) or the 'original' high-resolution
+        dataset (False)
+    verbose : bool, optional
+        Accepted but not used by this function, by default False
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        The result of ``download_gpkg`` for the selected metadata
+
+    Raises
+    ------
+    Exception
+        Presumably raised by the helpers when parameters are not found or
+        not well defined
+
+    Example
+    -------
+    >>> from cursed_geobr import read_pop_arrangements
+    # Read the population arrangements of a given year
+    >>> df = read_pop_arrangements(year=2015)
+    """
+    # NOTE(review): the key is spelled 'pop_arrengements'; presumably it
+    # matches the metadata table, so confirm before "correcting" it.
+    arrangement_metadata = select_metadata(
+        'pop_arrengements', year=year, simplified=simplified
+    )
+    return download_gpkg(arrangement_metadata)

tunned_geobr/read_region.py ADDED Viewed

@@ -0,0 +1,41 @@
+from cursed_geobr.utils import select_metadata, download_gpkg
+def read_region(year=2010, simplified=True, verbose=False):
+    """Download the Brazilian regions as a geopandas object.
+
+    Data at scale 1:250,000, using Geodetic reference system "SIRGAS2000"
+    and CRS(4674).
+
+    Parameters
+    ----------
+    year : int, optional
+        Year of the data, by default 2010
+    simplified : boolean, by default True
+        Forwarded to ``select_metadata``; selects the dataset with
+        'simplified' borders (True) or the 'original' high-resolution
+        dataset (False)
+    verbose : bool, optional
+        Accepted but not used by this function, by default False
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        The result of ``download_gpkg`` for the selected metadata
+
+    Raises
+    ------
+    Exception
+        Presumably raised by the helpers when parameters are not found or
+        not well defined
+
+    Example
+    -------
+    >>> from cursed_geobr import read_region
+    # Read the regions of a given year
+    >>> df = read_region(year=2010)
+    """
+    region_metadata = select_metadata(
+        "regions", year=year, simplified=simplified
+    )
+    return download_gpkg(region_metadata)

tunned_geobr/read_schools.py ADDED Viewed

@@ -0,0 +1,44 @@
+from cursed_geobr.utils import select_metadata, download_gpkg
+def read_schools(year=2020, verbose=False):
+    """Download geolocated data of schools.
+
+    Data comes from the School Census collected by INEP, the National
+    Institute for Educational Studies and Research Anisio Teixeira. The
+    date of the last data update is registered in the column
+    'date_update'. The data uses Geodetic reference system "SIRGAS2000"
+    and CRS(4674). The coordinates of each school are collected by INEP
+    and periodically revised to improve the quality of the data. More
+    information available at
+    https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/inep-data/catalogo-de-escolas/
+
+    Parameters
+    ----------
+    year : int, optional
+        Year of the data, by default 2020
+    verbose : bool, optional
+        Accepted but not used by this function, by default False
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        The result of ``download_gpkg`` for the selected metadata
+
+    Raises
+    ------
+    Exception
+        Presumably raised by the helpers when parameters are not found or
+        not well defined
+
+    Example
+    -------
+    >>> from cursed_geobr import read_schools
+    # Read the schools of a given year
+    >>> df = read_schools(year=2020)
+    """
+    # This function always requests the non-simplified dataset.
+    school_metadata = select_metadata(
+        "schools", year=year, simplified=False
+    )
+    return download_gpkg(school_metadata)

tunned_geobr/read_semiarid.py ADDED Viewed

@@ -0,0 +1,42 @@
+from cursed_geobr.utils import select_metadata, download_gpkg
+def read_semiarid(year=2017, simplified=True, verbose=False):
+    """Download official data of the Brazilian Semiarid as a geopandas object.
+
+    This data set covers the whole of the Brazilian Semiarid as defined in
+    the resolution of 23/11/2017. The original data comes from the
+    Brazilian Institute of Geography and Statistics (IBGE) and can be found
+    at
+    https://www.ibge.gov.br/geociencias/cartas-e-mapas/mapas-regionais/15974-semiarido-brasileiro.html?=&t=downloads
+
+    Parameters
+    ----------
+    year : int, optional
+        Year of the data, by default 2017
+    simplified : boolean, by default True
+        Forwarded to ``select_metadata``; selects the dataset with
+        'simplified' borders (True) or the 'original' high-resolution
+        dataset (False)
+    verbose : bool, optional
+        Accepted but not used by this function, by default False
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        The result of ``download_gpkg`` for the selected metadata
+
+    Raises
+    ------
+    Exception
+        Presumably raised by the helpers when parameters are not found or
+        not well defined
+
+    Example
+    -------
+    >>> from cursed_geobr import read_semiarid
+    # Read the semiarid delimitation of a given year
+    >>> df = read_semiarid(year=2017)
+    """
+    semiarid_metadata = select_metadata(
+        "semiarid", year=year, simplified=simplified
+    )
+    return download_gpkg(semiarid_metadata)

tunned_geobr/read_settlements.py ADDED Viewed

@@ -0,0 +1,85 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+from zipfile import ZipFile
+from io import BytesIO
+def read_settlements(simplified=False):
+    """Download official settlements data from INCRA.
+
+    Downloads the "Assentamento Brasil" zip archive with settlements
+    (assentamentos) from INCRA (Instituto Nacional de Colonização e Reforma
+    Agrária), extracts it into a temporary directory and reads the first
+    shapefile found inside it.
+
+    Original source: INCRA - Certificação de Imóveis Rurais
+
+    Parameters
+    ----------
+    simplified : boolean, by default False
+        If True, keep only ``geometry`` plus whichever of the columns
+        NOME_PROJE, MUNICIPIO, UF, AREA_HA, NUM_FAMILI, CAPACIDADE,
+        DT_CRIACAO and SITUACAO exist in the downloaded dataset.
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Geodataframe with settlements data
+
+    Raises
+    ------
+    Exception
+        If the server does not answer with HTTP 200, if the archive holds
+        no shapefile, or if any other step fails. The underlying error is
+        chained as ``__cause__``.
+
+    Example
+    -------
+    >>> from cursed_geobr import read_settlements
+    # Read settlements data
+    >>> settlements = read_settlements()
+    """
+    url = "https://certificacao.incra.gov.br/csv_shp/zip/Assentamento%20Brasil.zip"
+    try:
+        # Silence InsecureRequestWarning BEFORE the request is made; doing
+        # it afterwards lets the first call still emit the warning.
+        # Note that this affects the whole process, not only this call.
+        import urllib3
+        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+        # SECURITY: certificate verification is disabled because of INCRA's
+        # certificate issues, so the download is not protected against
+        # tampering. Re-enable verification once the server is fixed.
+        response = requests.get(url, verify=False)
+        if response.status_code != 200:
+            raise Exception("Failed to download data from INCRA")
+        # Everything extracted is removed when the context manager exits
+        with tempfile.TemporaryDirectory() as temp_dir:
+            with ZipFile(BytesIO(response.content)) as zip_ref:
+                zip_ref.extractall(temp_dir)
+            # Walk the whole tree (top-down, so root-level files come first)
+            # because the archive may nest the shapefile in a sub-folder or
+            # use an upper-case extension.
+            shp_files = [
+                os.path.join(root, name)
+                for root, _dirs, names in os.walk(temp_dir)
+                for name in sorted(names)
+                if name.lower().endswith('.shp')
+            ]
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+            # Read while the temporary directory still exists
+            gdf = gpd.read_file(shp_files[0])
+            if simplified:
+                candidate_columns = [
+                    'NOME_PROJE',  # Settlement project name
+                    'MUNICIPIO',   # Municipality
+                    'UF',          # State
+                    'AREA_HA',     # Area in hectares
+                    'NUM_FAMILI',  # Number of families
+                    'CAPACIDADE',  # Family capacity
+                    'DT_CRIACAO',  # Creation date
+                    'SITUACAO',    # Settlement status
+                ]
+                # Keep geometry plus the candidates that actually exist
+                existing_columns = ['geometry'] + [col for col in candidate_columns if col in gdf.columns]
+                gdf = gdf[existing_columns]
+    except Exception as e:
+        # Same exception type and message as before; "from e" keeps the
+        # original traceback available for debugging.
+        raise Exception(f"Error downloading settlements data: {str(e)}") from e
+    return gdf
+if __name__ == '__main__':
+    # Manual smoke test: download the full settlements dataset and print it.
+    print(read_settlements())

tunned_geobr/read_state.py ADDED Viewed

@@ -0,0 +1,88 @@
+import geopandas as gpd
+from cursed_geobr.utils import select_metadata, download_gpkg
+def read_state(code_state="all", year=2010, simplified=True, verbose=False):
+    """Download shapefiles of Brazilian states as geopandas objects.
+
+    Data at scale 1:250,000, using Geodetic reference system "SIRGAS2000"
+    and CRS(4674).
+
+    Parameters
+    ----------
+    code_state : str or int, optional
+        The two-digit code of a state or a two-letter uppercase abbreviation
+        (e.g. 33, "33" or "RJ"). If code_state="all", all states will be
+        loaded (Default). For years before 1992 the whole country is always
+        loaded, whatever the value of ``code_state``.
+    year : int, optional
+        Year of the data, by default 2010
+    simplified : boolean, by default True
+        Forwarded to ``select_metadata``; selects the dataset with
+        'simplified' borders (True) or the 'original' high-resolution
+        dataset (False)
+    verbose : bool, optional
+        If True, print a message when the whole country is loaded,
+        by default False
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        The result of ``download_gpkg`` for the selected states
+
+    Raises
+    ------
+    Exception
+        If ``code_state`` is None or its first two characters match neither
+        the "code" nor the "code_abbrev" column of the metadata
+
+    Example
+    -------
+    >>> from cursed_geobr import read_state
+    # Read specific state at a given year
+    >>> uf = read_state(code_state=12, year=2017)
+    # Read specific state at a given year with the original (non-simplified) borders
+    >>> uf = read_state(code_state="SC", year=2000, simplified=False)
+    # Read all states at a given year
+    >>> ufs = read_state(code_state="all", year=2010)
+    """
+    # Reject an unusable argument before any metadata lookup is attempted.
+    if code_state is None:
+        raise Exception("Value to argument 'code_state' cannot be None")
+    metadata = select_metadata("state", year=year, simplified=simplified)
+    # From 1872 to 1991 and all
+    if (year < 1992) or (code_state == "all"):
+        if verbose:
+            print("Loading data for the whole country\n")
+        return download_gpkg(metadata)
+    # From 2000 onwards: only the first two characters identify the state.
+    prefix = str(code_state)[0:2]
+    # Choose the column by what the prefix matches rather than by the Python
+    # type of the argument, so that a numeric string such as "33" is
+    # filtered on "code" instead of yielding an empty "code_abbrev" match.
+    if prefix in metadata["code"].unique():
+        metadata = metadata[metadata["code"] == prefix]
+    elif prefix in metadata["code_abbrev"].unique():
+        metadata = metadata[metadata["code_abbrev"] == prefix]
+    else:
+        raise Exception("Error: Invalid Value to argument code_state.")
+    gdf = download_gpkg(metadata)
+    if len(str(code_state)) == 2:
+        return gdf
+    if code_state in list(gdf["code_state"]):
+        # Filter on the argument's value; the previous query compared the
+        # column with the literal string "code_state" and was always empty.
+        return gdf[gdf["code_state"] == code_state]
+    raise Exception("Error: Invalid Value to argument code_state.")

tunned_geobr/read_statistical_grid.py ADDED Viewed

@@ -0,0 +1,127 @@
+import sys
+from geobr import __path__ as geobr_directory
+from cursed_geobr.utils import select_metadata, download_gpkg
+from numpy import unique
+from pandas import read_csv
+def read_statistical_grid(code_grid="all", year=2010, simplified=False, verbose=False):
+    """Download spatial data of IBGE's statistical grid.
+
+    Data at scale 1:250,000, using Geodetic reference system "SIRGAS2000"
+    and CRS(4674).
+
+    Parameters
+    ----------
+    code_grid : str or int, optional
+        * ``"all"`` (Default): the grid of the whole country is loaded.
+        * a two-letter state abbreviation: all grid quadrants that
+          `grid_state_correspondence_table.csv` associates with that state
+          are loaded.
+        * a grid quadrant id, either as int (a single digit is left-padded
+          with a zero, e.g. 4 -> "04") or as the equivalent string found in
+          the "code" column of the metadata: that quadrant is loaded.
+    year : int, optional
+        Year of the data, by default 2010
+    simplified : boolean, by default False
+        Forwarded to ``select_metadata``; selects the dataset with
+        'simplified' borders (True) or the 'original' high-resolution
+        dataset (False)
+    verbose : bool, optional
+        If True, print a message when the whole country is loaded,
+        by default False
+
+    Returns
+    -------
+    gpd.GeoDataFrame or None
+        The result of ``download_gpkg`` for the selection, or None when
+        ``select_metadata`` returns None
+
+    Raises
+    ------
+    SystemExit
+        If ``code_grid`` is None or invalid, or if the geobr installation
+        directory cannot be found
+
+    Example
+    -------
+    >>> from cursed_geobr import read_statistical_grid
+    # Read the whole grid at a given year
+    >>> df = read_statistical_grid(year=2010)
+    """
+    # NOTE(review): this function aborts with sys.exit() while its siblings
+    # raise Exception; kept as-is so callers see the same exit behavior.
+    temp_meta = select_metadata(
+        geo="statistical_grid", year=year, simplified=simplified
+    )
+    if temp_meta is None:
+        return None
+    # Validate the argument before touching the filesystem or the network.
+    if code_grid is None:
+        sys.exit("Value to argument 'code_grid' cannot be NULL")
+    # The whole-country download does not need the correspondence table.
+    if code_grid == "all":
+        if verbose:
+            print("Loading data for the whole country. This might take a few minutes.")
+        # NOTE(review): this branch hands the "download_path" column to
+        # download_gpkg, the other branches hand it filtered metadata rows;
+        # confirm that download_gpkg accepts both.
+        return download_gpkg(temp_meta["download_path"])
+    if len(geobr_directory) == 0:
+        sys.exit("Geobr installation directory not found.")
+    grid_file_path = geobr_directory[0] + "/data/grid_state_correspondence_table.csv"
+    with open(grid_file_path, "rb") as file:
+        # Read every column used below as text
+        dtypes = {"name_state": str, "abbrev_state": str, "code_grid": str}
+        grid_state_correspondence_table = read_csv(
+            file, encoding="latin-1", dtype=dtypes
+        )
+    valid_codes = temp_meta["code"].to_list()
+    if isinstance(code_grid, str):
+        grid_abbrev_state = grid_state_correspondence_table["abbrev_state"]
+        if code_grid in grid_abbrev_state.to_list():
+            # Find grid quadrants that intersect with the passed state abbreviation
+            state_rows = grid_state_correspondence_table[
+                grid_state_correspondence_table["abbrev_state"] == code_grid
+            ]
+            # Keep what follows the first underscore of each id
+            # (presumably 'ID_<n>' -> '<n>')
+            grid_ids = [
+                substr[substr.index("_") + 1 :]
+                for substr in state_rows["code_grid"].to_list()
+            ]
+            return download_gpkg(temp_meta[temp_meta["code"].isin(grid_ids)])
+        # Not a state: accept the string only if it is a known quadrant id
+        if code_grid not in valid_codes:
+            sys.exit(
+                "Error: Invalid Value to argument 'code_grid'. It must be one of the following: "
+                + str(unique(grid_abbrev_state.to_numpy().tolist()))
+            )
+    elif isinstance(code_grid, int):
+        # Convert to str to match the metadata codes; a single digit gets a
+        # leading zero (ex: 4 -> 04)
+        code_grid = str(code_grid).zfill(2)
+    if code_grid not in valid_codes:
+        sys.exit("Error: Invalid Value to argument code_grid.")
+    # Filter by code, then download the matching quadrant
+    return download_gpkg(temp_meta[temp_meta["code"].isin([code_grid])])

tunned_geobr/read_urban_area.py ADDED Viewed

@@ -0,0 +1,44 @@
+from cursed_geobr.utils import select_metadata, download_gpkg
+def read_urban_area(year=2015, simplified=True, verbose=False):
+    """Download official data of urbanized areas in Brazil as a geopandas object.
+
+    Reads the official data on the urban footprint of Brazilian cities in
+    the years 2005 and 2015. Original data were generated by the Institute
+    of Geography and Statistics (IBGE). For more information about the
+    methodology, see details at
+    https://biblioteca.ibge.gov.br/visualizacao/livros/liv100639.pdf
+
+    Parameters
+    ----------
+    year : int, optional
+        Year of the data, by default 2015
+    simplified : boolean, by default True
+        Forwarded to ``select_metadata``; selects the dataset with
+        'simplified' borders (True) or the 'original' high-resolution
+        dataset (False)
+    verbose : bool, optional
+        Accepted but not used by this function, by default False
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        The result of ``download_gpkg`` for the selected metadata
+
+    Raises
+    ------
+    Exception
+        Presumably raised by the helpers when parameters are not found or
+        not well defined
+
+    Example
+    -------
+    >>> from cursed_geobr import read_urban_area
+    # Read the urbanized areas of a given year
+    >>> df = read_urban_area(year=2015)
+    """
+    urban_area_metadata = select_metadata(
+        "urban_area", year=year, simplified=simplified
+    )
+    return download_gpkg(urban_area_metadata)

tunned_geobr/read_urban_concentrations.py ADDED Viewed

@@ -0,0 +1,46 @@
+from cursed_geobr.utils import select_metadata, download_gpkg
+def read_urban_concentrations(year=2015, simplified=True, verbose=False):
+    """Download urban concentration areas in Brazil.
+
+    Reads the official data on the urban concentration areas (Areas de
+    Concentracao de Populacao) of Brazil. Original data were generated by
+    the Institute of Geography and Statistics (IBGE). For more information
+    about the methodology, see details at
+    https://www.ibge.gov.br/apps/arranjos_populacionais/2015/pdf/publicacao.pdf
+
+    Parameters
+    ----------
+    year : int, optional
+        Year of the data, by default 2015
+    simplified : boolean, by default True
+        Forwarded to ``select_metadata``; selects the dataset with
+        'simplified' borders (True) or the 'original' high-resolution
+        dataset (False)
+    verbose : bool, optional
+        Accepted but not used by this function, by default False
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        The result of ``download_gpkg`` for the selected metadata
+
+    Raises
+    ------
+    Exception
+        Presumably raised by the helpers when parameters are not found or
+        not well defined
+
+    Example
+    -------
+    >>> from cursed_geobr import read_urban_concentrations
+    # Read the urban concentration areas of a given year
+    >>> df = read_urban_concentrations(year=2015)
+    """
+    concentration_metadata = select_metadata(
+        'urban_concentrations', year=year, simplified=simplified
+    )
+    return download_gpkg(concentration_metadata)