tunned-geobr 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tunned_geobr/__init__.py +38 -0
- tunned_geobr/constants.py +13 -0
- tunned_geobr/data/grid_state_correspondence_table.csv +140 -0
- tunned_geobr/list_geobr.py +39 -0
- tunned_geobr/lookup_muni.py +111 -0
- tunned_geobr/read_amazon.py +42 -0
- tunned_geobr/read_amazon_ibas.py +92 -0
- tunned_geobr/read_atlantic_forest_ibas.py +93 -0
- tunned_geobr/read_biomes.py +43 -0
- tunned_geobr/read_census_tract.py +97 -0
- tunned_geobr/read_climate_aggressiveness.py +74 -0
- tunned_geobr/read_comparable_areas.py +75 -0
- tunned_geobr/read_conservation_units.py +43 -0
- tunned_geobr/read_country.py +43 -0
- tunned_geobr/read_disaster_risk_area.py +47 -0
- tunned_geobr/read_geology.py +77 -0
- tunned_geobr/read_geomorphology.py +77 -0
- tunned_geobr/read_health_facilities.py +49 -0
- tunned_geobr/read_health_region.py +52 -0
- tunned_geobr/read_immediate_region.py +81 -0
- tunned_geobr/read_indigenous_land.py +44 -0
- tunned_geobr/read_intermediate_region.py +61 -0
- tunned_geobr/read_meso_region.py +78 -0
- tunned_geobr/read_metro_area.py +44 -0
- tunned_geobr/read_micro_region.py +78 -0
- tunned_geobr/read_mining_processes.py +76 -0
- tunned_geobr/read_municipal_seat.py +41 -0
- tunned_geobr/read_municipality.py +83 -0
- tunned_geobr/read_neighborhood.py +39 -0
- tunned_geobr/read_pedology.py +77 -0
- tunned_geobr/read_pop_arrangements.py +45 -0
- tunned_geobr/read_region.py +41 -0
- tunned_geobr/read_schools.py +44 -0
- tunned_geobr/read_semiarid.py +42 -0
- tunned_geobr/read_settlements.py +85 -0
- tunned_geobr/read_state.py +88 -0
- tunned_geobr/read_statistical_grid.py +127 -0
- tunned_geobr/read_urban_area.py +44 -0
- tunned_geobr/read_urban_concentrations.py +46 -0
- tunned_geobr/read_weighting_area.py +74 -0
- tunned_geobr/utils.py +326 -0
- tunned_geobr-0.1.0.dist-info/METADATA +103 -0
- tunned_geobr-0.1.0.dist-info/RECORD +46 -0
- tunned_geobr-0.1.0.dist-info/WHEEL +4 -0
- tunned_geobr-0.1.0.dist-info/entry_points.txt +4 -0
- tunned_geobr-0.1.0.dist-info/licenses/LICENSE.txt +2 -0
@@ -0,0 +1,97 @@
|
|
1
|
+
from cursed_geobr.utils import select_metadata, download_gpkg, test_options
|
2
|
+
|
3
|
+
|
4
|
+
def read_census_tract(
    code_tract, year=2010, zone="urban", simplified=True, verbose=False
):
    """Download shape files of census tracts of the Brazilian Population Census (Only years 2000 and 2010 are currently available).

    Parameters
    ----------
    code_tract: int
        The 7-digit code of a Municipality. If the two-digit code or a two-letter uppercase abbreviation of
        a state is passed, (e.g. 33 or "RJ") the function will load all census tracts of that state. If code_tract="all",
        all census tracts of the country are loaded.
    year : int, optional
        Year of the data, by default 2010
    zone: string, optional
        "urban" or "rural" census tracts come in separate files in the year 2000, by default urban
    simplified: boolean, by default True
        Data 'type', indicating whether the function returns the 'original' dataset
        with high resolution or a dataset with 'simplified' borders (Default)
    verbose : bool, optional
        by default False

    Returns
    -------
    gpd.GeoDataFrame
        Metadata and geopackage of selected states

    Raises
    ------
    Exception
        If parameters are not found or not well defined

    Example
    -------
    >>> from cursed_geobr import read_census_tract

    # Read rural census tracts for years before 2007
    >>> df = read_census_tract(code_tract=5201108, year=2000, zone='rural')

    # Read all census tracts of a state at a given year
    >>> df = read_census_tract(code_tract=53, year=2010) # or
    >>> df = read_census_tract(code_tract="DF", year=2010)

    # Read all census tracts of a municipality at a given year
    >>> df = read_census_tract(code_tract=5201108, year=2010)

    # Read all census tracts of the country at a given year
    >>> df = read_census_tract(code_tract="all", year=2010)
    """

    # Validate arguments before any download work happens.
    test_options(zone, "zone", allowed=["urban", "rural"])
    test_options(code_tract, "code_tract", not_allowed=[None])

    # Metadata table with one row per downloadable file; assumed to carry
    # "code", "code_abbrev" and "download_path" columns — TODO confirm schema.
    metadata = select_metadata("census_tract", year=year, simplified=simplified)

    # For year <= 2007, the code, eg. U11, comes with a trailing letter U for urban and
    # R for rural. So, this code checks if the trailing code letter is the same as
    # the argument zone.
    if year <= 2007:

        metadata = metadata[
            metadata["code"].apply(lambda x: x[0].lower() == zone[0].lower())
        ]
        # [R]12 == [r]ural

    if code_tract == "all":

        if verbose:
            print("Loading data for the whole country. This might take a few minutes.")

        # Every remaining metadata row is downloaded and concatenated.
        return download_gpkg(metadata)

    else:

        # Keep only rows whose state code (numeric, e.g. "12") or state
        # abbreviation (e.g. "RO") contains the first two characters of
        # code_tract. The trailing 1 is the positional `axis` argument of
        # DataFrame.apply (row-wise).
        metadata = metadata[
            metadata[["code", "code_abbrev"]].apply(
                lambda x: str(code_tract)[:2] in str(x["code"])
                or str(code_tract)[:2]  # if number e.g. 12
                in str(x["code_abbrev"]),  # if UF e.g. RO
                1,
            )
        ]

        gdf = download_gpkg(metadata)

        # Two-character input means a whole state was requested: return as-is.
        if len(str(code_tract)) == 2:
            return gdf

        # Otherwise treat code_tract as a 7-digit municipality code and keep
        # only that municipality's tracts. NOTE(review): the membership test
        # is type-sensitive — a string "5201108" will not match integer
        # code_muni values and falls through to the error below.
        elif code_tract in gdf["code_muni"].tolist():
            return gdf.query(f"code_muni == {code_tract}")

        else:
            raise Exception("Invalid Value to argument code_tract.")
|
@@ -0,0 +1,74 @@
|
|
1
|
+
import geopandas as gpd
|
2
|
+
import tempfile
|
3
|
+
import os
|
4
|
+
import requests
|
5
|
+
from zipfile import ZipFile
|
6
|
+
from io import BytesIO
|
7
|
+
|
8
|
+
def read_climate_aggressiveness(simplified=False):
    """Download climate aggressiveness potential data from IBGE.

    This function downloads and processes climate aggressiveness potential data from IBGE
    (Brazilian Institute of Geography and Statistics). The data represents areas with
    different levels of climate aggressiveness based on rainfall patterns and other factors.
    Original source: IBGE

    Parameters
    ----------
    simplified : boolean, by default False
        If True, returns a simplified version of the dataset with fewer columns

    Returns
    -------
    gpd.GeoDataFrame
        Geodataframe with climate aggressiveness potential data

    Raises
    ------
    Exception
        If the download fails or no shapefile is found in the archive

    Example
    -------
    >>> from cursed_geobr import read_climate_aggressiveness

    # Read climate aggressiveness data
    >>> climate = read_climate_aggressiveness()
    """

    url = "https://geoftp.ibge.gov.br/informacoes_ambientais/climatologia/vetores/regionais/shapes_potencial_agressividade_climatica.zip"

    try:
        # Download the zip file. A timeout prevents the call from hanging
        # forever if the IBGE FTP mirror stops responding mid-connection.
        response = requests.get(url, timeout=300)
        if response.status_code != 200:
            raise Exception("Failed to download data from IBGE")

        # Create a temporary directory; it is removed automatically on exit
        with tempfile.TemporaryDirectory() as temp_dir:
            # Extract zip content
            with ZipFile(BytesIO(response.content)) as zip_ref:
                zip_ref.extractall(temp_dir)

            # Find the shapefile
            shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
            if not shp_files:
                raise Exception("No shapefile found in the downloaded data")

            # Read the shapefile (the .dbf/.shx sidecars were extracted too)
            gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))

            if simplified:
                # Keep only the most relevant columns
                # Note: These columns are based on typical climate data structure
                # You may want to adjust these based on the actual data
                columns_to_keep = [
                    'geometry',
                    'POTENCIAL',  # Aggressiveness potential
                    'CLASSE',     # Class
                    'DESCRICAO',  # Description
                ]

                # Filter columns that actually exist in the dataset
                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
                gdf = gdf[existing_columns]

    except Exception as e:
        # Chain the original exception so the real cause stays visible
        raise Exception(f"Error downloading climate aggressiveness data: {str(e)}") from e

    return gdf
|
@@ -0,0 +1,75 @@
|
|
1
|
+
from cursed_geobr.utils import select_metadata, download_gpkg
|
2
|
+
|
3
|
+
|
4
|
+
def read_comparable_areas(
    start_year=1970, end_year=2010, simplified=True, verbose=False
):
    r"""Download spatial data of historically comparable municipalities

    This function downloads the shape file of minimum comparable area of
    municipalities, known in Portuguese as 'Areas minimas comparaveis (AMCs)'.
    The data is available for any combination of census years between 1872-2010.
    These data sets are generated based on the Stata code originally developed by
    \doi{10.1590/0101-416147182phe}{Philipp Ehrl}, and translated
    into `R` by the `geobr` team.

    Years available:
    1872,1900,1911,1920,1933,1940,1950,1960,1970,1980,1991,2000,2010

    Parameters
    ----------
    start_year : int, optional
        Initial census year of the comparison period, by default 1970
    end_year : int, optional
        Final census year of the comparison period, by default 2010
    simplified: boolean, by default True
        Data 'type', indicating whether the function returns the 'original' dataset
        with high resolution or a dataset with 'simplified' borders (Default)
    verbose : bool, optional
        by default False

    Returns
    -------
    gpd.GeoDataFrame
        Metadata and geopackage of selected states

    Raises
    ------
    ValueError
        If `start_year` or `end_year` is not a census year, or if
        `start_year` is greater than `end_year`
    Exception
        If parameters are not found or not well defined

    Example
    -------
    >>> from cursed_geobr import read_comparable_areas

    # Read comparable areas for a given period
    >>> df = read_comparable_areas(start_year=1970, end_year=2010)
    """

    years_available = [
        1872,
        1900,
        1911,
        1920,
        1933,
        1940,
        1950,
        1960,
        1970,
        1980,
        1991,
        2000,
        2010,
    ]

    if (start_year not in years_available) or (end_year not in years_available):
        # NOTE: trailing space keeps the two concatenated sentences readable
        raise ValueError(
            "Invalid `start_year` or `end_year`. "
            f"It must be one of the following: {years_available}"
        )

    # An inverted period would silently match nothing in the metadata below,
    # so fail fast with a clear message instead.
    if start_year > end_year:
        raise ValueError("`start_year` must be less than or equal to `end_year`.")

    metadata = select_metadata("amc", year=start_year, simplified=simplified)

    # Each AMC file covers one start/end pair; its path encodes both years.
    metadata = metadata.query(f'download_path.str.contains("{start_year}_{end_year}")')

    gdf = download_gpkg(metadata)

    return gdf
|
@@ -0,0 +1,43 @@
|
|
1
|
+
from cursed_geobr.utils import select_metadata, download_gpkg
|
2
|
+
|
3
|
+
|
4
|
+
def read_conservation_units(date=201909, simplified=True, verbose=False):
    """Download official data of Brazilian conservation units as an sf object.

    This data set covers the whole of Brazil and it includes the polygons of all
    conservation units present in Brazilian territory. The last update of the
    data was 09-2019. The original data comes from MMA and can be found at
    http://mapas.mma.gov.br/i3geo/datadownload.htm .

    Parameters
    ----------
    date : int, optional
        A date number in YYYYMM format, by default 201909
    simplified: boolean, by default True
        Data 'type', indicating whether the function returns the 'original' dataset
        with high resolution or a dataset with 'simplified' borders (Default)
    verbose : bool, optional
        by default False

    Returns
    -------
    gpd.GeoDataFrame
        Metadata and geopackage of selected states

    Raises
    ------
    Exception
        If parameters are not found or not well defined

    Example
    -------
    >>> from cursed_geobr import read_conservation_units

    # Read conservation units for a given date
    >>> df = read_conservation_units(date=201909)
    """

    # Resolve the matching metadata row, then fetch and return the geopackage.
    return download_gpkg(
        select_metadata("conservation_units", year=date, simplified=simplified)
    )
|
@@ -0,0 +1,43 @@
|
|
1
|
+
from cursed_geobr.utils import select_metadata, download_gpkg
|
2
|
+
|
3
|
+
|
4
|
+
def read_country(year=2010, simplified=True, verbose=False):
    """Download the shape file of Brazil as an sf object.

    Data at scale 1:250,000, using Geodetic reference system "SIRGAS2000"
    and CRS(4674).

    Parameters
    ----------
    year : int, optional
        Year of the data, by default 2010
    simplified: boolean, by default True
        Data 'type', indicating whether the function returns the 'original' dataset
        with high resolution or a dataset with 'simplified' borders (Default)
    verbose : bool, optional
        by default False

    Returns
    -------
    gpd.GeoDataFrame
        Metadata and geopackage of selected states

    Raises
    ------
    Exception
        If parameters are not found or not well defined

    Example
    -------
    >>> from cursed_geobr import read_country

    # Read the country borders at a given year
    >>> df = read_country(year=2010)
    """

    # Look up the single country-level file for the requested year/resolution
    # and download it as a GeoDataFrame.
    return download_gpkg(
        select_metadata("country", year=year, simplified=simplified)
    )
|
@@ -0,0 +1,47 @@
|
|
1
|
+
from cursed_geobr.utils import select_metadata, download_gpkg
|
2
|
+
|
3
|
+
|
4
|
+
def read_disaster_risk_area(year=2010, simplified=True, verbose=False):
    """Download official data of disaster risk areas as an sf object.

    This function reads the official data of disaster risk areas in Brazil. It
    specifically focuses on geodynamic and hydro-meteorological disasters capable
    of triggering landslides and floods. The data set covers the whole country.
    Each risk area polygon (known as 'BATER') has a unique code id (column
    'geo_bater'). The data set brings information on the extent to which the risk
    area polygons overlap with census tracts and block faces (column "acuracia")
    and the number of risk areas within each risk area (column 'num'). Original
    data were generated by IBGE and CEMADEN. For more information about the
    methodology, see details at
    https://www.ibge.gov.br/geociencias/organizacao-do-territorio/tipologias-do-territorio/21538-populacao-em-areas-de-risco-no-brasil.html

    Parameters
    ----------
    year : int, optional
        Year of the data, by default 2010
    simplified: boolean, by default True
        Data 'type', indicating whether the function returns the 'original' dataset
        with high resolution or a dataset with 'simplified' borders (Default)
    verbose : bool, optional
        by default False

    Returns
    -------
    gpd.GeoDataFrame
        Metadata and geopackage of selected states

    Raises
    ------
    Exception
        If parameters are not found or not well defined

    Example
    -------
    >>> from cursed_geobr import read_disaster_risk_area

    # Read disaster risk areas at a given year
    >>> df = read_disaster_risk_area(year=2010)
    """

    # Select the metadata row for this dataset/year, then download it.
    return download_gpkg(
        select_metadata("disaster_risk_area", year=year, simplified=simplified)
    )
|
@@ -0,0 +1,77 @@
|
|
1
|
+
import geopandas as gpd
|
2
|
+
import tempfile
|
3
|
+
import os
|
4
|
+
import requests
|
5
|
+
from zipfile import ZipFile
|
6
|
+
from io import BytesIO
|
7
|
+
|
8
|
+
def read_geology(simplified=False):
    """Download official geology data from IBGE.

    This function downloads and processes geological data from IBGE (Brazilian Institute of Geography and Statistics).
    The data includes geological formations and units at 1:250,000 scale.
    Original source: IBGE

    Parameters
    ----------
    simplified : boolean, by default False
        If True, returns a simplified version of the dataset with fewer columns

    Returns
    -------
    gpd.GeoDataFrame
        Geodataframe with geological data

    Raises
    ------
    Exception
        If the download fails or no shapefile is found in the archive

    Example
    -------
    >>> from cursed_geobr import read_geology

    # Read geology data
    >>> geology = read_geology()
    """

    url = "https://geoftp.ibge.gov.br/informacoes_ambientais/geologia/levantamento_geologico/vetores/escala_250_mil/versao_2023/geol_area.zip"

    try:
        # Download the zip file. A timeout prevents the call from hanging
        # forever if the IBGE FTP mirror stops responding mid-connection.
        response = requests.get(url, timeout=300)
        if response.status_code != 200:
            raise Exception("Failed to download data from IBGE")

        # Create a temporary directory; it is removed automatically on exit
        with tempfile.TemporaryDirectory() as temp_dir:
            # Extract zip content
            with ZipFile(BytesIO(response.content)) as zip_ref:
                zip_ref.extractall(temp_dir)

            # Find the shapefile
            shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
            if not shp_files:
                raise Exception("No shapefile found in the downloaded data")

            # Read the shapefile (the .dbf/.shx sidecars were extracted too)
            gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))

            if simplified:
                # Keep only the most relevant columns
                # Note: These columns are based on typical geological data structure
                # You may want to adjust these based on the actual data
                columns_to_keep = [
                    'geometry',
                    'SIGLA_UNID',  # Unit code
                    'NOME_UNIDA',  # Unit name
                    'HIERARQUIA',  # Hierarchy
                    'IDADE_MAX',   # Maximum age
                    'IDADE_MIN',   # Minimum age
                    'ERRO_MAX',    # Maximum error
                    'ERRO_MIN',    # Minimum error
                    'ORIGEM',      # Origin
                    'LITOTIPO1',   # Main lithotype
                    'LITOTIPO2',   # Secondary lithotype
                ]

                # Filter columns that actually exist in the dataset so a
                # renamed/dropped attribute in a new IBGE release does not
                # raise a KeyError (consistent with read_geomorphology).
                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
                gdf = gdf[existing_columns]

    except Exception as e:
        # Chain the original exception so the real cause stays visible
        raise Exception(f"Error downloading geology data: {str(e)}") from e

    return gdf
|
@@ -0,0 +1,77 @@
|
|
1
|
+
import geopandas as gpd
|
2
|
+
import tempfile
|
3
|
+
import os
|
4
|
+
import requests
|
5
|
+
from zipfile import ZipFile
|
6
|
+
from io import BytesIO
|
7
|
+
|
8
|
+
def read_geomorphology(simplified=False):
    """Download official geomorphology data from IBGE.

    Downloads and processes geomorphological data from IBGE (Brazilian Institute
    of Geography and Statistics). The data includes geomorphological units and
    features at 1:250,000 scale.
    Original source: IBGE

    Parameters
    ----------
    simplified : boolean, by default False
        If True, returns a simplified version of the dataset with fewer columns

    Returns
    -------
    gpd.GeoDataFrame
        Geodataframe with geomorphological data

    Example
    -------
    >>> from cursed_geobr import read_geomorphology

    # Read geomorphology data
    >>> geomorph = read_geomorphology()
    """

    url = "https://geoftp.ibge.gov.br/informacoes_ambientais/geomorfologia/vetores/escala_250_mil/versao_2023/geom_area.zip"

    try:
        # Fetch the zipped shapefile bundle from the IBGE FTP server
        resp = requests.get(url)
        if resp.status_code != 200:
            raise Exception("Failed to download data from IBGE")

        # Work inside a throwaway directory that is cleaned up automatically
        with tempfile.TemporaryDirectory() as workdir:
            with ZipFile(BytesIO(resp.content)) as archive:
                archive.extractall(workdir)

            # Locate the shapefile among the extracted members
            shapefiles = [name for name in os.listdir(workdir) if name.endswith('.shp')]
            if not shapefiles:
                raise Exception("No shapefile found in the downloaded data")

            # Load the first (and expected only) shapefile
            gdf = gpd.read_file(os.path.join(workdir, shapefiles[0]))

            if simplified:
                # Reduce to the most relevant attributes; the list reflects the
                # typical IBGE geomorphology schema and may need adjusting.
                wanted = [
                    'geometry',
                    'COMPART',    # Compartment
                    'MODELADO',   # Landform model
                    'MORFOEST',   # Morphostructure
                    'MORFOLOG',   # Morphology
                    'DECLIVIDA',  # Slope
                    'AMPLIT_A',   # Amplitude
                    'ORDEM_REL',  # Relief order
                ]

                # Only keep attributes actually present in this release
                present = ['geometry'] + [c for c in wanted[1:] if c in gdf.columns]
                gdf = gdf[present]

    except Exception as e:
        raise Exception(f"Error downloading geomorphology data: {str(e)}")

    return gdf
|
@@ -0,0 +1,49 @@
|
|
1
|
+
from cursed_geobr.utils import select_metadata, download_gpkg
|
2
|
+
|
3
|
+
|
4
|
+
def read_health_facilities(date=202303, verbose=False):
    """Download geolocated data of health facilities as an sf object.

    Data comes from the National Registry of Healthcare facilities (Cadastro
    Nacional de Estabelecimentos de Saude - CNES), originally collected by the
    Brazilian Ministry of Health. The date of the last data update is registered
    in the database in the columns 'date_update' and 'year_update'. These data
    uses Geodetic reference system "SIRGAS2000" and CRS(4674). The coordinates of
    each facility was obtained by CNES and validated by means of space
    operations. These operations verify if the point is in the municipality,
    considering a radius of 5,000 meters. When the coordinate is not correct,
    further searches are done in other systems of the Ministry of Health and in
    web services like Google Maps. Finally, if the coordinates have been
    correctly obtained in this process, the coordinates of the municipal head
    office are used. The final source used is registered in the database in a
    specific column 'data_source'. Periodically the coordinates are revised with
    the objective of improving the quality of the data. More information
    available at http://dados.gov.br/dataset/cnes

    Parameters
    ----------
    date : Numeric. Date of the data in YYYYMM format. Defaults to `202303`,
        which was the latest data available by the time of this update.
    verbose : bool, optional
        by default False

    Returns
    -------
    gpd.GeoDataFrame
        Metadata and geopackage of selected states

    Raises
    ------
    Exception
        If parameters are not found or not well defined

    Example
    -------
    >>> from cursed_geobr import read_health_facilities

    # Read health facilities
    >>> df = read_health_facilities()
    """

    # This dataset only ships in one resolution, hence simplified=False.
    return download_gpkg(
        select_metadata("health_facilities", year=date, simplified=False)
    )
|
@@ -0,0 +1,52 @@
|
|
1
|
+
from cursed_geobr.utils import select_metadata, download_gpkg
|
2
|
+
|
3
|
+
|
4
|
+
def read_health_region(year=2013, macro=False, simplified=True, verbose=False):
    """Download official data of Brazilian health regions as an sf object.

    Health regions are used to guide the regional and state planning of health
    services. Macro health regions, in particular, are used to guide the
    planning of high complexity health services. These services involve larger
    economics of scale and are concentrated in few municipalities because they
    are generally more technology intensive, costly and face shortages of
    specialized professionals. A macro region comprises one or more health
    regions.

    Parameters
    ----------
    year : int, optional
        Year of the data, by default 2013
    macro: If `False` (default), the function downloads health regions data.
        If `True`, the function downloads macro regions data.
    simplified: boolean, by default True
        Data 'type', indicating whether the function returns the 'original' dataset
        with high resolution or a dataset with 'simplified' borders (Default)
    verbose : bool, optional
        by default False

    Returns
    -------
    gpd.GeoDataFrame
        Metadata and geopackage of selected states

    Raises
    ------
    Exception
        If parameters are not found or not well defined

    Example
    -------
    >>> from cursed_geobr import read_health_region

    # Read health regions at a given year
    >>> df = read_health_region(year=2013)
    """

    # Macro regions live in a separate dataset from the ordinary ones.
    dataset = "health_region_macro" if macro else "health_region"

    metadata = select_metadata(dataset, year=year, simplified=simplified)

    return download_gpkg(metadata)
|