tunned-geobr 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. tunned_geobr/__init__.py +34 -2
  2. tunned_geobr/list_geobr.py +112 -34
  3. tunned_geobr/read_apcb_amazon.py +78 -0
  4. tunned_geobr/read_apcb_caatinga.py +78 -0
  5. tunned_geobr/read_apcb_cerrado_pantanal.py +78 -0
  6. tunned_geobr/read_apcb_mata_atlantica.py +78 -0
  7. tunned_geobr/read_apcb_pampa.py +78 -0
  8. tunned_geobr/read_apcb_zcm.py +78 -0
  9. tunned_geobr/read_archaeological_sites.py +94 -0
  10. tunned_geobr/read_atlantic_forest_law_limits.py +74 -0
  11. tunned_geobr/read_baze_sites.py +155 -0
  12. tunned_geobr/read_biosphere_reserves.py +85 -0
  13. tunned_geobr/read_cave_potential.py +79 -0
  14. tunned_geobr/read_census_tract_2022.py +101 -0
  15. tunned_geobr/read_ebas.py +80 -0
  16. tunned_geobr/read_federal_highways.py +79 -0
  17. tunned_geobr/read_fossil_occurrences.py +94 -0
  18. tunned_geobr/read_geographic_regions.py +88 -0
  19. tunned_geobr/read_heliports.py +81 -0
  20. tunned_geobr/read_municipality_direct.py +127 -0
  21. tunned_geobr/read_natural_caves.py +83 -0
  22. tunned_geobr/read_neighborhoods_2022.py +99 -0
  23. tunned_geobr/read_pan_strategic_areas.py +89 -0
  24. tunned_geobr/read_ports.py +80 -0
  25. tunned_geobr/read_private_aerodromes.py +81 -0
  26. tunned_geobr/read_public_aerodromes.py +81 -0
  27. tunned_geobr/read_quilombola_areas.py +85 -0
  28. tunned_geobr/read_quilombola_areas_temp.py +103 -0
  29. tunned_geobr/read_railways.py +80 -0
  30. tunned_geobr/read_rppn.py +107 -0
  31. tunned_geobr/read_sigef_properties.py +83 -0
  32. tunned_geobr/read_snci_properties.py +83 -0
  33. tunned_geobr/read_state_direct.py +103 -0
  34. tunned_geobr/read_state_highways.py +79 -0
  35. tunned_geobr/read_transmission_lines_ons.py +87 -0
  36. tunned_geobr/read_vegetation.py +84 -0
  37. tunned_geobr/read_water_bodies_ana.py +87 -0
  38. tunned_geobr/read_waterways.py +80 -0
  39. {tunned_geobr-0.1.2.dist-info → tunned_geobr-0.2.1.dist-info}/METADATA +35 -3
  40. tunned_geobr-0.2.1.dist-info/RECORD +82 -0
  41. tunned_geobr-0.1.2.dist-info/RECORD +0 -46
  42. {tunned_geobr-0.1.2.dist-info → tunned_geobr-0.2.1.dist-info}/WHEEL +0 -0
  43. {tunned_geobr-0.1.2.dist-info → tunned_geobr-0.2.1.dist-info}/entry_points.txt +0 -0
  44. {tunned_geobr-0.1.2.dist-info → tunned_geobr-0.2.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,78 @@
+ import geopandas as gpd
+ import tempfile
+ import os
+ import requests
+ from zipfile import ZipFile
+ from io import BytesIO
+
+ def read_apcb_zcm(simplified=False):
+     """Download Priority Areas for Biodiversity Conservation (APCB) data for the Coastal and Marine Zone.
+
+     This function downloads and processes APCB data from the Ministry of Environment (MMA).
+     The data includes priority areas for biodiversity conservation in the Coastal and Marine Zone (ZCM).
+     Original source: Ministério do Meio Ambiente (MMA)
+
+     Parameters
+     ----------
+     simplified : boolean, by default False
+         If True, returns a simplified version of the dataset with fewer columns
+
+     Returns
+     -------
+     gpd.GeoDataFrame
+         Geodataframe with Coastal and Marine Zone APCB data
+
+     Example
+     -------
+     >>> from tunned_geobr import read_apcb_zcm
+
+     # Read Coastal and Marine Zone APCB data
+     >>> apcb = read_apcb_zcm()
+     """
+
+     url = "https://www.gov.br/mma/pt-br/assuntos/biodiversidade-e-biomas/biomas-e-ecossistemas/conservacao-1/areas-prioritarias/arquivos/zcm.zip"
+
+     try:
+         # Download the zip file
+         response = requests.get(url)
+         if response.status_code != 200:
+             raise Exception("Failed to download data from MMA")
+
+         # Create a temporary directory
+         with tempfile.TemporaryDirectory() as temp_dir:
+             # Extract the zip file
+             with ZipFile(BytesIO(response.content)) as zip_ref:
+                 zip_ref.extractall(temp_dir)
+
+             # Find the shapefile
+             shp_files = []
+             for root, dirs, files in os.walk(temp_dir):
+                 shp_files.extend([os.path.join(root, f) for f in files if f.endswith('.shp')])
+
+             if not shp_files:
+                 raise Exception("No shapefile found in the downloaded data")
+
+             # Read the shapefile
+             gdf = gpd.read_file(shp_files[0])
+             gdf = gdf.to_crs(4674)  # Convert to SIRGAS 2000
+
+             if simplified:
+                 # Keep only the most relevant columns
+                 columns_to_keep = [
+                     'geometry',
+                     'Nome',        # Area name
+                     'Importanci',  # Importance
+                     'Prioridade',  # Priority
+                     'Area_km2',    # Area in km²
+                     'Oportunida',  # Opportunity
+                     'Ameaca'       # Threats
+                 ]
+
+                 # Filter columns that actually exist in the dataset
+                 existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                 gdf = gdf[existing_columns]
+
+     except Exception as e:
+         raise Exception(f"Error downloading Coastal and Marine Zone APCB data: {str(e)}")
+
+     return gdf
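
All six read_apcb_* readers added in this release share the download-extract-read pattern above, differing only in the biome URL and the documented column subset. A minimal usage sketch; the printed column list depends on what the source shapefile actually carries, since absent columns are silently dropped:

import geopandas as gpd  # returned objects are GeoDataFrames
from tunned_geobr import read_apcb_zcm

# Full dataset with every column from the source shapefile
apcb = read_apcb_zcm()

# Curated column subset; columns missing from the shapefile are dropped
apcb_simple = read_apcb_zcm(simplified=True)
print(apcb_simple.columns.tolist())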
@@ -0,0 +1,94 @@
+ import geopandas as gpd
+ import tempfile
+ import os
+ import requests
+ from zipfile import ZipFile
+ from io import BytesIO
+
+ def read_archaeological_sites(simplified=False):
+     """Download Archaeological Sites data from IPHAN.
+
+     This function downloads and processes data about archaeological sites in Brazil
+     from IPHAN (Instituto do Patrimônio Histórico e Artístico Nacional). The data
+     includes location and basic information about registered archaeological sites.
+     Original source: IPHAN - Instituto do Patrimônio Histórico e Artístico Nacional
+
+     Parameters
+     ----------
+     simplified : boolean, by default False
+         If True, returns a simplified version of the dataset with fewer columns
+
+     Returns
+     -------
+     gpd.GeoDataFrame
+         Geodataframe with archaeological sites data
+         Columns:
+         - geometry: Site location
+         - nome: Site name
+         - municipio: Municipality
+         - uf: State
+         - tipo: Site type
+         - exposicao: Exposure type
+         - relevancia: Relevance
+         - preservacao: Preservation state
+         - datacao: Dating
+         - artefatos: Artifacts found
+         - fonte: Data source
+
+     Example
+     -------
+     >>> from tunned_geobr import read_archaeological_sites
+
+     # Read archaeological sites data
+     >>> sites = read_archaeological_sites()
+     """
+
+     url = "http://portal.iphan.gov.br/geoserver/SICG/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=SICG:sitios&maxFeatures=50000&outputFormat=SHAPE-ZIP"
+
+     try:
+         # Download the zip file
+         response = requests.get(url)
+         if response.status_code != 200:
+             raise Exception("Failed to download data from IPHAN")
+
+         # Create a temporary directory
+         with tempfile.TemporaryDirectory() as temp_dir:
+             # Extract the zip file
+             with ZipFile(BytesIO(response.content)) as zip_ref:
+                 zip_ref.extractall(temp_dir)
+
+             # Find the shapefile
+             shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
+             if not shp_files:
+                 raise Exception("No shapefile found in the downloaded data")
+
+             # Read the shapefile
+             gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))
+             gdf = gdf.to_crs(4674)  # Convert to SIRGAS 2000
+
+             # Print columns for debugging
+             print("Available columns:", gdf.columns)
+
+             if simplified:
+                 # Keep only the most relevant columns
+                 columns_to_keep = [
+                     'geometry',
+                     'nome',         # Site name
+                     'municipio',    # Municipality
+                     'uf',           # State
+                     'tipo',         # Site type
+                     'relevancia',   # Relevance
+                     'preservacao'   # Preservation state
+                 ]
+
+                 # Filter columns that actually exist in the dataset
+                 existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                 gdf = gdf[existing_columns]
+
+     except Exception as e:
+         raise Exception(f"Error downloading archaeological sites data: {str(e)}")
+
+     return gdf
+
+ if __name__ == '__main__':
+     read_archaeological_sites()
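
The IPHAN endpoint is a plain GeoServer WFS service, so the hard-coded query string can be tuned rather than edited by hand. A hedged sketch, assuming the server also exposes GeoJSON output (common for GeoServer deployments, but not verified here), which skips the SHAPE-ZIP extraction step entirely:

import geopandas as gpd
import requests
from io import BytesIO

params = {
    "service": "WFS",
    "version": "1.0.0",
    "request": "GetFeature",
    "typeName": "SICG:sitios",
    "maxFeatures": 1000,                 # small sample instead of the 50000 above
    "outputFormat": "application/json",  # GeoJSON instead of SHAPE-ZIP; an assumption
}
response = requests.get("http://portal.iphan.gov.br/geoserver/SICG/ows", params=params)
sites = gpd.read_file(BytesIO(response.content))  # GeoJSON needs no unzip step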
@@ -0,0 +1,74 @@
+ import geopandas as gpd
+ import tempfile
+ import os
+ import requests
+ from zipfile import ZipFile
+ from io import BytesIO
+
+ def read_atlantic_forest_law_limits(simplified=False):
+     """Download Atlantic Forest Legal Limits data from MMA/IBGE.
+
+     This function downloads and processes data about the Atlantic Forest legal limits
+     as defined by Law 11.428/2006. The data is provided by IBGE and MMA (Ministry of Environment).
+     Original source: MMA - Ministério do Meio Ambiente
+
+     Parameters
+     ----------
+     simplified : boolean, by default False
+         If True, returns a simplified version of the dataset with fewer columns
+
+     Returns
+     -------
+     gpd.GeoDataFrame
+         Geodataframe with Atlantic Forest legal limits data
+
+     Example
+     -------
+     >>> from tunned_geobr import read_atlantic_forest_law_limits
+
+     # Read Atlantic Forest legal limits data
+     >>> limits = read_atlantic_forest_law_limits()
+     """
+
+     url = "http://antigo.mma.gov.br/estruturas/202/_arquivos/shape_mata_atlantica_ibge_5milhoes_policonica_sirgas2000shp_202.zip"
+
+     try:
+         # Download the zip file
+         response = requests.get(url)
+         if response.status_code != 200:
+             raise Exception("Failed to download data from MMA")
+
+         # Create a temporary directory
+         with tempfile.TemporaryDirectory() as temp_dir:
+             # Extract the zip file
+             with ZipFile(BytesIO(response.content)) as zip_ref:
+                 zip_ref.extractall(temp_dir)
+
+             # Find the shapefile
+             shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
+             if not shp_files:
+                 raise Exception("No shapefile found in the downloaded data")
+
+             # Read the shapefile
+             gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))
+             gdf = gdf.to_crs(4674)  # Convert to SIRGAS 2000
+
+             if simplified:
+                 # Keep only the most relevant columns
+                 columns_to_keep = [
+                     'geometry',
+                     'NM_TEMA',    # Theme name
+                     'NM_REGIAO',  # Region name
+                     'AREA_KM2',   # Area in km²
+                     'LEI',        # Law reference
+                     'FONTE'       # Data source
+                 ]
+
+                 # Filter columns that actually exist in the dataset
+                 existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                 gdf = gdf[existing_columns]
+
+     except Exception as e:
+         raise Exception(f"Error downloading Atlantic Forest legal limits data: {str(e)}")
+
+     return gdf
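
Since the simplified output keeps AREA_KM2 only when the source shapefile actually carries it, areas can be recomputed from the geometry instead. A sketch; the choice of EPSG:5880 (SIRGAS 2000 / Brazil Polyconic), a projected CRS commonly used for country-wide measurements in Brazil, is an assumption of this sketch, not something the function itself uses:

from tunned_geobr import read_atlantic_forest_law_limits

limits = read_atlantic_forest_law_limits(simplified=True)

# Reproject to a metric CRS, take .area in m², convert to km²
limits["area_km2_calc"] = limits.geometry.to_crs(5880).area / 1e6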
@@ -0,0 +1,155 @@
+ import geopandas as gpd
+ import tempfile
+ import os
+ import requests
+ import subprocess
+ import platform
+ import shutil
+ from io import BytesIO
+
+ def read_baze_sites(simplified=False):
+     """Download Brazilian BAZE Sites data from MMA.
+
+     This function downloads and processes the Brazilian BAZE Sites data
+     (Sites of Biological Importance and Ecosystem Services) from the
+     Ministry of Environment (MMA).
+     Original source: MMA - Ministério do Meio Ambiente
+
+     Note: This function requires either 'unrar' or 'unar' to be installed on your system
+     to extract the RAR file. If you don't have these tools installed, you'll need to
+     install them:
+     - On macOS: brew install unrar or brew install unar
+     - On Ubuntu/Debian: sudo apt-get install unrar or sudo apt-get install unar
+     - On Windows: Install WinRAR or 7-Zip
+
+     Parameters
+     ----------
+     simplified : boolean, by default False
+         If True, returns a simplified version of the dataset with fewer columns
+
+     Returns
+     -------
+     gpd.GeoDataFrame
+         Geodataframe with Brazilian BAZE Sites data
+
+     Example
+     -------
+     >>> from tunned_geobr import read_baze_sites
+
+     # Read BAZE Sites data
+     >>> baze_sites = read_baze_sites()
+     """
+
+     url = "http://antigo.mma.gov.br/images/arquivo/80046/Especies/SitiosBAZE_2018.rar"
+
+     # Check if extraction tools are available
+     unrar_available = shutil.which('unrar') is not None
+     unar_available = shutil.which('unar') is not None
+     seven_zip_available = shutil.which('7z') is not None
+
+     if not (unrar_available or unar_available or seven_zip_available):
+         os_name = platform.system()
+         if os_name == 'Darwin':  # macOS
+             install_msg = "Install with: brew install unrar or brew install unar"
+         elif os_name == 'Linux':
+             install_msg = "Install with: sudo apt-get install unrar or sudo apt-get install unar"
+         elif os_name == 'Windows':
+             install_msg = "Install WinRAR or 7-Zip"
+         else:
+             install_msg = "Install unrar, unar, or 7-Zip"
+
+         raise Exception(f"No RAR extraction tool found. This function requires unrar, unar, or 7-Zip to extract the data. {install_msg}")
+
+     try:
+         # Create a temporary directory
+         with tempfile.TemporaryDirectory() as temp_dir:
+             # Download the RAR file to the temporary directory
+             rar_file_path = os.path.join(temp_dir, "baze_sites.rar")
+
+             # Download the file
+             response = requests.get(url)
+             if response.status_code != 200:
+                 raise Exception("Failed to download BAZE Sites data from MMA")
+
+             # Save the content to a file
+             with open(rar_file_path, 'wb') as f:
+                 f.write(response.content)
+
+             # Extract the RAR file using available tools
+             extraction_success = False
+             extraction_error = ""
+
+             if unrar_available:
+                 try:
+                     subprocess.run(['unrar', 'x', rar_file_path, temp_dir],
+                                    check=True,
+                                    stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE)
+                     extraction_success = True
+                 except subprocess.CalledProcessError as e:
+                     extraction_error += f"unrar failed: {e.stderr.decode()}. "
+
+             if not extraction_success and unar_available:
+                 try:
+                     subprocess.run(['unar', '-d', rar_file_path, '-o', temp_dir],
+                                    check=True,
+                                    stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE)
+                     extraction_success = True
+                 except subprocess.CalledProcessError as e:
+                     extraction_error += f"unar failed: {e.stderr.decode()}. "
+
+             if not extraction_success and seven_zip_available:
+                 try:
+                     subprocess.run(['7z', 'x', rar_file_path, f'-o{temp_dir}'],
+                                    check=True,
+                                    stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE)
+                     extraction_success = True
+                 except subprocess.CalledProcessError as e:
+                     extraction_error += f"7z failed: {e.stderr.decode()}. "
+
+             if not extraction_success:
+                 raise Exception(f"Failed to extract RAR file: {extraction_error}")
+
+             # Find the shapefile
+             shp_files = []
+             for root, dirs, files in os.walk(temp_dir):
+                 shp_files.extend([os.path.join(root, f) for f in files if f.endswith('.shp')])
+
+             if not shp_files:
+                 raise Exception("No shapefile found in the downloaded data")
+
+             # Read the shapefile
+             gdf = gpd.read_file(shp_files[0])
+
+             # Convert to SIRGAS 2000 (EPSG:4674) if not already
+             if gdf.crs is None or gdf.crs.to_epsg() != 4674:
+                 gdf = gdf.to_crs(4674)
+
+             if simplified:
+                 # Keep only the most relevant columns
+                 # Note: Column names may need adjustment based on actual data
+                 columns_to_keep = [
+                     'geometry',
+                     'nome',         # Site name
+                     'categoria',    # Category
+                     'area_km2',     # Area in square kilometers
+                     'bioma',        # Biome
+                     'uf',           # State
+                     'importancia',  # Importance
+                     'descricao',    # Description
+                     'referencia'    # Reference
+                 ]
+
+                 # Filter columns that actually exist in the dataset
+                 existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                 gdf = gdf[existing_columns]
+
+     except Exception as e:
+         raise Exception(f"Error downloading BAZE Sites data: {str(e)}")
+
+     return gdf
+
+ if __name__ == '__main__':
+     read_baze_sites()
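
Because read_baze_sites raises immediately when no RAR extractor is on the PATH, callers can mirror its shutil.which probe as a pre-flight check. A sketch using the same three tool names the function itself looks for:

import shutil

def rar_tool_available():
    # Same probes read_baze_sites performs internally
    return any(shutil.which(tool) for tool in ("unrar", "unar", "7z"))

if rar_tool_available():
    from tunned_geobr import read_baze_sites
    baze = read_baze_sites(simplified=True)
else:
    print("Install unrar, unar, or 7-Zip before calling read_baze_sites()")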
@@ -0,0 +1,85 @@
+ import geopandas as gpd
+ import tempfile
+ import os
+ import gdown
+ from zipfile import ZipFile
+
+ def read_biosphere_reserves(simplified=False):
+     """Download Brazilian Biosphere Reserves data.
+
+     This function downloads and processes the Brazilian Biosphere Reserves data
+     from a Google Drive repository. The data includes UNESCO Biosphere Reserves
+     in Brazil, which are protected areas with high biodiversity value.
+     Original source: MMA - Ministério do Meio Ambiente
+
+     Parameters
+     ----------
+     simplified : boolean, by default False
+         If True, returns a simplified version of the dataset with fewer columns
+
+     Returns
+     -------
+     gpd.GeoDataFrame
+         Geodataframe with Brazilian biosphere reserves data
+
+     Example
+     -------
+     >>> from tunned_geobr import read_biosphere_reserves
+
+     # Read biosphere reserves data
+     >>> reserves = read_biosphere_reserves()
+     """
+
+     # Google Drive folder URL
+     folder_url = "https://drive.google.com/drive/folders/19ygCKsQrI1gfRMe1jUbHZGQibbN_oLAC"
+
+     try:
+         # Create a temporary directory
+         with tempfile.TemporaryDirectory() as temp_dir:
+             # Download the file from Google Drive
+             output_zip = os.path.join(temp_dir, "biosphere_reserves.zip")
+
+             # Use gdown to download the folder contents
+             gdown.download_folder(folder_url, output=temp_dir, quiet=False)
+
+             # Find the shapefile
+             shp_files = []
+             for root, dirs, files in os.walk(temp_dir):
+                 shp_files.extend([os.path.join(root, f) for f in files if f.endswith('.shp')])
+
+             if not shp_files:
+                 raise Exception("No shapefile found in the downloaded data")
+
+             # Read the shapefile
+             gdf = gpd.read_file(shp_files[0])
+
+             # Convert to SIRGAS 2000 (EPSG:4674) if not already
+             if gdf.crs is None or gdf.crs.to_epsg() != 4674:
+                 gdf = gdf.to_crs(4674)
+
+             if simplified:
+                 # Keep only the most relevant columns
+                 # Note: Column names may need adjustment based on actual data
+                 columns_to_keep = [
+                     'geometry',
+                     'nome',          # Reserve name
+                     'categoria',     # Category
+                     'area_km2',      # Area in square kilometers
+                     'bioma',         # Biome
+                     'uf',            # State
+                     'ano_criacao',   # Creation year
+                     'legislacao',    # Legislation
+                     'orgao_gestor'   # Managing agency
+                 ]
+
+                 # Filter columns that actually exist in the dataset
+                 existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                 gdf = gdf[existing_columns]
+
+     except Exception as e:
+         raise Exception(f"Error downloading biosphere reserves data: {str(e)}")
+
+     return gdf
+
+ if __name__ == '__main__':
+     read_biosphere_reserves()
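
gdown.download_folder fetches the whole Drive folder into a fresh temporary directory on every call, so repeated reads pay the download cost each time. A sketch of caching the folder in a persistent directory instead; the cache path and the skip-if-present logic are assumptions of this sketch, not part of the package:

import os
import gdown
import geopandas as gpd

folder_url = "https://drive.google.com/drive/folders/19ygCKsQrI1gfRMe1jUbHZGQibbN_oLAC"
cache_dir = os.path.expanduser("~/.cache/tunned_geobr/biosphere_reserves")  # hypothetical path

if not os.path.isdir(cache_dir):
    os.makedirs(cache_dir, exist_ok=True)
    gdown.download_folder(folder_url, output=cache_dir, quiet=True)

# Reuse the same shapefile discovery walk the function performs
shp_path = next(
    os.path.join(root, f)
    for root, _, files in os.walk(cache_dir)
    for f in files if f.endswith(".shp")
)
reserves = gpd.read_file(shp_path)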
@@ -0,0 +1,79 @@
+ import geopandas as gpd
+ import tempfile
+ import os
+ import requests
+ from zipfile import ZipFile
+ from io import BytesIO
+
+ def read_cave_potential(simplified=False):
+     """Download Cave Occurrence Potential data from ICMBio.
+
+     This function downloads and processes data about the potential for cave occurrence
+     across Brazil. The data is based on lithological characteristics and was produced
+     by ICMBio's National Center for Cave Research and Conservation (CECAV).
+     Original source: ICMBio - Instituto Chico Mendes de Conservação da Biodiversidade
+
+     Parameters
+     ----------
+     simplified : boolean, by default False
+         If True, returns a simplified version of the dataset with fewer columns
+
+     Returns
+     -------
+     gpd.GeoDataFrame
+         Geodataframe with cave occurrence potential data
+         Columns:
+         - geometry: Geometry of the area
+         - METODOLOGI: Methodology used to determine potential
+         - GRAU_DE_PO: Potential degree (Very High, High, Medium, Low, Very Low)
+         - COUNT: Number of occurrences in the area
+
+     Example
+     -------
+     >>> from tunned_geobr import read_cave_potential
+
+     # Read cave potential data
+     >>> potential = read_cave_potential()
+     """
+
+     url = "https://www.gov.br/icmbio/pt-br/assuntos/centros-de-pesquisa/cavernas/publicacoes/mapa-de-potencialidades-de-ocorrencia-de-cavernas-no-brasil/dados-mapa-de-potencialidades-de-ocorrencia-de-cavermas-no-brasil.zip/@@download/file"
+
+     try:
+         # Download the zip file
+         response = requests.get(url)
+         if response.status_code != 200:
+             raise Exception("Failed to download data from ICMBio")
+
+         # Create a temporary directory
+         with tempfile.TemporaryDirectory() as temp_dir:
+             # Extract the zip file
+             with ZipFile(BytesIO(response.content)) as zip_ref:
+                 zip_ref.extractall(temp_dir)
+
+             # Find the shapefile
+             shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
+             if not shp_files:
+                 raise Exception("No shapefile found in the downloaded data")
+
+             # Read the shapefile
+             gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))
+             gdf = gdf.to_crs(4674)  # Convert to SIRGAS 2000
+
+             if simplified:
+                 # Keep only the most relevant columns
+                 columns_to_keep = [
+                     'geometry',
+                     'GRAU_DE_PO'  # Potential degree
+                 ]
+
+                 # Filter columns that actually exist in the dataset
+                 existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                 gdf = gdf[existing_columns]
+
+     except Exception as e:
+         raise Exception(f"Error downloading cave potential data: {str(e)}")
+
+     return gdf
+
+ if __name__ == '__main__':
+     read_cave_potential()
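
With simplified=True the result carries little beyond geometry and GRAU_DE_PO, which makes class tallies straightforward. A usage sketch; the class labels are in Portuguese in the source data, so the exact strings should be checked against the downloaded file:

from tunned_geobr import read_cave_potential

potential = read_cave_potential(simplified=True)

# Polygon counts per potential class (e.g. "Muito Alto" ... "Muito Baixo")
print(potential["GRAU_DE_PO"].value_counts())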
@@ -0,0 +1,101 @@
+ import geopandas as gpd
+ import tempfile
+ import os
+ import requests
+ import subprocess
+ from io import BytesIO
+
+ def read_census_tract_2022(simplified=False):
+     """Download Brazilian Census Tracts data from IBGE (2022 Census).
+
+     This function downloads and processes the Brazilian Census Tracts data
+     from IBGE (Brazilian Institute of Geography and Statistics) for the 2022 Census.
+     Original source: IBGE - Instituto Brasileiro de Geografia e Estatística
+
+     Parameters
+     ----------
+     simplified : boolean, by default False
+         If True, returns a simplified version of the dataset with fewer columns
+
+     Returns
+     -------
+     gpd.GeoDataFrame
+         Geodataframe with Brazilian census tracts data
+
+     Example
+     -------
+     >>> from tunned_geobr import read_census_tract_2022
+
+     # Read census tracts data
+     >>> census_tracts = read_census_tract_2022()
+     """
+
+     url = "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2022/setores/shp/BR/BR_setores_CD2022.zip"
+
+     try:
+         # Create a temporary directory
+         with tempfile.TemporaryDirectory() as temp_dir:
+             # Download the zip file to the temporary directory
+             zip_file_path = os.path.join(temp_dir, "census_tracts.zip")
+
+             # Download the file
+             response = requests.get(url)
+             if response.status_code != 200:
+                 raise Exception("Failed to download census tracts data from IBGE")
+
+             # Save the content to a file
+             with open(zip_file_path, 'wb') as f:
+                 f.write(response.content)
+
+             # Use unzip command line tool to extract the file (handles more compression methods)
+             try:
+                 subprocess.run(['unzip', '-o', zip_file_path, '-d', temp_dir],
+                                check=True,
+                                stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE)
+             except subprocess.CalledProcessError as e:
+                 raise Exception(f"Failed to extract zip file: {e.stderr.decode()}")
+
+             # Find the shapefile
+             shp_files = []
+             for root, dirs, files in os.walk(temp_dir):
+                 shp_files.extend([os.path.join(root, f) for f in files if f.endswith('.shp')])
+
+             if not shp_files:
+                 raise Exception("No shapefile found in the downloaded data")
+
+             # Read the shapefile
+             gdf = gpd.read_file(shp_files[0])
+
+             # Convert to SIRGAS 2000 (EPSG:4674) if not already
+             if gdf.crs is None or gdf.crs.to_epsg() != 4674:
+                 gdf = gdf.to_crs(4674)
+
+             if simplified:
+                 # Keep only the most relevant columns
+                 # Note: Column names may need adjustment based on actual data
+                 columns_to_keep = [
+                     'geometry',
+                     'CD_SETOR',  # Census Tract Code
+                     'NM_SETOR',  # Census Tract Name
+                     'CD_SIT',    # Situation Code
+                     'NM_SIT',    # Situation Name (Urban/Rural)
+                     'CD_MUN',    # Municipality Code
+                     'NM_MUN',    # Municipality Name
+                     'CD_UF',     # State Code
+                     'NM_UF',     # State Name
+                     'SIGLA_UF',  # State Abbreviation
+                     'AREA_KM2'   # Area in square kilometers
+                 ]
+
+                 # Filter columns that actually exist in the dataset
+                 existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                 gdf = gdf[existing_columns]
+
+     except Exception as e:
+         raise Exception(f"Error downloading census tracts data: {str(e)}")
+
+     return gdf
+
+ if __name__ == '__main__':
+     read_census_tract_2022()
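
Note that this reader shells out to the unzip command-line tool, so it needs unzip on the PATH in addition to network access, and the nationwide 2022 tract file is large. A usage sketch that filters to one municipality right away; CD_MUN is the IBGE column name the simplified branch expects, and the São Paulo code below is illustrative:

from tunned_geobr import read_census_tract_2022

tracts = read_census_tract_2022(simplified=True)

# Keep only tracts for the municipality of São Paulo (IBGE code 3550308)
sp_tracts = tracts[tracts["CD_MUN"] == "3550308"]
print(len(sp_tracts), "census tracts")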