tunned-geobr 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. tunned_geobr/__init__.py +34 -2
  2. tunned_geobr/read_apcb_amazon.py +78 -0
  3. tunned_geobr/read_apcb_caatinga.py +78 -0
  4. tunned_geobr/read_apcb_cerrado_pantanal.py +78 -0
  5. tunned_geobr/read_apcb_mata_atlantica.py +78 -0
  6. tunned_geobr/read_apcb_pampa.py +78 -0
  7. tunned_geobr/read_apcb_zcm.py +78 -0
  8. tunned_geobr/read_archaeological_sites.py +94 -0
  9. tunned_geobr/read_atlantic_forest_law_limits.py +74 -0
  10. tunned_geobr/read_baze_sites.py +155 -0
  11. tunned_geobr/read_biosphere_reserves.py +85 -0
  12. tunned_geobr/read_cave_potential.py +79 -0
  13. tunned_geobr/read_census_tract_2022.py +101 -0
  14. tunned_geobr/read_ebas.py +80 -0
  15. tunned_geobr/read_federal_highways.py +79 -0
  16. tunned_geobr/read_fossil_occurrences.py +94 -0
  17. tunned_geobr/read_geographic_regions.py +88 -0
  18. tunned_geobr/read_heliports.py +81 -0
  19. tunned_geobr/read_municipality_direct.py +127 -0
  20. tunned_geobr/read_natural_caves.py +83 -0
  21. tunned_geobr/read_neighborhoods_2022.py +99 -0
  22. tunned_geobr/read_pan_strategic_areas.py +89 -0
  23. tunned_geobr/read_ports.py +80 -0
  24. tunned_geobr/read_private_aerodromes.py +81 -0
  25. tunned_geobr/read_public_aerodromes.py +81 -0
  26. tunned_geobr/read_quilombola_areas.py +85 -0
  27. tunned_geobr/read_quilombola_areas_temp.py +103 -0
  28. tunned_geobr/read_railways.py +80 -0
  29. tunned_geobr/read_rppn.py +107 -0
  30. tunned_geobr/read_sigef_properties.py +83 -0
  31. tunned_geobr/read_snci_properties.py +83 -0
  32. tunned_geobr/read_state_direct.py +103 -0
  33. tunned_geobr/read_state_highways.py +79 -0
  34. tunned_geobr/read_transmission_lines_ons.py +87 -0
  35. tunned_geobr/read_vegetation.py +84 -0
  36. tunned_geobr/read_water_bodies_ana.py +87 -0
  37. tunned_geobr/read_waterways.py +80 -0
  38. {tunned_geobr-0.1.2.dist-info → tunned_geobr-0.2.0.dist-info}/METADATA +35 -3
  39. tunned_geobr-0.2.0.dist-info/RECORD +82 -0
  40. tunned_geobr-0.1.2.dist-info/RECORD +0 -46
  41. {tunned_geobr-0.1.2.dist-info → tunned_geobr-0.2.0.dist-info}/WHEEL +0 -0
  42. {tunned_geobr-0.1.2.dist-info → tunned_geobr-0.2.0.dist-info}/entry_points.txt +0 -0
  43. {tunned_geobr-0.1.2.dist-info → tunned_geobr-0.2.0.dist-info}/licenses/LICENSE.txt +0 -0
tunned_geobr/read_baze_sites.py
@@ -0,0 +1,155 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+import subprocess
+import platform
+import shutil
+from io import BytesIO
+
+def read_baze_sites(simplified=False):
+    """Download Brazilian BAZE Sites data from MMA.
+
+    This function downloads and processes the Brazilian BAZE Sites data
+    (Sites of Biological Importance and Ecosystem Services) from the
+    Ministry of the Environment (MMA).
+    Original source: MMA - Ministério do Meio Ambiente
+
+    Note: This function requires either 'unrar' or 'unar' to be installed on your system
+    to extract the RAR file. If you don't have these tools installed, you'll need to
+    install them:
+    - On macOS: brew install unrar or brew install unar
+    - On Ubuntu/Debian: sudo apt-get install unrar or sudo apt-get install unar
+    - On Windows: Install WinRAR or 7-Zip
+
+    Parameters
+    ----------
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Geodataframe with Brazilian BAZE Sites data
+
+    Example
+    -------
+    >>> from tunned_geobr import read_baze_sites
+
+    # Read BAZE Sites data
+    >>> baze_sites = read_baze_sites()
+    """
+
+    url = "http://antigo.mma.gov.br/images/arquivo/80046/Especies/SitiosBAZE_2018.rar"
+
+    # Check if extraction tools are available
+    unrar_available = shutil.which('unrar') is not None
+    unar_available = shutil.which('unar') is not None
+    seven_zip_available = shutil.which('7z') is not None
+
+    if not (unrar_available or unar_available or seven_zip_available):
+        os_name = platform.system()
+        if os_name == 'Darwin':  # macOS
+            install_msg = "Install with: brew install unrar or brew install unar"
+        elif os_name == 'Linux':
+            install_msg = "Install with: sudo apt-get install unrar or sudo apt-get install unar"
+        elif os_name == 'Windows':
+            install_msg = "Install WinRAR or 7-Zip"
+        else:
+            install_msg = "Install unrar, unar, or 7-Zip"
+
+        raise Exception(f"No RAR extraction tool found. This function requires unrar, unar, or 7-Zip to extract the data. {install_msg}")
+
+    try:
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Download the RAR file to the temporary directory
+            rar_file_path = os.path.join(temp_dir, "baze_sites.rar")
+
+            # Download the file
+            response = requests.get(url)
+            if response.status_code != 200:
+                raise Exception("Failed to download BAZE Sites data from MMA")
+
+            # Save the content to a file
+            with open(rar_file_path, 'wb') as f:
+                f.write(response.content)
+
+            # Extract the RAR file using available tools
+            extraction_success = False
+            extraction_error = ""
+
+            if unrar_available:
+                try:
+                    subprocess.run(['unrar', 'x', rar_file_path, temp_dir],
+                                   check=True,
+                                   stdout=subprocess.PIPE,
+                                   stderr=subprocess.PIPE)
+                    extraction_success = True
+                except subprocess.CalledProcessError as e:
+                    extraction_error += f"unrar failed: {e.stderr.decode()}. "
+
+            if not extraction_success and unar_available:
+                try:
+                    subprocess.run(['unar', '-d', '-o', temp_dir, rar_file_path],
+                                   check=True,
+                                   stdout=subprocess.PIPE,
+                                   stderr=subprocess.PIPE)
+                    extraction_success = True
+                except subprocess.CalledProcessError as e:
+                    extraction_error += f"unar failed: {e.stderr.decode()}. "
+
+            if not extraction_success and seven_zip_available:
+                try:
+                    subprocess.run(['7z', 'x', rar_file_path, f'-o{temp_dir}'],
+                                   check=True,
+                                   stdout=subprocess.PIPE,
+                                   stderr=subprocess.PIPE)
+                    extraction_success = True
+                except subprocess.CalledProcessError as e:
+                    extraction_error += f"7z failed: {e.stderr.decode()}. "
+
+            if not extraction_success:
+                raise Exception(f"Failed to extract RAR file: {extraction_error}")
+
+            # Find the shapefile
+            shp_files = []
+            for root, dirs, files in os.walk(temp_dir):
+                shp_files.extend([os.path.join(root, f) for f in files if f.endswith('.shp')])
+
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(shp_files[0])
+
+            # Convert to SIRGAS 2000 (EPSG:4674) if not already
+            if gdf.crs is None or gdf.crs.to_epsg() != 4674:
+                gdf = gdf.to_crs(4674)
+
+            if simplified:
+                # Keep only the most relevant columns
+                # Note: Column names may need adjustment based on actual data
+                columns_to_keep = [
+                    'geometry',
+                    'nome', # Site name
+                    'categoria', # Category
+                    'area_km2', # Area in square kilometers
+                    'bioma', # Biome
+                    'uf', # State
+                    'importancia', # Importance
+                    'descricao', # Description
+                    'referencia' # Reference
+                ]
+
+                # Filter columns that actually exist in the dataset
+                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                gdf = gdf[existing_columns]
+
+    except Exception as e:
+        raise Exception(f"Error downloading BAZE Sites data: {str(e)}")
+
+    return gdf
+
+if __name__ == '__main__':
+    read_baze_sites()
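The extraction logic above probes for unrar, unar, and 7z with shutil.which and tries each in turn. For reference, here is that fallback chain factored into a standalone helper, a minimal sketch mirroring the calls used in read_baze_sites (the helper name and the unar argument order are assumptions, not part of the package):

import shutil
import subprocess

def extract_rar(archive_path, dest_dir):
    """Try unrar, then unar, then 7z; raise if none succeeds."""
    # Each entry pairs an executable name with the argv that extracts
    # archive_path into dest_dir.
    candidates = [
        ('unrar', ['unrar', 'x', archive_path, dest_dir]),
        ('unar', ['unar', '-d', '-o', dest_dir, archive_path]),  # assumed flag order
        ('7z', ['7z', 'x', archive_path, f'-o{dest_dir}']),
    ]
    errors = []
    for name, argv in candidates:
        if shutil.which(name) is None:
            continue  # tool not installed; try the next one
        try:
            subprocess.run(argv, check=True, capture_output=True)
            return
        except subprocess.CalledProcessError as e:
            errors.append(f"{name} failed: {e.stderr.decode()}")
    raise RuntimeError("RAR extraction failed: " + "; ".join(errors))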
tunned_geobr/read_biosphere_reserves.py
@@ -0,0 +1,85 @@
+import geopandas as gpd
+import tempfile
+import os
+import gdown
+from zipfile import ZipFile
+
+def read_biosphere_reserves(simplified=False):
+    """Download Brazilian Biosphere Reserves data.
+
+    This function downloads and processes the Brazilian Biosphere Reserves data
+    from a Google Drive repository. The data includes UNESCO Biosphere Reserves
+    in Brazil, which are protected areas with high biodiversity value.
+    Original source: MMA - Ministério do Meio Ambiente
+
+    Parameters
+    ----------
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Geodataframe with Brazilian biosphere reserves data
+
+    Example
+    -------
+    >>> from tunned_geobr import read_biosphere_reserves
+
+    # Read biosphere reserves data
+    >>> reserves = read_biosphere_reserves()
+    """
+
+    # Google Drive folder URL
+    folder_url = "https://drive.google.com/drive/folders/19ygCKsQrI1gfRMe1jUbHZGQibbN_oLAC"
+
+    try:
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Download the file from Google Drive
+            output_zip = os.path.join(temp_dir, "biosphere_reserves.zip")
+
+            # Use gdown to download the folder contents
+            gdown.download_folder(folder_url, output=temp_dir, quiet=False)
+
+            # Find the shapefile
+            shp_files = []
+            for root, dirs, files in os.walk(temp_dir):
+                shp_files.extend([os.path.join(root, f) for f in files if f.endswith('.shp')])
+
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(shp_files[0])
+
+            # Convert to SIRGAS 2000 (EPSG:4674) if not already
+            if gdf.crs is None or gdf.crs.to_epsg() != 4674:
+                gdf = gdf.to_crs(4674)
+
+            if simplified:
+                # Keep only the most relevant columns
+                # Note: Column names may need adjustment based on actual data
+                columns_to_keep = [
+                    'geometry',
+                    'nome', # Reserve name
+                    'categoria', # Category
+                    'area_km2', # Area in square kilometers
+                    'bioma', # Biome
+                    'uf', # State
+                    'ano_criacao', # Creation year
+                    'legislacao', # Legislation
+                    'orgao_gestor' # Managing agency
+                ]
+
+                # Filter columns that actually exist in the dataset
+                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                gdf = gdf[existing_columns]
+
+    except Exception as e:
+        raise Exception(f"Error downloading biosphere reserves data: {str(e)}")
+
+    return gdf
+
+if __name__ == '__main__':
+    read_biosphere_reserves()
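A short usage sketch for the function above; the attribute columns depend on whatever shapefile the Drive folder actually ships, so inspect them before relying on specific names:

from tunned_geobr import read_biosphere_reserves

reserves = read_biosphere_reserves()
print(reserves.crs)      # expected: EPSG:4674 (SIRGAS 2000)
print(reserves.columns)  # check which attributes the shapefile provides

# simplified=True keeps only the whitelisted columns that exist
slim = read_biosphere_reserves(simplified=True)
print(slim.head())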
tunned_geobr/read_cave_potential.py
@@ -0,0 +1,79 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+from zipfile import ZipFile
+from io import BytesIO
+
+def read_cave_potential(simplified=False):
+    """Download Cave Occurrence Potential data from ICMBio.
+
+    This function downloads and processes data about the potential for cave occurrence
+    across Brazil. The data is based on lithological characteristics and was produced
+    by ICMBio's National Center for Cave Research and Conservation (CECAV).
+    Original source: ICMBio - Instituto Chico Mendes de Conservação da Biodiversidade
+
+    Parameters
+    ----------
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Geodataframe with cave occurrence potential data
+        Columns:
+        - geometry: Geometry of the area
+        - METODOLOGI: Methodology used to determine potential
+        - GRAU_DE_PO: Potential degree (Very High, High, Medium, Low, Very Low)
+        - COUNT: Number of occurrences in the area
+
+    Example
+    -------
+    >>> from tunned_geobr import read_cave_potential
+
+    # Read cave potential data
+    >>> potential = read_cave_potential()
+    """
+
+    url = "https://www.gov.br/icmbio/pt-br/assuntos/centros-de-pesquisa/cavernas/publicacoes/mapa-de-potencialidades-de-ocorrencia-de-cavernas-no-brasil/dados-mapa-de-potencialidades-de-ocorrencia-de-cavermas-no-brasil.zip/@@download/file"
+
+    try:
+        # Download the zip file
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise Exception("Failed to download data from ICMBio")
+
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Extract the zip file
+            with ZipFile(BytesIO(response.content)) as zip_ref:
+                zip_ref.extractall(temp_dir)
+
+            # Find the shapefile
+            shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))
+            gdf = gdf.to_crs(4674) # Convert to SIRGAS 2000
+
+            if simplified:
+                # Keep only the most relevant columns
+                columns_to_keep = [
+                    'geometry',
+                    'GRAU_DE_PO' # Potential degree
+                ]
+
+                # Filter columns that actually exist in the dataset
+                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                gdf = gdf[existing_columns]
+
+    except Exception as e:
+        raise Exception(f"Error downloading cave potential data: {str(e)}")
+
+    return gdf
+
+if __name__ == '__main__':
+    read_cave_potential()
tunned_geobr/read_census_tract_2022.py
@@ -0,0 +1,101 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+import subprocess
+from io import BytesIO
+
+def read_census_tract_2022(simplified=False):
+    """Download Brazilian Census Tracts data from IBGE (2022 Census).
+
+    This function downloads and processes the Brazilian Census Tracts data
+    from IBGE (Brazilian Institute of Geography and Statistics) for the 2022 Census.
+    Original source: IBGE - Instituto Brasileiro de Geografia e Estatística
+
+    Parameters
+    ----------
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Geodataframe with Brazilian census tracts data
+
+    Example
+    -------
+    >>> from tunned_geobr import read_census_tract_2022
+
+    # Read census tracts data
+    >>> census_tracts = read_census_tract_2022()
+    """
+
+    url = "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2022/setores/shp/BR/BR_setores_CD2022.zip"
+
+    try:
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Download the zip file to the temporary directory
+            zip_file_path = os.path.join(temp_dir, "census_tracts.zip")
+
+            # Download the file
+            response = requests.get(url)
+            if response.status_code != 200:
+                raise Exception("Failed to download census tracts data from IBGE")
+
+            # Save the content to a file
+            with open(zip_file_path, 'wb') as f:
+                f.write(response.content)
+
+            # Use unzip command line tool to extract the file (handles more compression methods)
+            try:
+                subprocess.run(['unzip', '-o', zip_file_path, '-d', temp_dir],
+                               check=True,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+            except subprocess.CalledProcessError as e:
+                raise Exception(f"Failed to extract zip file: {e.stderr.decode()}")
+
+            # Find the shapefile
+            shp_files = []
+            for root, dirs, files in os.walk(temp_dir):
+                shp_files.extend([os.path.join(root, f) for f in files if f.endswith('.shp')])
+
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(shp_files[0])
+
+            # Convert to SIRGAS 2000 (EPSG:4674) if not already
+            if gdf.crs is None or gdf.crs.to_epsg() != 4674:
+                gdf = gdf.to_crs(4674)
+
+            if simplified:
+                # Keep only the most relevant columns
+                # Note: Column names may need adjustment based on actual data
+                columns_to_keep = [
+                    'geometry',
+                    'CD_SETOR', # Census Tract Code
+                    'NM_SETOR', # Census Tract Name
+                    'CD_SIT', # Situation Code
+                    'NM_SIT', # Situation Name (Urban/Rural)
+                    'CD_MUN', # Municipality Code
+                    'NM_MUN', # Municipality Name
+                    'CD_UF', # State Code
+                    'NM_UF', # State Name
+                    'SIGLA_UF', # State Abbreviation
+                    'AREA_KM2' # Area in square kilometers
+                ]
+
+                # Filter columns that actually exist in the dataset
+                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                gdf = gdf[existing_columns]
+
+    except Exception as e:
+        raise Exception(f"Error downloading census tracts data: {str(e)}")
+
+    return gdf
+
+if __name__ == '__main__':
+    read_census_tract_2022()
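read_census_tract_2022 shells out to the unzip binary instead of using Python's zipfile because some archives use compression methods the stdlib does not implement (zipfile raises NotImplementedError on, for example, Deflate64). A sketch of a stdlib-first variant that only falls back to the CLI when needed, assuming unzip is on PATH (this helper is illustrative, not part of the package):

import subprocess
import zipfile

def extract_zip(zip_path, dest_dir):
    """Extract with the stdlib when possible, else defer to unzip(1)."""
    try:
        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(dest_dir)
    except NotImplementedError:
        # Compression method unsupported by zipfile; unzip handles more.
        subprocess.run(['unzip', '-o', zip_path, '-d', dest_dir],
                       check=True, capture_output=True)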
tunned_geobr/read_ebas.py
@@ -0,0 +1,80 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+from zipfile import ZipFile
+from io import BytesIO
+
+def read_ebas(simplified=False):
+    """Download Endemic Bird Areas (EBAs) data.
+
+    This function downloads and processes Endemic Bird Areas (EBAs) data. EBAs are
+    regions of the world that contain concentrations of bird species found nowhere else.
+    Original source: Global Forest Watch
+
+    Parameters
+    ----------
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Geodataframe with Endemic Bird Areas data
+
+    Example
+    -------
+    >>> from tunned_geobr import read_ebas
+
+    # Read Endemic Bird Areas data
+    >>> ebas = read_ebas()
+    """
+
+    url = "http://gfw2-data.s3.amazonaws.com/conservation/zip/endemic_bird_areas.zip"
+
+    try:
+        # Download the zip file
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise Exception("Failed to download Endemic Bird Areas data")
+
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Extract the zip file
+            with ZipFile(BytesIO(response.content)) as zip_ref:
+                zip_ref.extractall(temp_dir)
+
+            # Find the shapefile
+            shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))
+
+            # Convert to SIRGAS 2000 (EPSG:4674)
+            gdf = gdf.to_crs(4674)
+
+            if simplified:
+                # Keep only the most relevant columns
+                columns_to_keep = [
+                    'geometry',
+                    'EBA_Name', # Endemic Bird Area name
+                    'EBA_ID', # Endemic Bird Area ID
+                    'Area_km2', # Area in square kilometers
+                    'Priority', # Conservation priority
+                    'Region', # Geographic region
+                    'Country' # Country
+                ]
+
+                # Filter columns that actually exist in the dataset
+                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                gdf = gdf[existing_columns]
+
+    except Exception as e:
+        raise Exception(f"Error downloading Endemic Bird Areas data: {str(e)}")
+
+    return gdf
+
+if __name__ == '__main__':
+    read_ebas()
tunned_geobr/read_federal_highways.py
@@ -0,0 +1,79 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+from zipfile import ZipFile
+from io import BytesIO
+
+def read_federal_highways(simplified=False):
+    """Download Federal Highways data from MapBiomas.
+
+    This function downloads and processes federal highways data from MapBiomas.
+    The data includes information about federally-managed highways across Brazil.
+    Original source: MapBiomas
+
+    Parameters
+    ----------
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Geodataframe with federal highways data
+
+    Example
+    -------
+    >>> from tunned_geobr import read_federal_highways
+
+    # Read federal highways data
+    >>> highways = read_federal_highways()
+    """
+
+    url = "https://brasil.mapbiomas.org/wp-content/uploads/sites/4/2023/08/rodovia-federal.zip"
+
+    try:
+        # Download the zip file
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise Exception("Failed to download data from MapBiomas")
+
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Extract the zip file
+            with ZipFile(BytesIO(response.content)) as zip_ref:
+                zip_ref.extractall(temp_dir)
+
+            # Find the shapefile
+            shp_files = []
+            for root, dirs, files in os.walk(temp_dir):
+                shp_files.extend([os.path.join(root, f) for f in files if f.endswith('.shp')])
+
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(shp_files[0])
+            gdf = gdf.to_crs(4674) # Convert to SIRGAS 2000
+
+            if simplified:
+                # Keep only the most relevant columns
+                columns_to_keep = [
+                    'geometry',
+                    'sigla', # Highway code (BR-XXX)
+                    'uf', # State
+                    'jurisdicao', # Jurisdiction
+                    'superficie', # Surface type
+                    'situacao', # Status
+                    'extensao_km', # Length in km
+                    'tipo_trecho' # Section type
+                ]
+
+                # Filter columns that actually exist in the dataset
+                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                gdf = gdf[existing_columns]
+
+    except Exception as e:
+        raise Exception(f"Error downloading federal highways data: {str(e)}")
+
+    return gdf
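read_federal_highways, read_cave_potential, and read_ebas all repeat the same download-extract-read-reproject sequence. That shared shape, factored into one helper, as a sketch only; the package itself does not define this function:

import os
import tempfile
from io import BytesIO
from zipfile import ZipFile

import geopandas as gpd
import requests

def read_shapefile_zip(url):
    """Download a zipped shapefile, read the first .shp, reproject to SIRGAS 2000."""
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(f"Failed to download {url}")
    with tempfile.TemporaryDirectory() as temp_dir:
        with ZipFile(BytesIO(response.content)) as zip_ref:
            zip_ref.extractall(temp_dir)
        # Walk the extracted tree, since some archives nest their shapefiles
        shp_files = [os.path.join(root, f)
                     for root, _, files in os.walk(temp_dir)
                     for f in files if f.endswith('.shp')]
        if not shp_files:
            raise Exception("No shapefile found in the downloaded data")
        return gpd.read_file(shp_files[0]).to_crs(4674)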
tunned_geobr/read_fossil_occurrences.py
@@ -0,0 +1,94 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+import fiona
+from zipfile import ZipFile
+from io import BytesIO
+
+def read_fossil_occurrences(simplified=False):
+    """Download Fossil Occurrences data from SGB.
+
+    This function downloads and processes data about fossil occurrences in Brazil
+    from SGB (Serviço Geológico do Brasil). The data comes from a File Geodatabase (.gdb)
+    and includes information about fossil sites across the country.
+    Original source: SGB - Serviço Geológico do Brasil
+
+    Parameters
+    ----------
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Geodataframe with fossil occurrences data
+
+    Example
+    -------
+    >>> from tunned_geobr import read_fossil_occurrences
+
+    # Read fossil occurrences data
+    >>> fossils = read_fossil_occurrences()
+    """
+
+    url = "https://geoportal.sgb.gov.br/downloads/paleo.gdb.zip"
+
+    try:
+        # Download the zip file
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise Exception("Failed to download data from SGB")
+
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Save the .gdb.zip file
+            gdb_zip = os.path.join(temp_dir, "paleo.gdb.zip")
+            with open(gdb_zip, 'wb') as f:
+                f.write(response.content)
+
+            # Create the .gdb directory
+            gdb_path = os.path.join(temp_dir, "paleo.gdb")
+            os.makedirs(gdb_path, exist_ok=True)
+
+            # Extract the .gdb.zip file directly into the .gdb directory
+            with ZipFile(gdb_zip) as zip_ref:
+                zip_ref.extractall(gdb_path)
+
+            # List all layers in the GDB
+            layers = fiona.listlayers(gdb_path)
+            if not layers:
+                raise Exception("No layers found in the GDB")
+
+            # Read the first layer (assuming it's the fossil occurrences)
+            gdf = gpd.read_file(gdb_path, layer=layers[0])
+            gdf = gdf.to_crs(4674) # Convert to SIRGAS 2000
+
+            if simplified:
+                # Keep only the most relevant columns
+                columns_to_keep = [
+                    'geometry',
+                    'LOCALIDADE', # Locality name
+                    'DISTRITO', # District
+                    'UNIDADE_LITOESTRATIGRAFICA', # Lithostratigraphic unit
+                    'UNIDADE_CRONOESTRATIGRAFICA', # Chronostratigraphic unit
+                    'LITOLOGIA', # Lithology
+                    'VESTIGIOS_ORGANICOS', # Organic traces
+                    'AMBIENTE_DEPOSICAO', # Depositional environment
+                    'TAXON', # Taxon
+                    'SISTEMATICA', # Systematics
+                    'MATERIAL', # Material
+                    'REFERENCIA_BIBLIOGRAFICA' # Bibliographic reference
+                ]
+
+                # Filter columns that actually exist in the dataset
+                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                gdf = gdf[existing_columns]
+
+    except Exception as e:
+        raise Exception(f"Error downloading fossil occurrences data: {str(e)}")
+
+    return gdf
+
+if __name__ == '__main__':
+    read_fossil_occurrences()
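read_fossil_occurrences reads layers[0] on the assumption that the first layer holds the occurrences. When a geodatabase ships several layers, selecting by name is more robust; a sketch, where 'ocorrencias_fosseis' is a hypothetical layer name used only for illustration:

import fiona
import geopandas as gpd

gdb_path = "paleo.gdb"  # extracted as in read_fossil_occurrences above
layers = fiona.listlayers(gdb_path)
print(layers)  # inspect what the GDB actually contains

# Prefer an explicit name over layers[0] when several layers are present.
wanted = 'ocorrencias_fosseis'  # hypothetical name, not confirmed
layer = wanted if wanted in layers else layers[0]
gdf = gpd.read_file(gdb_path, layer=layer).to_crs(4674)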