tunned-geobr 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tunned_geobr/__init__.py +1 -2
- tunned_geobr/read_immediate_region.py +59 -71
- tunned_geobr/read_indigenous_land.py +66 -33
- tunned_geobr/read_intermediate_region.py +67 -59
- tunned_geobr/read_meso_region.py +60 -69
- tunned_geobr/read_micro_region.py +60 -69
- tunned_geobr/read_neighborhood.py +88 -28
- tunned_geobr/read_region.py +59 -21
- tunned_geobr/read_sigef_properties.py +0 -2
- {tunned_geobr-1.0.0.dist-info → tunned_geobr-1.0.2.dist-info}/METADATA +3 -1
- {tunned_geobr-1.0.0.dist-info → tunned_geobr-1.0.2.dist-info}/RECORD +14 -15
- {tunned_geobr-1.0.0.dist-info → tunned_geobr-1.0.2.dist-info}/WHEEL +1 -1
- tunned_geobr/read_neighborhoods_2022.py +0 -99
- {tunned_geobr-1.0.0.dist-info → tunned_geobr-1.0.2.dist-info}/entry_points.txt +0 -0
- {tunned_geobr-1.0.0.dist-info → tunned_geobr-1.0.2.dist-info}/licenses/LICENSE.txt +0 -0
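For orientation before the per-file diffs: the readers reworked in 1.0.2 share one calling convention — each downloads its source archive (IBGE or FUNAI) on demand, reads the extracted shapefile into a GeoDataFrame, and accepts an optional simplified flag that trims the columns. A minimal usage sketch, assuming the top-level re-exports shown in __init__.py below and network access to the IBGE servers:

    from tunned_geobr import read_region, read_immediate_region

    regions = read_region()                              # all columns from the 2022 regions shapefile
    immediate = read_immediate_region(simplified=True)   # keeps only geometry, CD_RGI, NM_RGI
    print(regions.crs, len(immediate))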
tunned_geobr/__init__.py
CHANGED
@@ -18,7 +18,6 @@ from .read_meso_region import read_meso_region
 from .read_micro_region import read_micro_region
 from .read_municipality import read_municipality
 from .read_weighting_area import read_weighting_area
-from .read_neighborhood import read_neighborhood
 from .read_health_region import read_health_region
 from .read_pop_arrangements import read_pop_arrangements
 from .lookup_muni import lookup_muni
@@ -65,7 +64,7 @@ from .read_water_bodies_ana import read_water_bodies_ana
 from .read_pan_strategic_areas import read_pan_strategic_areas
 from .read_geographic_regions import read_geographic_regions
 from .read_biosphere_reserves import read_biosphere_reserves
-from .
+from .read_neighborhood import read_neighborhood
 from .read_baze_sites import read_baze_sites
 from .read_existent_eolic import read_existent_eolic
 from .read_planned_eolic import read_planned_eolic
tunned_geobr/read_immediate_region.py
CHANGED
@@ -1,81 +1,69 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+from zipfile import ZipFile
+from io import BytesIO
+
+def read_immediate_region(simplified=False):
+    """Download official immediate region data from IBGE.
+
+    This function downloads and processes immediate region data from IBGE (Brazilian Institute of Geography and Statistics).
+    The data includes immediate regions of Brazil for the year 2023.
+    Original source: IBGE

-    The Immediate Geographic Areas are part of the geographic division of
-    Brazil created in 2017 by IBGE to replace the "Micro Regions" division.
-    Data at scale 1:250,000, using Geodetic reference system "SIRGAS2000"
-    and CRS(4674)
     Parameters
     ----------
-        "RJ") the function will load all immediate regions of that state. If
-        code_immediate="all", all immediate regions of the country are loaded
-        (defaults to "all").
-    year : int, optional
-        Year of the data, by default 2017
-    simplify: boolean, by default True
-        Data 'type', indicating whether the function returns the 'original' dataset
-        with high resolution or a dataset with 'simplify' borders (Default)
-    verbose : bool, optional
-        by default False
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
     Returns
     -------
     gpd.GeoDataFrame
-    Raises
-    ------
-    Exception
-        If parameters are not found or not well defined
+        Geodataframe with immediate region data
+
     Example
     -------
     >>> from geobr import read_immediate_region
-    # Read
-    >>>
+
+    # Read immediate region data
+    >>> immediate_region = read_immediate_region()
     """
-    )
+
+    url = "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2023/Brasil/BR_RG_Imediatas_2023.zip"
+
+    try:
+        # Download the zip file
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise Exception("Failed to download data from IBGE")
+
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Extract zip content
+            with ZipFile(BytesIO(response.content)) as zip_ref:
+                zip_ref.extractall(temp_dir)
+
+            # Find the shapefile
+            shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))
+
+            if simplified:
+                # Keep only the most relevant columns
+                # Note: These columns are based on typical immediate region data structure
+                # You may want to adjust these based on the actual data
+                columns_to_keep = [
+                    'geometry',
+                    'CD_RGI',  # Immediate region code
+                    'NM_RGI',  # Immediate region name
+                ]
+                gdf = gdf[columns_to_keep]
+
+    except Exception as e:
+        raise Exception(f"Error downloading immediate region data: {str(e)}")
+
+    return gdf
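One behavioral note on the simplified=True branch above: read_immediate_region (like the intermediate, meso, micro and region readers further down) indexes gdf[columns_to_keep] directly, so if IBGE renames CD_RGI or NM_RGI the call raises a KeyError. The indigenous land and neighborhood readers in this same release guard against that by filtering to the columns that actually exist; the same pattern could be applied here — a sketch borrowed from those functions, not part of the packaged code:

    # Defensive variant of the column selection (pattern used by read_indigenous_land below)
    columns_to_keep = ['geometry', 'CD_RGI', 'NM_RGI']
    existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
    gdf = gdf[existing_columns]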
tunned_geobr/read_indigenous_land.py
CHANGED
@@ -1,44 +1,77 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+from zipfile import ZipFile
+from io import BytesIO

+def read_indigenous_land(simplified=False):
+    """Download Indigenous Land data from FUNAI.
+
+    This function downloads and processes data about indigenous lands in Brazil
+    from FUNAI (Fundação Nacional dos Povos Indígenas). The data includes location
+    and basic information about registered indigenous lands.
+    Original source: FUNAI - Fundação Nacional dos Povos Indígenas

-    The data set covers the whole of Brazil and it includes indigenous lands from all ethnicities and
-    in different stages of demarcation. The original data comes from the National Indian Foundation (FUNAI)
-    and can be found at http://www.funai.gov.br/index.php/shape. Although original data is updated monthly,
-    the geobr package will only keep the data for a few months per year.
     Parameters
     ----------
-        Data 'type', indicating whether the function returns the 'original' dataset
-        with high resolution or a dataset with 'simplified' borders (Default)
-    verbose : bool, optional
-        by default False
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
     Returns
     -------
     gpd.GeoDataFrame
+        Geodataframe with indigenous land data
+        Columns:
+        - geometry: Land boundaries
+        - nome: Land name
+        - municipio: Municipality
+        - uf: State
+        - etnia: Ethnicity
+        - fase: Legal status
+        - area_ha: Area in hectares
+
     Example
     -------
-    >>> from
-    # Read specific state at a given year
-    >>> df = read_indigenous_land(date=201907)
+    >>> from tunned_geobr import read_indigenous_land
+    >>> lands = read_indigenous_land()
     """
+
+    url = "https://geoserver.funai.gov.br/geoserver/Funai/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=Funai%3Atis_poligonais&maxFeatures=10000&outputFormat=SHAPE-ZIP"
+
+    try:
+        # Download the zip file with a 60-second timeout
+        response = requests.get(url, timeout=60)
+        if response.status_code != 200:
+            raise Exception(f"Failed to download data from FUNAI. Status code: {response.status_code}")
+
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Extract the zip file
+            with ZipFile(BytesIO(response.content)) as zip_ref:
+                zip_ref.extractall(temp_dir)
+
+            # Find the shapefile
+            shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))
+            gdf = gdf.to_crs(4674)  # Convert to SIRGAS 2000
+
+            # Print columns for debugging
+            print("Available columns:", gdf.columns)
+
+            if simplified:
+                columns_to_keep = ['geometry', 'nome', 'municipio', 'uf', 'etnia', 'fase', 'area_ha']
+                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                gdf = gdf[existing_columns]
+
+    except Exception as e:
+        raise Exception(f"Error downloading indigenous land data: {str(e)}")
+
     return gdf
+
+if __name__ == '__main__':
+    read_indigenous_land()
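The FUNAI endpoint above is a WFS GetFeature request capped at maxFeatures=10000 and fetched with a 60-second timeout, and the docstring lists the columns kept when simplified=True. A short, hedged usage sketch — the 'fase' column name comes from that docstring, and because the function filters to existing columns it may be absent if FUNAI changes its field names:

    from tunned_geobr import read_indigenous_land

    lands = read_indigenous_land(simplified=True)
    if 'fase' in lands.columns:
        # Count indigenous lands by stage of demarcation
        print(lands.groupby('fase').size().sort_values(ascending=False))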
tunned_geobr/read_intermediate_region.py
CHANGED
@@ -1,61 +1,69 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+from zipfile import ZipFile
+from io import BytesIO

-    )
-        with high resolution or a dataset with 'simplified' borders (Default)
-    verbose : bool, optional
-        by default False
-
-    Returns
-    -------
-    gpd.GeoDataFrame
-        Metadata and geopackage of selected states
-
-    Raises
-    ------
-    Exception
-        If parameters are not found or not well defined
-
-    Example
-    -------
-    >>> from geobr import read_intermediate_region
-
-    # Read specific state at a given year
-    >>> df = read_intermediate_region(year=2019)
+def read_intermediate_region(simplified=False):
+    """Download official intermediate region data from IBGE.
+
+    This function downloads and processes intermediate region data from IBGE (Brazilian Institute of Geography and Statistics).
+    The data includes intermediate regions of Brazil for the year 2023.
+    Original source: IBGE
+
+    Parameters
+    ----------
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Geodataframe with intermediate region data
+
+    Example
+    -------
+    >>> from geobr import read_intermediate_region
+
+    # Read intermediate region data
+    >>> intermediate_region = read_intermediate_region()
     """
+
+    url = "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2023/Brasil/BR_RG_Intermediarias_2023.zip"
+
+    try:
+        # Download the zip file
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise Exception("Failed to download data from IBGE")
+
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Extract zip content
+            with ZipFile(BytesIO(response.content)) as zip_ref:
+                zip_ref.extractall(temp_dir)
+
+            # Find the shapefile
+            shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))
+
+            if simplified:
+                # Keep only the most relevant columns
+                # Note: These columns are based on typical intermediate region data structure
+                # You may want to adjust these based on the actual data
+                columns_to_keep = [
+                    'geometry',
+                    'CD_RGINT',  # Intermediate region code
+                    'NM_RGINT',  # Intermediate region name
+                ]
+                gdf = gdf[columns_to_keep]
+
+    except Exception as e:
+        raise Exception(f"Error downloading intermediate region data: {str(e)}")
+
+    return gdf
tunned_geobr/read_meso_region.py
CHANGED
@@ -1,78 +1,69 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+from zipfile import ZipFile
+from io import BytesIO
+
+def read_meso_region(simplified=False):
+    """Download official mesoregion data from IBGE.
+
+    This function downloads and processes mesoregion data from IBGE (Brazilian Institute of Geography and Statistics).
+    The data includes mesoregions of Brazil for the year 2022.
+    Original source: IBGE
+
     Parameters
     ----------
-        If code_meso="all", all meso regions of the country are loaded.
-    year : int, optional
-        Year of the data, by default 2010
-    simplified: boolean, by default True
-        Data 'type', indicating whether the function returns the 'original' dataset
-        with high resolution or a dataset with 'simplified' borders (Default)
-    verbose : bool, optional
-        by default False
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
     Returns
     -------
     gpd.GeoDataFrame
-    Raises
-    ------
-    Exception
-        If parameters are not found or not well defined
+        Geodataframe with mesoregion data
+
     Example
     -------
     >>> from geobr import read_meso_region
-    # Read
-    >>>
-
-    # Read all meso regions of a state at a given year
-    >>> df = read_meso_region(code_meso=12, year=2017)
-    >>> df = read_meso_region(code_meso="AM", year=2000)
-
-    # Read all meso regions of the country at a given year
-    >>> df = read_meso_region(code_meso="all", year=2010)
+
+    # Read mesoregion data
+    >>> meso_region = read_meso_region()
     """
+
+    url = "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2022/Brasil/BR/BR_Mesorregioes_2022.zip"
+
+    try:
+        # Download the zip file
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise Exception("Failed to download data from IBGE")
+
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Extract zip content
+            with ZipFile(BytesIO(response.content)) as zip_ref:
+                zip_ref.extractall(temp_dir)
+
+            # Find the shapefile
+            shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))
+
+            if simplified:
+                # Keep only the most relevant columns
+                # Note: These columns are based on typical mesoregion data structure
+                # You may want to adjust these based on the actual data
+                columns_to_keep = [
+                    'geometry',
+                    'CD_MESO',  # Mesoregion code
+                    'NM_MESO',  # Mesoregion name
+                ]
+                gdf = gdf[columns_to_keep]
+
+    except Exception as e:
+        raise Exception(f"Error downloading mesoregion data: {str(e)}")
+
+    return gdf
tunned_geobr/read_micro_region.py
CHANGED
@@ -1,78 +1,69 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+from zipfile import ZipFile
+from io import BytesIO
+
+def read_micro_region(simplified=False):
+    """Download official microregion data from IBGE.
+
+    This function downloads and processes microregion data from IBGE (Brazilian Institute of Geography and Statistics).
+    The data includes microregions of Brazil for the year 2022.
+    Original source: IBGE
+
     Parameters
     ----------
-        If code_micro="all", all micro regions of the country are loaded.
-    year : int, optional
-        Year of the data, by default 2010
-    simplified: boolean, by default True
-        Data 'type', indicating whether the function returns the 'original' dataset
-        with high resolution or a dataset with 'simplified' borders (Default)
-    verbose : bool, optional
-        by default False
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
     Returns
     -------
     gpd.GeoDataFrame
-    Raises
-    ------
-    Exception
-        If parameters are not found or not well defined
+        Geodataframe with microregion data
+
     Example
     -------
     >>> from geobr import read_micro_region
-    # Read
-    >>>
-
-    # Read all meso regions of a state at a given year
-    >>> df = read_micro_region(code_micro=12, year=2017)
-    >>> df = read_micro_region(code_micro="AM", year=2000)
-
-    # Read all meso regions of the country at a given year
-    >>> df = read_micro_region(code_micro="all", year=2010)
+
+    # Read microregion data
+    >>> micro_region = read_micro_region()
     """
+
+    url = "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2022/Brasil/BR/BR_Microrregioes_2022.zip"
+
+    try:
+        # Download the zip file
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise Exception("Failed to download data from IBGE")
+
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Extract zip content
+            with ZipFile(BytesIO(response.content)) as zip_ref:
+                zip_ref.extractall(temp_dir)
+
+            # Find the shapefile
+            shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))
+
+            if simplified:
+                # Keep only the most relevant columns
+                # Note: These columns are based on typical microregion data structure
+                # You may want to adjust these based on the actual data
+                columns_to_keep = [
+                    'geometry',
+                    'CD_MICRO',  # Microregion code
+                    'NM_MICRO',  # Microregion name
+                ]
+                gdf = gdf[columns_to_keep]
+
+    except Exception as e:
+        raise Exception(f"Error downloading microregion data: {str(e)}")
+
+    return gdf
tunned_geobr/read_neighborhood.py
CHANGED
@@ -1,39 +1,99 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+import subprocess
+from io import BytesIO

+def read_neighborhood(simplified=False):
+    """Download Brazilian Neighborhoods data from IBGE (2022 Census).
+
+    This function downloads and processes the Brazilian Neighborhoods data
+    from IBGE (Brazilian Institute of Geography and Statistics) for the 2022 Census.
+    Original source: IBGE - Instituto Brasileiro de Geografia e Estatística

     Parameters
     ----------
-        Data 'type', indicating whether the function returns the 'original' dataset
-        with high resolution or a dataset with 'simplified' borders (Default)
-    verbose : bool, optional
-        by default False
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
     Returns
     -------
     gpd.GeoDataFrame
-    Raises
-    ------
-    Exception
-        If parameters are not found or not well defined
+        Geodataframe with Brazilian neighborhoods data
+
     Example
     -------
-    >>> from
-    # Read
-    >>>
+    >>> from tunned_geobr import read_neighborhoods_2022
+
+    # Read neighborhoods data
+    >>> neighborhoods = read_neighborhoods_2022()
     """
+
+    url = "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2022/bairros/shp/BR/BR_bairros_CD2022.zip"
+
+    try:
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Download the zip file to the temporary directory
+            zip_file_path = os.path.join(temp_dir, "neighborhoods.zip")
+
+            # Download the file
+            response = requests.get(url)
+            if response.status_code != 200:
+                raise Exception("Failed to download neighborhoods data from IBGE")
+
+            # Save the content to a file
+            with open(zip_file_path, 'wb') as f:
+                f.write(response.content)
+
+            # Use unzip command line tool to extract the file (handles more compression methods)
+            try:
+                subprocess.run(['unzip', '-o', zip_file_path, '-d', temp_dir],
+                               check=True,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+            except subprocess.CalledProcessError as e:
+                raise Exception(f"Failed to extract zip file: {e.stderr.decode()}")
+
+            # Find the shapefile
+            shp_files = []
+            for root, dirs, files in os.walk(temp_dir):
+                shp_files.extend([os.path.join(root, f) for f in files if f.endswith('.shp')])
+
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(shp_files[0])
+
+            # Convert to SIRGAS 2000 (EPSG:4674) if not already
+            if gdf.crs is None or gdf.crs.to_epsg() != 4674:
+                gdf = gdf.to_crs(4674)
+
+            if simplified:
+                # Keep only the most relevant columns
+                # Note: Column names may need adjustment based on actual data
+                columns_to_keep = [
+                    'geometry',
+                    'CD_BAIRRO',  # Neighborhood Code
+                    'NM_BAIRRO',  # Neighborhood Name
+                    'CD_MUN',  # Municipality Code
+                    'NM_MUN',  # Municipality Name
+                    'CD_UF',  # State Code
+                    'NM_UF',  # State Name
+                    'SIGLA_UF',  # State Abbreviation
+                    'AREA_KM2'  # Area in square kilometers
+                ]
+
+                # Filter columns that actually exist in the dataset
+                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
+                gdf = gdf[existing_columns]
+
+    except Exception as e:
+        raise Exception(f"Error downloading neighborhoods data: {str(e)}")
+
     return gdf
+
+if __name__ == '__main__':
+    read_neighborhoods_2022()
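Unlike the other readers in this release, read_neighborhood writes the archive to disk and shells out to the unzip binary — per its own inline comment, because the IBGE neighborhoods zip uses a compression method the standard-library path used elsewhere may not handle — which makes the function depend on unzip being on PATH. If that dependency is unwanted, one possible pattern is to try zipfile first and fall back to the CLI; this is a sketch under that assumption, not what the packaged function does:

    import subprocess
    from zipfile import ZipFile, BadZipFile

    def extract_zip(zip_path, dest_dir):
        """Try the standard library first; fall back to the unzip CLI if needed."""
        try:
            with ZipFile(zip_path) as zf:
                zf.extractall(dest_dir)
        except (BadZipFile, NotImplementedError):
            # zipfile raises NotImplementedError for compression methods it does not support
            subprocess.run(['unzip', '-o', zip_path, '-d', dest_dir], check=True)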
tunned_geobr/read_region.py
CHANGED
@@ -1,31 +1,69 @@
+import geopandas as gpd
+import tempfile
+import os
+import requests
+from zipfile import ZipFile
+from io import BytesIO

+def read_region(simplified=False):
+    """Download official region data from IBGE.
+
+    This function downloads and processes region data from IBGE (Brazilian Institute of Geography and Statistics).
+    The data includes regions of Brazil for the year 2022.
+    Original source: IBGE

-    Data at scale 1:250,000, using Geodetic reference system "SIRGAS2000" and CRS(4674)
-
     Parameters
     ----------
-        Data 'type', indicating whether the function returns the 'original' dataset
-        with high resolution or a dataset with 'simplified' borders (Default)
-    verbose : bool, optional
-        by default False
+    simplified : boolean, by default False
+        If True, returns a simplified version of the dataset with fewer columns
+
     Returns
     -------
     gpd.GeoDataFrame
+        Geodataframe with region data
+
     Example
     -------
-    >>> from
-
-    # Read
-    >>>
+    >>> from geobr import read_region
+
+    # Read region data
+    >>> region = read_region()
     """
+
+    url = "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2022/Brasil/BR/BR_Regioes_2022.zip"
+
+    try:
+        # Download the zip file
+        response = requests.get(url)
+        if response.status_code != 200:
+            raise Exception("Failed to download data from IBGE")
+
+        # Create a temporary directory
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Extract zip content
+            with ZipFile(BytesIO(response.content)) as zip_ref:
+                zip_ref.extractall(temp_dir)
+
+            # Find the shapefile
+            shp_files = [f for f in os.listdir(temp_dir) if f.endswith('.shp')]
+            if not shp_files:
+                raise Exception("No shapefile found in the downloaded data")
+
+            # Read the shapefile
+            gdf = gpd.read_file(os.path.join(temp_dir, shp_files[0]))
+
+            if simplified:
+                # Keep only the most relevant columns
+                # Note: These columns are based on typical region data structure
+                # You may want to adjust these based on the actual data
+                columns_to_keep = [
+                    'geometry',
+                    'CD_RG',  # Region code
+                    'NM_RG',  # Region name
+                ]
+                gdf = gdf[columns_to_keep]
+
+    except Exception as e:
+        raise Exception(f"Error downloading region data: {str(e)}")
+
+    return gdf
tunned_geobr/read_sigef_properties.py
CHANGED
@@ -32,8 +32,6 @@ def read_sigef_properties(simplified=False):
     """

     url = "https://certificacao.incra.gov.br/csv_shp/zip/Sigef%20Brasil.zip"
-    if 1==1:
-        return "Essa camada está muito pesada, baixe manualmente no link: " + url
     try:
         # Download the zip file
         # Disable SSL verification due to INCRA's certificate issues
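This change removes the 1.0.0 guard that returned a "download manually" message instead of fetching the Sigef Brasil archive, so read_sigef_properties now actually attempts the download the old code flagged as too heavy. If memory use is a concern for an archive of that size, streaming the response to disk is one option; this is a generic requests sketch, not necessarily how the packaged function handles it:

    import requests

    def download_large_zip(url, dest_path, chunk_mb=8):
        # Stream the archive to disk instead of holding the whole body in memory
        with requests.get(url, stream=True, timeout=600) as response:
            response.raise_for_status()
            with open(dest_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=chunk_mb * 1024 * 1024):
                    f.write(chunk)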
{tunned_geobr-1.0.0.dist-info → tunned_geobr-1.0.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tunned-geobr
-Version: 1.0.
+Version: 1.0.2
 Summary: Fork personalizado do geobr com funcionalidades extras como download de dados da ANM
 Author: Anderson Stolfi
 License: MIT
@@ -23,6 +23,8 @@ Requires-Dist: patool>=1.15.0
 Requires-Dist: fiona>=1.10.1
 Requires-Dist: gdown>=5.2.0
 Requires-Dist: tabulate>=0.9.0
+Requires-Dist: build>=1.2.2.post1
+Requires-Dist: twine>=6.1.0
 Description-Content-Type: text/markdown

 # geobr: Download Official Spatial Data Sets of Brazil
{tunned_geobr-1.0.0.dist-info → tunned_geobr-1.0.2.dist-info}/RECORD
CHANGED
@@ -1,8 +1,8 @@
-tunned_geobr-1.0.
-tunned_geobr-1.0.
-tunned_geobr-1.0.
-tunned_geobr-1.0.
-tunned_geobr/__init__.py,sha256=
+tunned_geobr-1.0.2.dist-info/METADATA,sha256=mmz2jbljwyJeyc1qcdGLzi4bHmmeuUMwzOI_GLugK5Q,5080
+tunned_geobr-1.0.2.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
+tunned_geobr-1.0.2.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+tunned_geobr-1.0.2.dist-info/licenses/LICENSE.txt,sha256=mECZRcbde3HssOKe1Co4zgqBLGVN0OWpTsEy3LIbcRA,75
+tunned_geobr/__init__.py,sha256=rxOGWhc2o3F8qDORi34uF4sCGouQFK74bKpuuAXWlHY,7393
 tunned_geobr/data/grid_state_correspondence_table.csv,sha256=FpkBuX_-lRXQ1yBrQODxQgG9oha9Fd8A8zGKfdsDAmk,2660
 tunned_geobr/list_geobr.py,sha256=6rsdtSZUvFrS-n5KXLOSQ34P1AK_yMFAj_MVZRvvZmQ,17278
 tunned_geobr/lookup_muni.py,sha256=ny1zU4i6OagvL4Mrc6XQWPgn2RrJa_mXlKXh81oVYsM,3462
@@ -65,13 +65,13 @@ tunned_geobr/read_hydroelectric_inventory_aai_studies.py,sha256=GWnSzsnMJSDjSpXF
 tunned_geobr/read_ibama_embargoes.py,sha256=hzbWiifgoCgq7TP4X3H4TWFFkLnE5Y9969KUd3aJsek,3251
 tunned_geobr/read_icmbio_embargoes.py,sha256=Xn3oxt4yK8wE6sAKWsU91EyU2h0yLysIbtQk2VUuJ5A,3107
 tunned_geobr/read_icmbio_infractions.py,sha256=Heolqhuxc3sUby4RhxJZ88djIBWRcXkFbubhXgkpYos,3182
-tunned_geobr/read_immediate_region.py,sha256=
-tunned_geobr/read_indigenous_land.py,sha256=
-tunned_geobr/read_intermediate_region.py,sha256=
+tunned_geobr/read_immediate_region.py,sha256=rxPfNYTvYM1n09KNJ1QkdLdXukvO6gZ4wU9IZP-Mum0,2465
+tunned_geobr/read_indigenous_land.py,sha256=0QmsWQjq1RNS6GqF_vfPzTibBv09Qrxw-Mgg0GE9yzA,2839
+tunned_geobr/read_intermediate_region.py,sha256=b-Txa79C5MIcQDUwudAorah4QFARM277-bhAgcQ7jOo,2513
 tunned_geobr/read_isolated_systems.py,sha256=k0dxfUNubsJXV3_5ph1CWghR_Mfpn4oe_4zLkg7c5Cs,5343
-tunned_geobr/read_meso_region.py,sha256=
+tunned_geobr/read_meso_region.py,sha256=siwlqiZ9Q8XMmrEf4YjS6TJiQW0fsTq0udi2oByIiL0,2396
 tunned_geobr/read_metro_area.py,sha256=e18jyXrRMwQTv_ZO2hGoyC8qZsV6NlYfWXsu6DusRQM,1498
-tunned_geobr/read_micro_region.py,sha256=
+tunned_geobr/read_micro_region.py,sha256=hs1NhVfb_LazLLp9EmLxQCDscPlkqqG-OjTv2fIGoFw,2412
 tunned_geobr/read_mining_processes.py,sha256=UmywViEDD9hx7qcDj9CMRHdPM69NQhsRB4870Y77QSs,2569
 tunned_geobr/read_municipal_seat.py,sha256=9Vi-q1jzY8n086O-nNY1sVkVzV_NZbdzE5juosCcVZI,1142
 tunned_geobr/read_municipality.py,sha256=dZM1BVi3U9ZvasLADV-ciKVr9R4o92dRowpEVdVkvYw,5651
@@ -79,8 +79,7 @@ tunned_geobr/read_municipality_direct.py,sha256=VrZR_5__DsV5IbbX-sr56WT-P4M_tVdn
 tunned_geobr/read_natural_caves.py,sha256=-XjoRxhT_yYy0fZu87S6RRUZ-cyaWPqWqOrd9Y8ERKo,3073
 tunned_geobr/read_natural_gas_delivery_points.py,sha256=nJJmqbJJ5Xx2P2wVL9yXdGLuPI0O8pCCL9zDkHQtZOs,5387
 tunned_geobr/read_natural_gas_processing_hub.py,sha256=qI5o-4TmPfi3h0gYNWjeMR5GsRAq-fsXoB62llqt9RA,5367
-tunned_geobr/read_neighborhood.py,sha256=
-tunned_geobr/read_neighborhoods_2022.py,sha256=EX1-5CM3tNe05HE1F5r3YtZ-66X_NC67u_DzrmzKvTc,3952
+tunned_geobr/read_neighborhood.py,sha256=xM2Ztl2uvaQ4fLmO9lCboa-lqGoUNlo_OM_ktVqlOn0,3946
 tunned_geobr/read_og_basement.py,sha256=nwUOn-BMYC3mvvP9uTBLYly00drIw6CwU5lHJeOdi-Y,4617
 tunned_geobr/read_og_effective_geographic_basin.py,sha256=Qvy--_A8oGrL-Os3mfofr14MA0qWv3s5FFdtIabBJ8E,5457
 tunned_geobr/read_og_ipa_direct_evidence.py,sha256=N5nDr7AinKFqhcfgnvygVjzpdN2D1TP5VSILS8gkIgU,4738
@@ -117,13 +116,13 @@ tunned_geobr/read_processing_facilities.py,sha256=8iCveDTk7MXm1bmb1pcknzen62HTGY
 tunned_geobr/read_public_aerodromes.py,sha256=nq3b9HF5_e-yeNcSfQ5ktdAGHKbSfDD_imj-tOhjKJA,2909
 tunned_geobr/read_quilombola_areas.py,sha256=iY-r4YDRjaGyO-iPRBm1kWDkN_-axjYxMAQyAjIfG68,4288
 tunned_geobr/read_railways.py,sha256=J6eM0yr049CaOL95PMd4sGc7JJHiEinJhqf0ThCOClg,2763
-tunned_geobr/read_region.py,sha256=
+tunned_geobr/read_region.py,sha256=X7IwsAVxwUl0apsExSuBr9kIK_7IUehPenLXAF-JFDA,2331
 tunned_geobr/read_rppn.py,sha256=nXDzclIiqhutkYWvxlIH_mYSNGdfRVSUzSzi-15X-3w,3963
 tunned_geobr/read_schools.py,sha256=kxaRwuKmZDPgSuhCUd_Ltxo-6_z3b3jXY9Qo0MY_b-A,1364
 tunned_geobr/read_sedimentary_basins.py,sha256=mpCde4-WRdAAuHF-AwrODd0GpxRhzJOuP60U6Zbl9pE,4583
 tunned_geobr/read_semiarid.py,sha256=pxxYTWq8_UPUyblA7_FXXXRz-XOCrrebCvYQ-kgDSrU,1358
 tunned_geobr/read_settlements.py,sha256=C47Wj4DhSDa-pSFfYK4uGDwtu4sUwqPMr-CuuxS95xg,3060
-tunned_geobr/read_sigef_properties.py,sha256=
+tunned_geobr/read_sigef_properties.py,sha256=EBqTUbL9Kg9EUPh--nYHX2OVacymq80YrPCZXKAP6sQ,3108
 tunned_geobr/read_snci_properties.py,sha256=lKhRSBeayD3M_ffljSf5_Sn57VhYh0g3lwFnOgpYji0,3226
 tunned_geobr/read_state.py,sha256=JgV3cR0LFbmwIzuzPbR_Zfy1bR_2eBeEPxunozctuag,3819
 tunned_geobr/read_state_direct.py,sha256=8Tdz-gVH_t90BJngcfcpr0VLs5HfCUxRgRQj8hy4Bt0,3826
@@ -138,4 +137,4 @@ tunned_geobr/read_water_bodies_ana.py,sha256=Z-dpTPVgRHVndTeSFxx8uXn7ufMg2jm0Dlz
 tunned_geobr/read_waterways.py,sha256=mEdoVogYWr5EYZ8bE3xMCVWyLrHYU7xTL2lUE0XbDAM,2951
 tunned_geobr/read_weighting_area.py,sha256=m2X5Ua3jRqLlkqCQbIzR2jmo58pzqkyR3UYcGtgy20E,2325
 tunned_geobr/utils.py,sha256=WT9PSGWvcERjj3yhfTvyWSE5ZiEjO4tYK5xIj5jJCg8,8170
-tunned_geobr-1.0.
+tunned_geobr-1.0.2.dist-info/RECORD,,
tunned_geobr/read_neighborhoods_2022.py
DELETED
@@ -1,99 +0,0 @@
-import geopandas as gpd
-import tempfile
-import os
-import requests
-import subprocess
-from io import BytesIO
-
-def read_neighborhoods_2022(simplified=False):
-    """Download Brazilian Neighborhoods data from IBGE (2022 Census).
-
-    This function downloads and processes the Brazilian Neighborhoods data
-    from IBGE (Brazilian Institute of Geography and Statistics) for the 2022 Census.
-    Original source: IBGE - Instituto Brasileiro de Geografia e Estatística
-
-    Parameters
-    ----------
-    simplified : boolean, by default False
-        If True, returns a simplified version of the dataset with fewer columns
-
-    Returns
-    -------
-    gpd.GeoDataFrame
-        Geodataframe with Brazilian neighborhoods data
-
-    Example
-    -------
-    >>> from tunned_geobr import read_neighborhoods_2022
-
-    # Read neighborhoods data
-    >>> neighborhoods = read_neighborhoods_2022()
-    """
-
-    url = "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2022/bairros/shp/BR/BR_bairros_CD2022.zip"
-
-    try:
-        # Create a temporary directory
-        with tempfile.TemporaryDirectory() as temp_dir:
-            # Download the zip file to the temporary directory
-            zip_file_path = os.path.join(temp_dir, "neighborhoods.zip")
-
-            # Download the file
-            response = requests.get(url)
-            if response.status_code != 200:
-                raise Exception("Failed to download neighborhoods data from IBGE")
-
-            # Save the content to a file
-            with open(zip_file_path, 'wb') as f:
-                f.write(response.content)
-
-            # Use unzip command line tool to extract the file (handles more compression methods)
-            try:
-                subprocess.run(['unzip', '-o', zip_file_path, '-d', temp_dir],
-                               check=True,
-                               stdout=subprocess.PIPE,
-                               stderr=subprocess.PIPE)
-            except subprocess.CalledProcessError as e:
-                raise Exception(f"Failed to extract zip file: {e.stderr.decode()}")
-
-            # Find the shapefile
-            shp_files = []
-            for root, dirs, files in os.walk(temp_dir):
-                shp_files.extend([os.path.join(root, f) for f in files if f.endswith('.shp')])
-
-            if not shp_files:
-                raise Exception("No shapefile found in the downloaded data")
-
-            # Read the shapefile
-            gdf = gpd.read_file(shp_files[0])
-
-            # Convert to SIRGAS 2000 (EPSG:4674) if not already
-            if gdf.crs is None or gdf.crs.to_epsg() != 4674:
-                gdf = gdf.to_crs(4674)
-
-            if simplified:
-                # Keep only the most relevant columns
-                # Note: Column names may need adjustment based on actual data
-                columns_to_keep = [
-                    'geometry',
-                    'CD_BAIRRO',  # Neighborhood Code
-                    'NM_BAIRRO',  # Neighborhood Name
-                    'CD_MUN',  # Municipality Code
-                    'NM_MUN',  # Municipality Name
-                    'CD_UF',  # State Code
-                    'NM_UF',  # State Name
-                    'SIGLA_UF',  # State Abbreviation
-                    'AREA_KM2'  # Area in square kilometers
-                ]
-
-                # Filter columns that actually exist in the dataset
-                existing_columns = ['geometry'] + [col for col in columns_to_keep[1:] if col in gdf.columns]
-                gdf = gdf[existing_columns]
-
-    except Exception as e:
-        raise Exception(f"Error downloading neighborhoods data: {str(e)}")
-
-    return gdf
-
-if __name__ == '__main__':
-    read_neighborhoods_2022()
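The deleted module above is essentially the body that now ships as read_neighborhood.py, whose docstring example and __main__ guard still reference the old read_neighborhoods_2022 name. Downstream code that imported the old function will break on 1.0.2; a one-line alias can bridge the rename on the caller's side — a suggested workaround, not something the package provides:

    # Hypothetical compatibility shim for callers of the removed function
    from tunned_geobr import read_neighborhood as read_neighborhoods_2022

    neighborhoods = read_neighborhoods_2022(simplified=True)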
{tunned_geobr-1.0.0.dist-info → tunned_geobr-1.0.2.dist-info}/entry_points.txt
File without changes
{tunned_geobr-1.0.0.dist-info → tunned_geobr-1.0.2.dist-info}/licenses/LICENSE.txt
File without changes