icon picker
Python

Libraries Used

import numpy as np
import pandas as pd

Data Loading

Mount Google Drive

from google.colab import drive
drive.mount('/contentdrive/')

Load Regions Table

# regions table
path='/contentdrive/MyDrive/SQL - Deforestation Porject/DB Files/regions.csv'
regions=pd.read_csv(path)
regions.head()
regions
Afghanistan
AFG
South Asia
Low income
Albania
ALB
Europe & Central Asia
Algeria
DZA
Middle East & North Africa
American Samoa
ASM
East Asia & Pacific
Andorra
AND
Europe & Central Asia
There are no rows in this table

Load Land Area Table

# land area table
path='/contentdrive/MyDrive/SQL - Deforestation Porject/DB Files/land_area.csv'
land_area=pd.read_csv(path)
land_area.head()
land_area
country_code
country_name
year
total_area_sq_mi
ABW
Aruba
2,016
69.5
AFG
Afghanistan
2,016
252,069.5
AGO
Angola
2,016
481,351.35
ALB
Albania
2,016
10,579.15
AND
Andorra
2,016
181.47
There are no rows in this table

Load Forest Area Table

# forest area table
path='/contentdrive/MyDrive/SQL - Deforestation Porject/DB Files/forest_area.csv'
forest_area=pd.read_csv(path)
forest_area.head()
forest area
country_code
country_name
year
forest_area_sqkm
ABW
Aruba
2,016
4.2
AFG
Afghanistan
2,016
13,500
AGO
Angola
2,016
577,311.99
ALB
Albania
2,016
7,705.4
AND
Andorra
2,016
160
There are no rows in this table

Data Processing

Merge Tables

# forsetation view table
forestation = regions.merge(land_area, on=["country_code"], how="inner")\
.merge(forest_area, on=["country_code", "year"], how="inner")

Convert Total Area to sq km

# Convert total area to sq km
forestation['total_area_sqkm'] = forestation['total_area_sq_mi'] * 2.59
# Remove total_area_sq_mi column
forestation.drop(columns=["total_area_sq_mi"], inplace=True)
# Remove nulls from the table
forestation = forestation.dropna(subset=["total_area_sqkm", "forest_area_sqkm"])
# Create a new column "forest_area_percent"
forestation['forest_area_percent'] = (forestation['forest_area_sqkm'] / forestation['total_area_sqkm'] * 100).round(2)

Display Forestation Data

forestation.head()
forestation
country_name_x
country_code
region
income_group
country_name_y
year
forest_area_sqkm
total_area_sqkm
forest_area_percent
Afghanistan
AFG
South Asia
Low income
Afghanistan
2,016
13,500
652,860.01
2.07
Afghanistan
AFG
South Asia
Low income
Afghanistan
2,015
13,500
652,860.01
2.07
Afghanistan
AFG
South Asia
Low income
Afghanistan
2,014
13,500
652,860.01
2.07
Afghanistan
AFG
South Asia
Low income
Afghanistan
2,013
13,500
652,860.01
2.07
Afghanistan
AFG
South Asia
Low income
Afghanistan
2,012
13,500
652,860.01
2.07
There are no rows in this table

Analysis

Total Forest Area in 1990

# Part 1 .a. What was the total forest area (in sq km) of the world in 1990?
# فلترة الجدول على 1990 & world
World_1990= forestation[(forestation['year'] == 1990) & (forestation['country_name'].str.lower() == 'world')]
# حساب إجمالي مساحة الغابات لعام 1990
total_forest_area_1990 = World_1990['forest_area_sqkm'].sum()
print(total_forest_area_1990)
Total Forest Area in 1990: 41282694.9 sq km

Total Forest Area in 2016

# Part 1 .b. What was the total forest area (in sq km) of the world in 2016?
# فلترة الجدول على 2016 & World
World_2016= forestation[(forestation['year'] == 2016) & (forestation['country_name'].str.lower() == 'world')]

# حساب إجمالي مساحة الغابات لعام 2016
total_forest_area_2016 = World_2016['forest_area_sqkm'].sum()
print(total_forest_area_2016)
Total Forest Area in 2016: 39958245.9 sq km

Change in Forest Area

# Part 1 .c.What was the change (in sq km) in the forest area of the world from 1990 to 2016?
# the change in the forest area from 1990 to 2016
change_in_forest_area = total_forest_area_1990 - total_forest_area_2016
print(change_in_forest_area)
Change in Forest Area: 1324449.0 sq km

Percent Change in Forest Area

# Part 1 .d. What was the percent change in forest area of the world between 1990 and 2016?
# the percent change in the forest area from 1990 to 2016
percent_change_in_forest_area = round((total_forest_area_1990 - total_forest_area_2016)/total_forest_area_1990 * 100,2)
print(percent_change_in_forest_area)
Percent Change in Forest Area: 3.21 %

Closest Country's Total Area to Forest Area Lost

# Part 1 .e. If you compare the amount of forest area lost between 1990 and 2016, to which country's total area in 2016 is it closest to?
# حساب مساحة كل دولة
forestation_2016 = forestation[(forestation['year'] == 2016) & (forestation['country_name'].str.lower() != 'world')]\
[['country_name','total_area_sqkm']].copy()
# حساب الفرق المطلق لكل دولة
forestation_2016['difference'] = abs(change_in_forest_area - forestation_2016['total_area_sqkm'])
# الدولة ذات الفرق الأقل
closest_country = forestation_2016.sort_values('difference',ascending=True).head(1)
print(closest_country)
Closest Country: Peru with a total area of 1,280,000 sq km.
country_name total_area_sqkm difference 4131 Peru 1.280000e+06 44449.0109

Regional Insights

Percent Forest Area by Region

# Part 2.(Regional Outlook) Create a table that shows the Regions and their percent forest area in 1990 and 2016.
# فلترة الجدول على عام 1990 وعام 2016
forestation_1990_2016 = forestation[forestation['year'].isin([1990, 2016])]

# تجميع البيانات حسب المنطقة والسنة
forestation_1990_2016 = forestation_1990_2016.groupby(['region', 'year'], as_index=False).agg(
total_land_area=('total_area_sqkm', 'sum'),
total_forest_area=('forest_area_sqkm', 'sum'))

# حساب نسبة الغابات
forestation_1990_2016['percent_forest_area'] = round((forestation_1990_2016['total_forest_area'] / forestation_1990_2016['total_land_area']) * 100,2)
# جدول يحتوي على region - year - percent forest area
r_insights=forestation_1990_2016[['region','year','percent_forest_area']]
print(r_insights)
r_insights
region
year
percent_forest_area
East Asia & Pacific
1,990
25.77
East Asia & Pacific
2,016
26.36
Europe & Central Asia
1,990
37.27
Europe & Central Asia
2,016
38.06
Latin America & Caribbean
1,990
51.03
Latin America & Caribbean
2,016
46.16
Middle East & North Africa
1,990
1.78
Middle East & North Africa
2,016
2.07
North America
1,990
35.65
North America
2,016
36.04
South Asia
1,990
16.51
South Asia
2,016
17.51
Sub-Saharan Africa
1,990
32.19
Sub-Saharan Africa
2,016
27.56
World
1,990
32.42
World
2,016
31.38
There are no rows in this table

the percent forest of the entire world in 2016

# Part 2 .a. What was the percent forest of the entire world in 2016?
World_2016 = r_insights[(r_insights['year'] == 2016) & (r_insights['region'].str.lower() == 'world')]
print(World_2016)
region year percent_forest_area World 2016 31.38

Highest Percent Forest in 2016

# Part 2 .a. What was the percent forest of the entire world in 2016?
World_2016 = r_insights[(r_insights['year'] == 2016) & (r_insights['region'].str.lower() == 'world')]
print(World_2016)
Highest Percent Forest Region in 2016: Latin America & Caribbean with 46.16%.
region year percent_forest_area 5 Latin America & Caribbean 2016 46.16

Lowest Percent Forest in 2016

# Part 2 .a. Which region had the HIGHEST percent forest in 2016?
region_2016 = r_insights[r_insights['year'] == 2016]
highest_region = region_2016.sort_values('percent_forest_area',ascending=False).head(1)
print(highest_region)
Lowest Percent Forest Region in 2016: Middle East & North Africa with 2.07%.
region year percent_forest_area 7 Middle East & North Africa 2016 2.07

the percent forest of the entire world in 1990

# Part 2 .b. What was the percent forest of the entire world in 1990?
World_1990 = r_insights[(r_insights['year'] == 1990) & (r_insights['region'].str.lower() == 'world')]
print(World_1990)
region year percent_forest_area 4 Latin America & Caribbean 1990 51.03

the LOWEST percent forest in 1990

# Part 2 .b.which region had the LOWEST percent forest in 1990?
region_1990 = r_insights[r_insights['year'] == 1990]
lowest_region = region_1990.sort_values('percent_forest_area',ascending=True).head(1)
print(lowest_region)
region year percent_forest_area 6 Middle East & North Africa 1990 1.78

the world DECREASED in forest area from 1990 to 2016

# Part 2 .c.which regions of the world DECREASED in forest area from 1990 to 2016?
# one table for 1990 / another table for 2016
region_1990 =r_insights[(r_insights['year'] == 1990) & (r_insights['region'] != 'World')]
region_2016 =r_insights[(r_insights['year'] == 2016) & (r_insights['region'] != 'World')]
# merge region 1990 & region 2016
merge_regions= region_1990.merge(region_2016, on=["region"], how="left", suffixes=('_1990', '_2016'))
# create a new column(pfa_decrease)
merge_regions['pfa_decrease'] = merge_regions['percent_forest_area_1990'] - merge_regions['percent_forest_area_2016']
# filter by pfa_decrease >0
result=merge_regions[merge_regions['pfa_decrease'] > 0]
print(result[['region','pfa_decrease']])
region pfa_decrease 2 Latin America & Caribbean 4.87 6 Sub-Saharan Africa 4.63

Country-Level Detail

Largest Amount Decrease in Forest Area

# Part 3 - Country-Level Detail
# a. Which 5 countries saw the largest amount decrease in forest area from 1990 to 2016? What was the difference in forest area for each?
# فلترة الجدول على عام 1990 وعام 2016
country_1990 = forestation[(forestation['year'] == 1990) & (forestation['country_name'] != 'World')]
country_2016 = forestation[(forestation['year'] == 2016) & (forestation['country_name'] != 'World')]
# merge country 1990 & country 2016
merged_countries= country_1990.merge(country_2016, on=["country_name"], how="left", suffixes=('_1990', '_2016'))
# create a new column (fa_difference)
merged_countries['fa_difference'] = round((merged_countries['forest_area_sqkm_1990'] - merged_countries['forest_area_sqkm_2016']),2)
# top 5 by the largest amount decrease in forest area
result = merged_countries.sort_values(by='fa_difference', ascending=False).head(5)
print(result[['country_name', 'fa_difference']])
-
country_name
fa_difference
Brazil
541,510
Indonesia
282,193.98
Myanmar
107,234
Nigeria
106,506
Tanzania
102,320
There are no rows in this table

Largest Percent Decrease in Forest Area

# Part 3 - Country-Level Detail
# b. Which 5 countries saw the largest percent decrease in forest area from 1990 to 2016? What was the percent change to 2 decimal places for each?
# فلترة الجدول على عام 1990 وعام 2016
country_1990 = forestation[(forestation['year'] == 1990) & (forestation['country_name'] != 'World')]
country_2016 = forestation[(forestation['year'] == 2016) & (forestation['country_name'] != 'World')]
# merge country 1990 & country 2016
merged_countries= country_1990.merge(country_2016, on=["country_name"], how="left", suffixes=('_1990', '_2016'))
# create a new column (pfa_difference)
merged_countries['pfa_difference'] = round(((merged_countries['forest_area_sqkm_1990'] - merged_countries['forest_area_sqkm_2016'])/
merged_countries['forest_area_sqkm_1990'])*100,2)
# top 5 by the largest percent decrease in forest area
result = merged_countries.sort_values(by='pfa_difference', ascending=False).head(5)
print(result[['country_name', 'pfa_difference']])
--
country_name
pfa_difference
Togo
75.45
Nigeria
61.8
Uganda
59.13
Mauritania
46.75
Honduras
45.03
There are no rows in this table

Countries with Higher Percent Forestation than the USA

# Part 3 - Country-Level Detail
# c. How many countries had a percent forestation higher than the United States in 2016?
# فلترة الجدول على عام 2016
country_2016 = forestation[(forestation['year'] == 2016) & (forestation['country_code'] != 'Wld')]
countries_higher = country_2016[country_2016['forest_area_percent'] >
country_2016[country_2016['country_code'].str.lower() == 'usa']['forest_area_percent'].iloc[0]]
print(len(countries_higher))
Number of Countries with Higher Percent Forestation than the USA in 2016: 94

94


Want to print your doc?
This is not the way.
Try clicking the ⋯ next to your doc name or using a keyboard shortcut (
CtrlP
) instead.