# Import data ----
# Every table is downloaded directly from the course website. Countries is the
# lookup table (joined later on its `name` column); Income, LifeExp and
# Population are wide tables keyed by `geo` that are reshaped to long format
# further down.
Countries <- read.csv(
  file = "https://tkelleman.github.io/tkweb/Week5/countries_total.csv"
)
Income <- read.csv(
  file = "https://tkelleman.github.io/tkweb/Week5/income_per_person.csv"
)
LifeExp <- read.csv(
  file = "https://tkelleman.github.io/tkweb/Week5/life_expectancy_years.csv"
)
Population <- read.csv(
  file = "https://tkelleman.github.io/tkweb/Week5/population_total.csv"
)

## A - Reshape Income Per Person from wide to long (longitudinal) format so
## that the result has three columns: Country, Year, and Income.
# pivot_longer() replaces the superseded gather(). Every column except `geo`
# is stacked into Year/Income pairs, and rows with a missing income are
# dropped. Year still holds the raw column label at this point; the "X" prefix
# is stripped once the final data set has been assembled.
# as.data.frame() keeps the result a plain data frame for the merge() calls
# that follow.
IncomeEdit <- Income %>%
  pivot_longer(
    cols = -geo,
    names_to = "Year",
    values_to = "Income",
    values_drop_na = TRUE
  ) %>%
  as.data.frame()
# Rename by column name instead of by position so the rename cannot hit the
# wrong column if the column order ever changes.
names(IncomeEdit)[names(IncomeEdit) == "geo"] <- "Country"

## B - Reshape Life Expectancy in Years from wide to long format so that the
## result has three columns: Country, Year, and LifeExpectancy.
# pivot_longer() replaces the superseded gather(). Every column except `geo`
# is stacked into Year/LifeExpectancy pairs, and rows with a missing value are
# dropped. Year still holds the raw column label at this point.
# as.data.frame() keeps the result a plain data frame for the merge() calls
# that follow.
LifeExpEdit <- LifeExp %>%
  pivot_longer(
    cols = -geo,
    names_to = "Year",
    values_to = "LifeExpectancy",
    values_drop_na = TRUE
  ) %>%
  as.data.frame()
# Rename by column name instead of by position.
names(LifeExpEdit)[names(LifeExpEdit) == "geo"] <- "Country"

## C - Join the two long tables into LifeExpIncom, which carries Country,
## Year, Income, and LifeExpectancy.
# merge() performs an inner join by default, so only Country/Year pairs that
# appear in both inputs are kept.
LifeExpIncom <- merge(
  x = IncomeEdit,
  y = LifeExpEdit,
  by = c("Country", "Year")
)

## D - Attach the country/region lookup so the data set carries income, life
## expectancy, and country region.
# all.x = TRUE makes this a left join: every LifeExpIncom row is kept, and
# rows whose Country has no matching `name` in Countries get NA in the lookup
# columns.
LifeIncomCount <- merge(
  x = LifeExpIncom,
  y = Countries,
  by.x = "Country",
  by.y = "name",
  all.x = TRUE
)

## E - Merge the previous result with population size so that the final data
## set has income, life expectancy, population size, and country region.
# Reshape Population to long format first. pivot_longer() replaces the
# superseded gather(); rows with a missing population are dropped.
PopulationEdit <- Population %>%
  pivot_longer(
    cols = -geo,
    names_to = "Year",
    values_to = "Population",
    values_drop_na = TRUE
  ) %>%
  as.data.frame()
# Rename by column name instead of by position.
names(PopulationEdit)[names(PopulationEdit) == "geo"] <- "Country"

# Inner join on Country and Year: only country-years present in both inputs
# are kept.
FinalData <- merge(LifeIncomCount, PopulationEdit, by = c("Country", "Year"))

# Keep exactly the six analysis columns. Naming the columns to keep is less
# fragile than listing every lookup column to drop.
FinalData <- FinalData[
  ,
  c("Country", "Year", "Income", "LifeExpectancy", "region", "Population")
]

# Strip the leading "X" from the year labels and store Year as an integer so
# it compares and sorts as a number rather than as text. The pattern is
# anchored so only a leading "X" is removed.
FinalData$Year <- as.integer(sub("^X", "", as.character(FinalData$Year)))

#write.csv(FinalData, "FinalData.csv")

## 3 - Create a subset of the longitudinal data set that contains only the
## data for the year 2000. The assignment calls it "2000data"; a syntactic R
## name cannot start with a digit, so it is stored as Y2000data.
Y2000data <- FinalData %>%
  dplyr::filter(Year == 2000)

#Y2000data contains 187 observations of 6 variables
#FinalData contains 40,437 observations of 6 variables

# Colour palette for the region legend, passed to scale_color_manual() below.
cols1 <- c("#332288", "#117733", "#44AA99", "#88CCEE", "#DDCC77", "#CC6677")

# Static bubble chart for the year 2000: income on x, life expectancy on y,
# point size mapped to population and point colour mapped to region.
# - Y2000data holds a single year, so the gganimate transition/easing layers
#   were dropped; there is nothing to animate between.
# - The legend title is set through the `color` aesthetic; labs() has no
#   `legend` argument, so the previous title was never applied.
# - The unknown `ids` aesthetic and the duplicated `size` mapping were removed.
Y2000Plot <- ggplot(
  Y2000data,
  aes(
    x = Income,
    y = LifeExpectancy,
    size = Population,
    color = region
  )
) +
  geom_point(show.legend = TRUE, alpha = 0.8) +
  scale_color_manual(values = cols1) +
  labs(
    title = "Plot of Life Expectancy by Income in 2000",
    x = "Income",
    y = "Life Expectancy",
    color = "Region"
  )
Y2000Plot


Figure 1: Plot of Life Expectancy by Income in 2000

This plot shows the relationship between life expectancy and income in the year 2000 in the merged data set. The population of each country is represented by the size of its plotted point, and the color of the point represents the country's region. As shown in this figure, countries in the Americas and Europe regions generally have a higher life expectancy than most countries in the Africa region, with a few outliers.

### Full Data Set
# Colour palette for the region legend, passed to scale_color_manual() below.
cols1 <- c("#332288", "#117733", "#44AA99", "#88CCEE", "#DDCC77", "#CC6677")

# Animated bubble chart over every year in FinalData: income on x, life
# expectancy on y, point size mapped to population, point colour mapped to
# region, with one animation state per Year.
# - group = Country tells gganimate which point is the same country from one
#   state to the next, so each bubble is tweened to its own next position.
#   It replaces the unknown `ids` aesthetic.
# - The legend title is set through `color`; the plot has no fill scale, so
#   `fill = "Region"` had no effect.
# - The title typo "Expectency" is corrected.
AllDataPlot <- ggplot(
  FinalData,
  aes(
    x = Income,
    y = LifeExpectancy,
    size = Population,
    color = region
  )
) +
  geom_point(aes(group = Country), show.legend = TRUE, alpha = 0.8) +
  scale_color_manual(values = cols1) +
  labs(
    title = "Plot of Life Expectancy by Income at Year: {closest_state}",
    x = "Income",
    y = "Life Expectancy",
    color = "Region"
  ) +
  transition_states(Year) +
  ease_aes("linear")

# Render to a GIF: 300 frames at 10 frames per second, played forward only.
animate(
  AllDataPlot,
  renderer = gifski_renderer(),
  rewind = FALSE,
  nframes = 300,
  fps = 10
)


Figure 2: Plot of Life Expectancy by Income for Years 1800-2018

This plot shows the relationship between life expectancy and income for all countries in the merged data set. The population of each country is represented by the size of its plotted point, and the color of the point represents the country's region. As shown in the plot, life expectancy increases for all represented countries over the given years, but not all countries show an increase in income.

