Let’s choose 2 other countries to compare to Canada
gapwide <- gapminder %>%
filter((country=="Algeria")|(country=="Bahrain")|(country=="Canada")) %>%
pivot_wider(id_cols= year,
names_from= country,
values_from = lifeExp)
gapwide %>%
DT::datatable()
Let’s choose Algeria and Bahrain to plot against each other.
gapwide %>%
ggplot(aes(x=Algeria, y=Bahrain))+
geom_point()+
labs(title="Scatter Plot of Bahrain's Life Expectancy vs Algeria's Life Expectancy",x="Algeria(Years)",y="Bahrain(Years)")+
theme_bw()
Over time, life Expectancy of Bahrain is higher compared to Algeria
gapwide %>%
pivot_longer(cols = c(-year),
names_to = "Country",
values_to="Life Expectancy") %>%
DT::datatable()
gap_mulit_wide <- gapminder %>%
filter(country== "Australia"| country == "Bahrain") %>%
pivot_wider(id_cols = year,
names_from = country,
values_from = c(lifeExp,gdpPercap))
gap_mulit_wide %>% knitr::kable()
year | lifeExp_Australia | lifeExp_Bahrain | gdpPercap_Australia | gdpPercap_Bahrain |
---|---|---|---|---|
1952 | 69.120 | 50.939 | 10039.60 | 9867.085 |
1957 | 70.330 | 53.832 | 10949.65 | 11635.799 |
1962 | 70.930 | 56.923 | 12217.23 | 12753.275 |
1967 | 71.100 | 59.923 | 14526.12 | 14804.673 |
1972 | 71.930 | 63.300 | 16788.63 | 18268.658 |
1977 | 73.490 | 65.593 | 18334.20 | 19340.102 |
1982 | 74.740 | 69.052 | 19477.01 | 19211.147 |
1987 | 76.320 | 70.750 | 21888.89 | 18524.024 |
1992 | 77.560 | 72.601 | 23424.77 | 19035.579 |
1997 | 78.830 | 73.925 | 26997.94 | 20292.017 |
2002 | 80.370 | 74.795 | 30687.75 | 23403.559 |
2007 | 81.235 | 75.635 | 34435.37 | 29796.048 |
gap_mulit_wide %>%
pivot_longer(cols=c(-year),
names_to = c(".value", "country"),
names_sep = "_") %>%
knitr::kable()
year | country | lifeExp | gdpPercap |
---|---|---|---|
1952 | Australia | 69.120 | 10039.596 |
1952 | Bahrain | 50.939 | 9867.085 |
1957 | Australia | 70.330 | 10949.650 |
1957 | Bahrain | 53.832 | 11635.799 |
1962 | Australia | 70.930 | 12217.227 |
1962 | Bahrain | 56.923 | 12753.275 |
1967 | Australia | 71.100 | 14526.125 |
1967 | Bahrain | 59.923 | 14804.673 |
1972 | Australia | 71.930 | 16788.629 |
1972 | Bahrain | 63.300 | 18268.658 |
1977 | Australia | 73.490 | 18334.198 |
1977 | Bahrain | 65.593 | 19340.102 |
1982 | Australia | 74.740 | 19477.009 |
1982 | Bahrain | 69.052 | 19211.147 |
1987 | Australia | 76.320 | 21888.889 |
1987 | Bahrain | 70.750 | 18524.024 |
1992 | Australia | 77.560 | 23424.767 |
1992 | Bahrain | 72.601 | 19035.579 |
1997 | Australia | 78.830 | 26997.937 |
1997 | Bahrain | 73.925 | 20292.017 |
2002 | Australia | 80.370 | 30687.755 |
2002 | Bahrain | 74.795 | 23403.559 |
2007 | Australia | 81.235 | 34435.367 |
2007 | Bahrain | 75.635 | 29796.048 |
## .value separates values for the measures of lifeExp and gdpPerCap
guest <- read_csv("https://raw.githubusercontent.com/STAT545-UBC/Classroom/master/data/wedding/attend.csv")
## Parsed with column specification:
## cols(
## party = col_double(),
## name = col_character(),
## meal_wedding = col_character(),
## meal_brunch = col_character(),
## attendance_wedding = col_character(),
## attendance_brunch = col_character(),
## attendance_golf = col_character()
## )
email <- read_csv("https://raw.githubusercontent.com/STAT545-UBC/Classroom/master/data/wedding/emails.csv")
## Parsed with column specification:
## cols(
## guest = col_character(),
## email = col_character()
## )
Each guest should on one line for each email and guest field changed to name to make it easier to join
Email <- email %>%
separate_rows(email,guest, sep = ",") %>%
select(name=guest,email) %>%
mutate(name=trimws(name, "both"))
## There is a white space in Email dataset, removed this
Assumption: Only including guests from guest list who have a corresponding email from the email tibble: (no additonal emails, no additional guests):
guest_emails <- Email %>%
inner_join(guest,Email, by="name")
guest_emails %>% DT::datatable()
The names of peoples with emails but are not on the guest list:
guest_emails %>%
select(name,email) %>%
setdiff(Email,.) %>%
knitr::kable()
name | |
---|---|
Turner Jones | tjjones12@hotmail.ca |
Albert Marshall | themarshallfamily1234@gmail.com |
Vivian Marshall | themarshallfamily1234@gmail.com |
These 3 are not found in the guest dataset.
A new guest list with names of people on the original guest list with emails and those not on the original guest list who also have emails (found on email dataset but not guest dataset)
Email %>%
left_join(guest,Email, by="name") %>%
DT::datatable()
There are 5 guests who don’t have emails in the email dataset. These are the ones that don’t have a corresponding email in the email tibble but are found on the guest list (guest dataset)
Let’s find out who the five guests on the guest list that don’t have emails are :
guest_emails%>%
select(-email) %>%
setdiff(guest,.) %>%
knitr::kable()
name | party | meal_wedding | meal_brunch | attendance_wedding | attendance_brunch | attendance_golf |
---|---|---|---|---|---|---|
Atlanta Connolly | 5 | PENDING | PENDING | PENDING | PENDING | PENDING |
Denzel Connolly | 5 | fish | Menu B | CONFIRMED | CONFIRMED | CONFIRMED |
Chanelle Shah | 5 | chicken | Menu C | CONFIRMED | CONFIRMED | CONFIRMED |
Cosmo Dunkley | 10 | PENDING | PENDING | PENDING | PENDING | PENDING |
Cai Mcdaniel | 11 | fish | Menu C | CONFIRMED | CONFIRMED | CONFIRMED |