Code for quiz 6 more dplyr and our first interactive chart using echarts4r
#Steps 1-6
drug_cos <- read_csv("https://estanny.com/static/week6/drug_cos.csv")
health_cos <- read_csv("http://estanny.com/static/week6/health_cos.csv")
drug_cos %>% glimpse()
Rows: 104
Columns: 9
$ ticker <chr> "ZTS", "ZTS", "ZTS", "ZTS", "ZTS", "ZTS", "ZTS…
$ name <chr> "Zoetis Inc", "Zoetis Inc", "Zoetis Inc", "Zoe…
$ location <chr> "New Jersey; U.S.A", "New Jersey; U.S.A", "New…
$ ebitdamargin <dbl> 0.149, 0.217, 0.222, 0.238, 0.182, 0.335, 0.36…
$ grossmargin <dbl> 0.610, 0.640, 0.634, 0.641, 0.635, 0.659, 0.66…
$ netmargin <dbl> 0.058, 0.101, 0.111, 0.122, 0.071, 0.168, 0.16…
$ ros <dbl> 0.101, 0.171, 0.176, 0.195, 0.140, 0.286, 0.32…
$ roe <dbl> 0.069, 0.113, 0.612, 0.465, 0.285, 0.587, 0.48…
$ year <dbl> 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018…
health_cos %>% glimpse()
Rows: 464
Columns: 11
$ ticker <chr> "ZTS", "ZTS", "ZTS", "ZTS", "ZTS", "ZTS", "ZTS"…
$ name <chr> "Zoetis Inc", "Zoetis Inc", "Zoetis Inc", "Zoet…
$ revenue <dbl> 4233000000, 4336000000, 4561000000, 4785000000,…
$ gp <dbl> 2581000000, 2773000000, 2892000000, 3068000000,…
$ rnd <dbl> 427000000, 409000000, 399000000, 396000000, 364…
$ netincome <dbl> 245000000, 436000000, 504000000, 583000000, 339…
$ assets <dbl> 5711000000, 6262000000, 6558000000, 6588000000,…
$ liabilities <dbl> 1975000000, 2221000000, 5596000000, 5251000000,…
$ marketcap <dbl> NA, NA, 16345223371, 21572007994, 23860348635, …
$ year <dbl> 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,…
$ industry <chr> "Drug Manufacturers - Specialty & Generic", "Dr…
names_drug <- drug_cos %>% names()
names_health <- health_cos %>% names()
intersect(names_drug, names_health)
[1] "ticker" "name" "year"
For ‘drug_cos’ select (in this order): ‘ticker’,‘year’,‘grossmargin’
Extract observations for 2018
Assign output to ‘drug_subset’
For ‘health_cos’ select (in this order) ‘ticker’,‘year’,‘revenue’,‘gp’,‘industry’
Extract observations for 2018
Assign output to ‘health_subset’
drug_subset <- drug_cos %>%
select(ticker,year,grossmargin) %>%
filter(year==2018)
health_subset <- health_cos %>%
select(ticker,year,revenue,gp,industry) %>%
filter(year==2018)
drug_subset %>% left_join(health_subset)
# A tibble: 13 x 6
ticker year grossmargin revenue gp industry
<chr> <dbl> <dbl> <dbl> <dbl> <chr>
1 ZTS 2018 0.672 5.82e 9 3.91e 9 Drug Manufacturers - …
2 PRGO 2018 0.387 4.73e 9 1.83e 9 Drug Manufacturers - …
3 PFE 2018 0.79 5.36e10 4.24e10 Drug Manufacturers - …
4 MYL 2018 0.35 1.14e10 4.00e 9 Drug Manufacturers - …
5 MRK 2018 0.681 4.23e10 2.88e10 Drug Manufacturers - …
6 LLY 2018 0.738 2.46e10 1.81e10 Drug Manufacturers - …
7 JNJ 2018 0.668 8.16e10 5.45e10 Drug Manufacturers - …
8 GILD 2018 0.781 2.21e10 1.73e10 Drug Manufacturers - …
9 BMY 2018 0.71 2.26e10 1.60e10 Drug Manufacturers - …
10 BIIB 2018 0.865 1.35e10 1.16e10 Drug Manufacturers - …
11 AMGN 2018 0.827 2.37e10 1.96e10 Drug Manufacturers - …
12 AGN 2018 0.861 1.58e10 1.36e10 Drug Manufacturers - …
13 ABBV 2018 0.764 3.28e10 2.50e10 Drug Manufacturers - …
*Start with ‘drug_cos’
drug_cos_subset <- drug_cos %>%
filter(ticker=="MRK")
drug_cos_subset
# A tibble: 8 x 9
ticker name location ebitdamargin grossmargin netmargin ros roe
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 MRK Merc… New Jer… 0.305 0.649 0.131 0.15 0.114
2 MRK Merc… New Jer… 0.33 0.652 0.13 0.182 0.113
3 MRK Merc… New Jer… 0.282 0.615 0.1 0.123 0.089
4 MRK Merc… New Jer… 0.567 0.603 0.282 0.409 0.248
5 MRK Merc… New Jer… 0.298 0.622 0.112 0.136 0.096
6 MRK Merc… New Jer… 0.254 0.648 0.098 0.117 0.092
7 MRK Merc… New Jer… 0.278 0.678 0.06 0.162 0.063
8 MRK Merc… New Jer… 0.313 0.681 0.147 0.206 0.199
# … with 1 more variable: year <dbl>
Use left_join to combine the rown and columns of ‘drug_cos_subset’ with the columns of ‘health_cos’
Assign the output to ‘combo_df’
combo_df<- drug_cos_subset %>%
left_join(health_cos)
combo_df
# A tibble: 8 x 17
ticker name location ebitdamargin grossmargin netmargin ros roe
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 MRK Merc… New Jer… 0.305 0.649 0.131 0.15 0.114
2 MRK Merc… New Jer… 0.33 0.652 0.13 0.182 0.113
3 MRK Merc… New Jer… 0.282 0.615 0.1 0.123 0.089
4 MRK Merc… New Jer… 0.567 0.603 0.282 0.409 0.248
5 MRK Merc… New Jer… 0.298 0.622 0.112 0.136 0.096
6 MRK Merc… New Jer… 0.254 0.648 0.098 0.117 0.092
7 MRK Merc… New Jer… 0.278 0.678 0.06 0.162 0.063
8 MRK Merc… New Jer… 0.313 0.681 0.147 0.206 0.199
# … with 9 more variables: year <dbl>, revenue <dbl>, gp <dbl>,
# rnd <dbl>, netincome <dbl>, assets <dbl>, liabilities <dbl>,
# marketcap <dbl>, industry <chr>
Note: the variables ‘ticker’,‘name’,‘location’ and ‘industry’ are the saem for all the observations
Assign the company name to ‘co_name’
co_name<- combo_df %>%
distinct(name) %>%
pull()
co_location <- combo_df %>%
distinct(location) %>%
pull()
co_industry <-combo_df %>%
distinct(industry) %>%
pull()
Put the r inline commands used in the blanks below. When you knit the document the results of the commands will be displayed in your text.
The company ‘r co_name’ is located in ‘r co_location’ and is a member of the ‘r co_industry’ industry group.
Start with ‘combo_df’
Select variables (in this order): ‘year’,‘grossmargin’,‘netmargin’,‘revenue’,‘gp’,‘netincome’
Assign the output to ‘combo_df_subset’
combo_df_subset <- combo_df %>%
select(year,grossmargin,netmargin,revenue,gp,netincome)
combo_df_subset
# A tibble: 8 x 6
year grossmargin netmargin revenue gp netincome
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2011 0.649 0.131 48047000000 31176000000 6272000000
2 2012 0.652 0.13 47267000000 30821000000 6168000000
3 2013 0.615 0.1 44033000000 27079000000 4404000000
4 2014 0.603 0.282 42237000000 25469000000 11920000000
5 2015 0.622 0.112 39498000000 24564000000 4442000000
6 2016 0.648 0.098 39807000000 25777000000 3920000000
7 2017 0.678 0.06 40122000000 27210000000 2394000000
8 2018 0.681 0.147 42294000000 28785000000 6220000000
combo_df_subset %>%
mutate(grossmargin_check=gp/revenue, close_enough=abs(grossmargin_check-grossmargin)<0.001)
# A tibble: 8 x 8
year grossmargin netmargin revenue gp netincome
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2011 0.649 0.131 4.80e10 3.12e10 6.27e 9
2 2012 0.652 0.13 4.73e10 3.08e10 6.17e 9
3 2013 0.615 0.1 4.40e10 2.71e10 4.40e 9
4 2014 0.603 0.282 4.22e10 2.55e10 1.19e10
5 2015 0.622 0.112 3.95e10 2.46e10 4.44e 9
6 2016 0.648 0.098 3.98e10 2.58e10 3.92e 9
7 2017 0.678 0.06 4.01e10 2.72e10 2.39e 9
8 2018 0.681 0.147 4.23e10 2.88e10 6.22e 9
# … with 2 more variables: grossmargin_check <dbl>,
# close_enough <lgl>
Create the variable ‘netmargin_check’ to compare with the variable ‘netmargin’. They should be equal.
Create the variable ‘close_enough’ to check that the absolute value of the difference between ‘netmargin_check’ and ‘netmargin’ is less than 0.001
combo_df_subset%>%
mutate(netmargin_check=netincome/revenue,
close_enough=abs(netmargin_check-netmargin)<0.001)
# A tibble: 8 x 8
year grossmargin netmargin revenue gp netincome
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2011 0.649 0.131 4.80e10 3.12e10 6.27e 9
2 2012 0.652 0.13 4.73e10 3.08e10 6.17e 9
3 2013 0.615 0.1 4.40e10 2.71e10 4.40e 9
4 2014 0.603 0.282 4.22e10 2.55e10 1.19e10
5 2015 0.622 0.112 3.95e10 2.46e10 4.44e 9
6 2016 0.648 0.098 3.98e10 2.58e10 3.92e 9
7 2017 0.678 0.06 4.01e10 2.72e10 2.39e 9
8 2018 0.681 0.147 4.23e10 2.88e10 6.22e 9
# … with 2 more variables: netmargin_check <dbl>, close_enough <lgl>
Fill in the blanks
Put the command you use in the Rchunks in the Rmd file for this quiz
Use the ‘health_cos’ data
For each industry calculate
health_cos %>%
group_by(industry) %>%
summarize(mean_grossmargin_percent=mean(gp/revenue)*100,
median_grossmargin_percent=median(gp/revenue)*100,
min_grossmargin_percent=min(gp/revenue)*100,
max_grossmargin_percent=max(gp/revenue)*100,
)
# A tibble: 9 x 5
industry mean_grossmargi… median_grossmar… min_grossmargin…
* <chr> <dbl> <dbl> <dbl>
1 Biotech… 92.5 92.7 81.7
2 Diagnos… 50.5 52.7 28.0
3 Drug Ma… 75.4 76.4 36.8
4 Drug Ma… 47.9 42.6 34.3
5 Healthc… 20.5 19.6 10.0
6 Medical… 55.9 37.4 28.1
7 Medical… 70.8 72.0 53.2
8 Medical… 10.4 5.38 2.49
9 Medical… 53.9 52.8 40.5
# … with 1 more variable: max_grossmargin_percent <dbl>
mean_grossmargin_percent for the industry Medical Devices is 70.8%
median_grossmargin_percent for the industry Medical Devices is 72%
min_grossmargin_percent for the industry Medical Devices is 53.2%
max_grossmargin_percent for the industry Medical Devices is 84.7
Fill in the blanks.
Use the ‘health_cos’ data
*Extract observations for the ticker AMGN from ‘health_cos’ and assign to the variable ‘health_cos_subset’
health_cos_subset <- health_cos %>%
filter(ticker=="AMGN")
health_cos_subset
# A tibble: 8 x 11
ticker name revenue gp rnd netincome assets liabilities
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AMGN Amge… 1.56e10 1.29e10 3.17e9 3.68e9 4.89e10 29842000000
2 AMGN Amge… 1.73e10 1.41e10 3.38e9 4.34e9 5.43e10 35238000000
3 AMGN Amge… 1.87e10 1.53e10 4.08e9 5.08e9 6.61e10 44029000000
4 AMGN Amge… 2.01e10 1.56e10 4.30e9 5.16e9 6.90e10 43231000000
5 AMGN Amge… 2.17e10 1.74e10 4.07e9 6.94e9 7.14e10 43366000000
6 AMGN Amge… 2.30e10 1.88e10 3.84e9 7.72e9 7.76e10 47751000000
7 AMGN Amge… 2.28e10 1.88e10 3.56e9 1.98e9 8.00e10 54713000000
8 AMGN Amge… 2.37e10 1.96e10 3.74e9 8.39e9 6.64e10 53916000000
# … with 3 more variables: marketcap <dbl>, year <dbl>,
# industry <chr>
Run the code below
health_cos_subset %>%
distinct(name)%>%
pull(name)
[1] "Amgen Inc"
co_name<- health_cos_subset %>%
distinct(name) %>%
pull(name)
You can take output from your code and include it in your text
In following chunk
co_industry <-health_cos_subset %>%
distinct(industry) %>%
pull()
this is outside the Rchunk. Put the r inline commands used in the blanks below. When you knit the document the results of the commands will be displayed in your text.
the company ‘r co_name’ is a member of the ‘r co_industry’ group.
df %>% glimpse()
Rows: 9
Columns: 2
$ industry <chr> "Biotechnology", "Diagnostics & Research", "Dru…
$ med_rnd_rev <dbl> 0.48317287, 0.05620271, 0.17451442, 0.06851879,…
ggplot(data=df,
mapping=aes(
x=reorder(industry,med_rnd_rev),
y=med_rnd_rev
))+
geom_col()+
scale_y_continuous(labels=scales::percent)+
coord_flip()+
labs(
title="Median R&D expenditures",
subtitle="by industry as a percent of revenue from 2011 to 2018",
x=NULL, y=NULL)+
theme_ipsum()
ggsave(filename="preview.png",
path = here::here("_posts","2021-03-12-joining-data"))
df %>%
arrange(med_rnd_rev) %>%
e_charts(
x=industry,
) %>%
e_bar(
serie=med_rnd_rev,
name="median"
) %>%
e_flip_coords() %>%
e_tooltip() %>%
e_title(
text="Median industry R&D expenditures",
subtext="by industry as a percent of revenue from 2011 to 2018",
left="center") %>%
e_legend(FALSE) %>%
e_x_axis(
formatter=e_axis_formatter("percent",digits=0)
) %>%
e_y_axis(
show=FALSE
) %>%
e_theme("infographic")