# These temperatures are in Kelvin, stored as character strings:
temperature <- c("310", "322", "348")
temperature
## [1] "310" "322" "348"
Prepro 2: Demo
The code for this demo can also be downloaded as an R Script (right click → Save Target As..)
Piping
We want to extract the temperature data from a character vector (`temperature`), convert the Kelvin values into Celsius according to the following formula, and finally calculate the mean of all the values:
\[°C = K - 273.15\]
Translated into R-code, this results in the following operation:
# Helper function: subtract y from x (element-wise for vectors).
subtract <- function(x, y) {
  x - y
}

# Nested form: the calls are evaluated from the inside out.
output <- mean(subtract(as.integer(temperature), 273.15))
#                                  \____1____/
#                       \__________2__________/
#              \___________________3___________________/
#         \______________________4______________________/
# 1. Take temperature
# 2. Convert "character" → "integer"
# 3. Subtract 273.15
# 4. Calculate the mean
The whole operation is easier to read if it is written down sequentially:
# Step-by-step version: every intermediate result is stored in `tmp`
# before it is handed to the next function.
tmp <- as.integer(temperature) # 2
tmp <- subtract(tmp, 273.15) # 3
output <- mean(tmp) # 4
output
## [1] 53.51667
The fact that the intermediate results must always be saved and retrieved again in the subsequent operation makes this somewhat cumbersome. This is where “piping” comes into play: It makes the output of one function the first parameter of the subsequent function.
# Piped version: `|>` passes the result on its left as the first
# argument of the function call on its right.
temperature |> # 1
  as.integer() |> # 2
  subtract(273.15) |> # 3
  mean() # 4
## [1] 53.51667
Important
- The `|>` pipe operator was first introduced in R 4.1.
- In addition to the base R pipe operator, there is also a very similar[1] pipe operator, `%>%`, in the magrittr package.
- The Ctrl + Shift + M keyboard shortcut in RStudio inserts a pipe operator.
- By checking the "Use native pipe operator" setting in RStudio (Tools → Global Options → Code), you can control which pipe operator, `|>` or `%>%`, is inserted with the above key combination.
- We recommend using the base R pipe operator `|>`.
Joins
# Example table of students; ZIP is the postal code of each student's residence.
students <- data.frame(
  Matriculation_No = c(100002, 100003, 200003),
  Student = c("Patrick", "Manuela", "Eva"),
  ZIP = c(8006, 8001, 8820)
)
students
##   Matriculation_No Student  ZIP
## 1           100002 Patrick 8006
## 2           100003 Manuela 8001
## 3           200003     Eva 8820
# Lookup table that maps a ZIP code to the name of its locality.
localities <- data.frame(
  ZIP = c(8003, 8006, 8810, 8820),
  LocalityName = c("Zurich", "Zurich", "Horgen", "Wadenswil")
)
localities
##    ZIP LocalityName
## 1 8003       Zurich
## 2 8006       Zurich
## 3 8810       Horgen
## 4 8820    Wadenswil
# dplyr provides the join functions used below
library("dplyr")

# Inner join: only rows whose ZIP occurs in both tables
students |>
  inner_join(localities, by = "ZIP")
##   Matriculation_No Student  ZIP LocalityName
## 1           100002 Patrick 8006       Zurich
## 2           200003     Eva 8820    Wadenswil

# Left join: all students; LocalityName is NA where no ZIP matches
students |>
  left_join(localities, by = "ZIP")
##   Matriculation_No Student  ZIP LocalityName
## 1           100002 Patrick 8006       Zurich
## 2           100003 Manuela 8001         <NA>
## 3           200003     Eva 8820    Wadenswil

# Right join: all localities; student columns are NA where no ZIP matches
students |>
  right_join(localities, by = "ZIP")
##   Matriculation_No Student  ZIP LocalityName
## 1           100002 Patrick 8006       Zurich
## 2           200003     Eva 8820    Wadenswil
## 3               NA    <NA> 8003       Zurich
## 4               NA    <NA> 8810       Horgen

# Full join: all rows from both tables
students |>
  full_join(localities, by = "ZIP")
##   Matriculation_No Student  ZIP LocalityName
## 1           100002 Patrick 8006       Zurich
## 2           100003 Manuela 8001         <NA>
## 3           200003     Eva 8820    Wadenswil
## 4               NA    <NA> 8003       Zurich
## 5               NA    <NA> 8810       Horgen
# Redefine students so that the ZIP code is stored in a column named Residence.
students <- data.frame(
  Matriculation_No = c(100002, 100003, 200003),
  Student = c("Patrick", "Manuela", "Pascal"),
  Residence = c(8006, 8001, 8006)
)

# The key columns have different names in the two tables, so the mapping
# is given explicitly: Residence (in students) = ZIP (in localities).
left_join(students, localities, by = c("Residence" = "ZIP"))
##   Matriculation_No Student Residence LocalityName
## 1           100002 Patrick      8006       Zurich
## 2           100003 Manuela      8001         <NA>
## 3           200003  Pascal      8006       Zurich