Reshaping data to wide format with tidyr¶
The following makes use of the packages tidyr and readr. You may need to
install them from CRAN using the code
install.packages(c("tidyr","readr"))
if you want to run this on your computer. (The packages are already installed
on the notebook container, however.)
substr(readLines("inequality-oecd-downloaded.csv",n=5),
start=1,stop=40)
[1] "\357\273\277\"LOCATION\",\"INDICATOR\",\"SUBJECT\",\"MEA"
[2] "\"AUS\",\"INCOMEINEQ\",\"P50P10\",\"RT\",\"A\",\"20"
[3] "\"AUS\",\"INCOMEINEQ\",\"P50P10\",\"RT\",\"A\",\"20"
[4] "\"AUS\",\"INCOMEINEQ\",\"P50P10\",\"RT\",\"A\",\"20"
[5] "\"AUS\",\"INCOMEINEQ\",\"P90P10\",\"RT\",\"A\",\"20"
library(readr)
inequality.oecd.dld <- read_csv("inequality-oecd-downloaded.csv")
-- Column specification --------------------------------------------------------
cols(
LOCATION = col_character(),
INDICATOR = col_character(),
SUBJECT = col_character(),
MEASURE = col_character(),
FREQUENCY = col_character(),
TIME = col_double(),
Value = col_double(),
`Flag Codes` = col_character()
)
inequality.oecd.dld
# A tibble: 2,315 x 8
LOCATION INDICATOR SUBJECT MEASURE FREQUENCY TIME Value `Flag Codes`
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
1 AUS INCOMEINEQ P50P10 RT A 2012 2.2 NA
2 AUS INCOMEINEQ P50P10 RT A 2014 2.2 NA
3 AUS INCOMEINEQ P50P10 RT A 2016 2.1 NA
4 AUS INCOMEINEQ P90P10 RT A 2012 4.4 NA
5 AUS INCOMEINEQ P90P10 RT A 2014 4.3 NA
6 AUS INCOMEINEQ P90P10 RT A 2016 4.3 NA
7 AUS INCOMEINEQ P90P50 RT A 2012 2 NA
8 AUS INCOMEINEQ P90P50 RT A 2014 2 NA
9 AUS INCOMEINEQ P90P50 RT A 2016 2.1 NA
10 AUS INCOMEINEQ GINI INEQ A 2012 0.326 NA
# ... with 2,305 more rows
library(tidyr)
inequality.oecd.dld %>% spread(key="SUBJECT",value="Value") ->
inequality.oecd
inequality.oecd[-c(2,4,6)]
# A tibble: 771 x 9
LOCATION MEASURE TIME GINI P50P10 P90P10 P90P50 PALMA S80S20
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AUS INEQ 2012 0.326 NA NA NA NA NA
2 AUS INEQ 2014 0.337 NA NA NA NA NA
3 AUS INEQ 2016 0.33 NA NA NA NA NA
4 AUS RT 2012 NA 2.2 4.4 2 1.24 5.5
5 AUS RT 2014 NA 2.2 4.3 2 1.34 5.7
6 AUS RT 2016 NA 2.1 4.3 2.1 1.26 5.5
7 AUT INEQ 2007 0.284 NA NA NA NA NA
8 AUT INEQ 2008 0.281 NA NA NA NA NA
9 AUT INEQ 2009 0.289 NA NA NA NA NA
10 AUT INEQ 2010 0.28 NA NA NA NA NA
# ... with 761 more rows
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
inequality.oecd.sub <- select(inequality.oecd.dld,
LOCATION,SUBJECT,TIME,Value)
inequality.oecd.sub
# A tibble: 2,315 x 4
LOCATION SUBJECT TIME Value
<chr> <chr> <dbl> <dbl>
1 AUS P50P10 2012 2.2
2 AUS P50P10 2014 2.2
3 AUS P50P10 2016 2.1
4 AUS P90P10 2012 4.4
5 AUS P90P10 2014 4.3
6 AUS P90P10 2016 4.3
7 AUS P90P50 2012 2
8 AUS P90P50 2014 2
9 AUS P90P50 2016 2.1
10 AUS GINI 2012 0.326
# ... with 2,305 more rows
inequality.oecd.sub %>% spread(key=SUBJECT,
value=Value) -> inequality.oecd
inequality.oecd
# A tibble: 386 x 8
LOCATION TIME GINI P50P10 P90P10 P90P50 PALMA S80S20
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AUS 2012 0.326 2.2 4.4 2 1.24 5.5
2 AUS 2014 0.337 2.2 4.3 2 1.34 5.7
3 AUS 2016 0.33 2.1 4.3 2.1 1.26 5.5
4 AUT 2007 0.284 2 3.6 1.8 1 4.4
5 AUT 2008 0.281 1.9 3.4 1.8 1 4.3
6 AUT 2009 0.289 2 3.6 1.8 1.03 4.5
7 AUT 2010 0.28 1.9 3.5 1.8 0.98 4.3
8 AUT 2011 0.281 1.9 3.5 1.8 0.99 4.4
9 AUT 2012 0.275 2 3.5 1.8 0.96 4.2
10 AUT 2013 0.279 1.9 3.4 1.8 0.99 4.2
# ... with 376 more rows
inequality.oecd.dld %>% pivot_wider(names_from=SUBJECT,
values_from=Value,
id_cols=c(LOCATION,TIME)) ->
inequality.oecd
inequality.oecd
# A tibble: 386 x 8
LOCATION TIME P50P10 P90P10 P90P50 GINI S80S20 PALMA
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AUS 2012 2.2 4.4 2 0.326 5.5 1.24
2 AUS 2014 2.2 4.3 2 0.337 5.7 1.34
3 AUS 2016 2.1 4.3 2.1 0.33 5.5 1.26
4 AUT 2007 2 3.6 1.8 0.284 4.4 1
5 AUT 2008 1.9 3.4 1.8 0.281 4.3 1
6 AUT 2009 2 3.6 1.8 0.289 4.5 1.03
7 AUT 2010 1.9 3.5 1.8 0.28 4.3 0.98
8 AUT 2011 1.9 3.5 1.8 0.281 4.4 0.99
9 AUT 2012 2 3.5 1.8 0.275 4.2 0.96
10 AUT 2013 1.9 3.4 1.8 0.279 4.2 0.99
# ... with 376 more rows
- R file: reshaping-to-wide-with-tidyr.R
- Rmarkdown file: reshaping-to-wide-with-tidyr.Rmd
- Jupyter notebook file: reshaping-to-wide-with-tidyr.ipynb
- Interactive version of the Jupyter notebook (shuts down after 60s):
- Interactive version of the Jupyter notebook (sign in required):