Reshaping data to long format with tidyr¶
The following makes use of the packages tidyr and readr. You may need to
install them from CRAN using the code
install.packages(c("tidyr","readr"))
if you want to run this on your computer. (The packages are already installed
on the notebook container, however.)
# Inspecting the file
substr(readLines("gini-oecd.tsv",n=5),start=1,stop=50)
[1] "Data table for: Income inequality, Gini coefficien"
[2] "Location \t 2007\t2008\t 2009\t2010\t2011\t2012\t2013\t201"
[3] "Australia\t\t\t\t\t\t0.326\t\t0.337\t\t0.330\t"
[4] "Austria\t0.284\t0.281\t0.289\t0.280\t0.281\t0.275\t0.279\t"
[5] "Belgium\t0.277\t0.266\t0.272\t0.267\t0.270\t0.265\t0.265\t"
library(readr)
gini.oecd <- read_tsv("gini-oecd.tsv",
skip=1)
-- Column specification --------------------------------------------------------
cols(
Location = col_character(),
`2007` = col_double(),
`2008` = col_double(),
`2009` = col_double(),
`2010` = col_double(),
`2011` = col_double(),
`2012` = col_double(),
`2013` = col_double(),
`2014` = col_double(),
`2015` = col_double(),
`2016` = col_double(),
`2017` = col_double()
)
gini.oecd
# A tibble: 42 x 12
Location `2007` `2008` `2009` `2010` `2011` `2012` `2013` `2014` `2015`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Australia NA NA NA NA NA 0.326 NA 0.337 NA
2 Austria 0.284 0.281 0.289 0.28 0.281 0.275 0.279 0.274 0.276
3 Belgium 0.277 0.266 0.272 0.267 0.27 0.265 0.265 0.266 0.268
4 Brazil NA NA 0.485 NA 0.483 NA 0.47 NA NA
5 Canada 0.317 0.315 0.316 0.316 0.313 0.317 0.32 0.313 0.318
6 Chile NA NA 0.48 NA 0.471 NA 0.465 NA 0.454
7 China (People~ NA NA NA NA 0.514 NA NA NA NA
8 Costa Rica NA NA NA 0.472 0.48 0.483 0.494 0.485 0.479
9 Czech Republic 0.256 0.259 0.257 0.259 0.257 0.253 0.259 0.257 0.258
10 Denmark NA NA NA NA 0.251 0.249 0.254 0.256 0.263
# ... with 32 more rows, and 2 more variables: `2016` <dbl>, `2017` <dbl>
library(tidyr)
gini.oecd %>% gather(`2007`,`2008`,`2009`,`2010`,`2011`,`2012`,
`2013`,`2014`,`2015`,`2016`,`2017`,
key="year",value="gini") -> gini.oecd.long
gini.oecd.long
# A tibble: 462 x 3
Location year gini
<chr> <chr> <dbl>
1 Australia 2007 NA
2 Austria 2007 0.284
3 Belgium 2007 0.277
4 Brazil 2007 NA
5 Canada 2007 0.317
6 Chile 2007 NA
7 China (People's Republic of) 2007 NA
8 Costa Rica 2007 NA
9 Czech Republic 2007 0.256
10 Denmark 2007 NA
# ... with 452 more rows
gini.oecd %>% gather(-Location,
key="year",value="gini") -> gini.oecd.long
gini.oecd.long
# A tibble: 462 x 3
Location year gini
<chr> <chr> <dbl>
1 Australia 2007 NA
2 Austria 2007 0.284
3 Belgium 2007 0.277
4 Brazil 2007 NA
5 Canada 2007 0.317
6 Chile 2007 NA
7 China (People's Republic of) 2007 NA
8 Costa Rica 2007 NA
9 Czech Republic 2007 0.256
10 Denmark 2007 NA
# ... with 452 more rows
gini.oecd %>% pivot_longer(-Location,
names_to="year",
values_to="gini") -> gini.oecd.long
gini.oecd.long
# A tibble: 462 x 3
Location year gini
<chr> <chr> <dbl>
1 Australia 2007 NA
2 Australia 2008 NA
3 Australia 2009 NA
4 Australia 2010 NA
5 Australia 2011 NA
6 Australia 2012 0.326
7 Australia 2013 NA
8 Australia 2014 0.337
9 Australia 2015 NA
10 Australia 2016 0.33
# ... with 452 more rows
- R file: reshaping-to-long-with-tidyr.R
- Rmarkdown file: reshaping-to-long-with-tidyr.Rmd
- Jupyter notebook file: reshaping-to-long-with-tidyr.ipynb
- Interactive version of the Jupyter notebook (shuts down after 60s):
- Interactive version of the Jupyter notebook (sign in required):