Reshaping data to wide format with tidyr

The following makes use of the packages tidyr and readr. You may need to install them from CRAN using the code install.packages(c("tidyr","readr")) if you want to run this on your computer. (The packages are already installed on the notebook container, however.)

substr(readLines("inequality-oecd-downloaded.csv",n=5),
       start=1,stop=40)
[1] "\357\273\277\"LOCATION\",\"INDICATOR\",\"SUBJECT\",\"MEA"
[2] "\"AUS\",\"INCOMEINEQ\",\"P50P10\",\"RT\",\"A\",\"20"     
[3] "\"AUS\",\"INCOMEINEQ\",\"P50P10\",\"RT\",\"A\",\"20"     
[4] "\"AUS\",\"INCOMEINEQ\",\"P50P10\",\"RT\",\"A\",\"20"     
[5] "\"AUS\",\"INCOMEINEQ\",\"P90P10\",\"RT\",\"A\",\"20"     
library(readr)
inequality.oecd.dld <- read_csv("inequality-oecd-downloaded.csv")
-- Column specification --------------------------------------------------------
cols(
  LOCATION = col_character(),
  INDICATOR = col_character(),
  SUBJECT = col_character(),
  MEASURE = col_character(),
  FREQUENCY = col_character(),
  TIME = col_double(),
  Value = col_double(),
  `Flag Codes` = col_character()
)
inequality.oecd.dld
# A tibble: 2,315 x 8
   LOCATION INDICATOR  SUBJECT MEASURE FREQUENCY  TIME Value `Flag Codes`
   <chr>    <chr>      <chr>   <chr>   <chr>     <dbl> <dbl> <chr>       
 1 AUS      INCOMEINEQ P50P10  RT      A          2012 2.2   NA          
 2 AUS      INCOMEINEQ P50P10  RT      A          2014 2.2   NA          
 3 AUS      INCOMEINEQ P50P10  RT      A          2016 2.1   NA          
 4 AUS      INCOMEINEQ P90P10  RT      A          2012 4.4   NA          
 5 AUS      INCOMEINEQ P90P10  RT      A          2014 4.3   NA          
 6 AUS      INCOMEINEQ P90P10  RT      A          2016 4.3   NA          
 7 AUS      INCOMEINEQ P90P50  RT      A          2012 2     NA          
 8 AUS      INCOMEINEQ P90P50  RT      A          2014 2     NA          
 9 AUS      INCOMEINEQ P90P50  RT      A          2016 2.1   NA          
10 AUS      INCOMEINEQ GINI    INEQ    A          2012 0.326 NA          
# ... with 2,305 more rows
library(tidyr)
inequality.oecd.dld %>% spread(key="SUBJECT",value="Value") ->
                                               inequality.oecd
inequality.oecd[-c(2,4,6)]
# A tibble: 771 x 9
   LOCATION MEASURE  TIME   GINI P50P10 P90P10 P90P50 PALMA S80S20
   <chr>    <chr>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>
 1 AUS      INEQ     2012  0.326   NA     NA     NA   NA      NA  
 2 AUS      INEQ     2014  0.337   NA     NA     NA   NA      NA  
 3 AUS      INEQ     2016  0.33    NA     NA     NA   NA      NA  
 4 AUS      RT       2012 NA        2.2    4.4    2    1.24    5.5
 5 AUS      RT       2014 NA        2.2    4.3    2    1.34    5.7
 6 AUS      RT       2016 NA        2.1    4.3    2.1  1.26    5.5
 7 AUT      INEQ     2007  0.284   NA     NA     NA   NA      NA  
 8 AUT      INEQ     2008  0.281   NA     NA     NA   NA      NA  
 9 AUT      INEQ     2009  0.289   NA     NA     NA   NA      NA  
10 AUT      INEQ     2010  0.28    NA     NA     NA   NA      NA  
# ... with 761 more rows
library(dplyr)
Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
inequality.oecd.sub <- select(inequality.oecd.dld,
                              LOCATION,SUBJECT,TIME,Value)
inequality.oecd.sub
# A tibble: 2,315 x 4
   LOCATION SUBJECT  TIME Value
   <chr>    <chr>   <dbl> <dbl>
 1 AUS      P50P10   2012 2.2  
 2 AUS      P50P10   2014 2.2  
 3 AUS      P50P10   2016 2.1  
 4 AUS      P90P10   2012 4.4  
 5 AUS      P90P10   2014 4.3  
 6 AUS      P90P10   2016 4.3  
 7 AUS      P90P50   2012 2    
 8 AUS      P90P50   2014 2    
 9 AUS      P90P50   2016 2.1  
10 AUS      GINI     2012 0.326
# ... with 2,305 more rows
inequality.oecd.sub %>% spread(key=SUBJECT,
                               value=Value) -> inequality.oecd
inequality.oecd
# A tibble: 386 x 8
   LOCATION  TIME  GINI P50P10 P90P10 P90P50 PALMA S80S20
   <chr>    <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>
 1 AUS       2012 0.326    2.2    4.4    2    1.24    5.5
 2 AUS       2014 0.337    2.2    4.3    2    1.34    5.7
 3 AUS       2016 0.33     2.1    4.3    2.1  1.26    5.5
 4 AUT       2007 0.284    2      3.6    1.8  1       4.4
 5 AUT       2008 0.281    1.9    3.4    1.8  1       4.3
 6 AUT       2009 0.289    2      3.6    1.8  1.03    4.5
 7 AUT       2010 0.28     1.9    3.5    1.8  0.98    4.3
 8 AUT       2011 0.281    1.9    3.5    1.8  0.99    4.4
 9 AUT       2012 0.275    2      3.5    1.8  0.96    4.2
10 AUT       2013 0.279    1.9    3.4    1.8  0.99    4.2
# ... with 376 more rows
inequality.oecd.dld %>% pivot_wider(names_from=SUBJECT,
                                    values_from=Value,
                                    id_cols=c(LOCATION,TIME)) ->
                                                inequality.oecd
inequality.oecd
# A tibble: 386 x 8
   LOCATION  TIME P50P10 P90P10 P90P50  GINI S80S20 PALMA
   <chr>    <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl> <dbl>
 1 AUS       2012    2.2    4.4    2   0.326    5.5  1.24
 2 AUS       2014    2.2    4.3    2   0.337    5.7  1.34
 3 AUS       2016    2.1    4.3    2.1 0.33     5.5  1.26
 4 AUT       2007    2      3.6    1.8 0.284    4.4  1   
 5 AUT       2008    1.9    3.4    1.8 0.281    4.3  1   
 6 AUT       2009    2      3.6    1.8 0.289    4.5  1.03
 7 AUT       2010    1.9    3.5    1.8 0.28     4.3  0.98
 8 AUT       2011    1.9    3.5    1.8 0.281    4.4  0.99
 9 AUT       2012    2      3.5    1.8 0.275    4.2  0.96
10 AUT       2013    1.9    3.4    1.8 0.279    4.2  0.99
# ... with 376 more rows