# This is accompanying code for
# https://jozef.io/r002-data-manipulation/

# Original data source:
# http://ec.europa.eu/eurostat/web/sector-accounts/data/annual-data

# Important note - the figures are presented
#  - in millions € for euro area countries and euro area / EU aggregates,
#  - millions of national currency otherwise

# Read Data -------------------------------------------------------------------
gdi <- read.csv(
  url("https://jozef.io/post/data/ESA2010_GDI.csv"),
  stringsAsFactors = FALSE
)

# Subset known (not `NA`) values in the year 2016 less than 1 million ---------
# Testing is.na() first turns the NA comparisons into FALSE instead of NA.
rowidx <- !is.na(gdi[["Y.2016"]]) & gdi[["Y.2016"]] < 1e6
gdi[rowidx, , drop = FALSE]

# Subset the first 2 and the 27th row and the 1st, 22nd and 23rd column -------

# Logical vectors: TRUE for rows/columns to subset, FALSE for those to omit ---
st1 <- gdi[
  rep(c(TRUE, FALSE, TRUE, FALSE), times = c(2, 24, 1, 8)),
  rep(c(TRUE, FALSE, TRUE), times = c(1, 20, 2)),
  drop = FALSE
]

# Numeric vectors of row/column numbers to subset -----------------------------
st2 <- gdi[c(1, 2, 27), c(1, 22, 23), drop = FALSE]

# Negative numeric vectors of row/column numbers to omit ----------------------
st3 <- gdi[-c(3:26, 28:35), -(2:21), drop = FALSE]

# Character vectors of row/column names to subset -----------------------------
st4 <- gdi[
  c("1", "2", "27"), # we do not have very meaningful rownames
  c("country", "Y.2015", "Y.2016"),
  drop = FALSE
]

# All of the above give identical results -------------------------------------
identical(st1, st2) && identical(st2, st3) && identical(st3, st4)


# Answers to the Exercises ----------------------------------------------------

# |- Exercise 1. --------------------------------------------------------------
# We can examine with str() and/or class():
str(gdi[3, 3])               # numeric vector of length 1
str(gdi[3, 3, drop = FALSE]) # data.frame with 1 observation in 1 column

# |- Exercise 2. --------------------------------------------------------------
gdi[-3, 3]               # all rows but the 3rd, only the 3rd column; a vector
gdi[3, -3]               # only the 3rd row, all columns but the 3rd; data.frame
gdi[-3, 3, drop = FALSE] # all rows but the 3rd, only the 3rd column; data.frame

# |- Exercise 3. --------------------------------------------------------------
rowidx <- gdi[["Y.2016"]] < 1000000
rowidx # has NAs, because comparing NA with a number results in NA
gdi[rowidx, c(1, 23), drop = FALSE] # subsetting with NAs is not very nice

# |- Bonus Question 1 ---------------------------------------------------------
identical(gdi[, "Y.2016", drop = FALSE], gdi["Y.2016"])
typeof(gdi) # data.frame is also a list (of its columns),
            # and sub-lists of lists are accessed with []

# |- Bonus Question 2 ---------------------------------------------------------
identical(gdi[, "Y.2016"], gdi[["Y.2016"]])
typeof(gdi) # data.frame is also a list (of its columns),
            # [[]] selects contents of an element of a list