# This is accompanying code for
# https://jozef.io/r002-data-manipulation/

# Original data source:
# http://ec.europa.eu/eurostat/web/sector-accounts/data/annual-data
# Important note - the figures are presented
# - in millions € for euro area countries and euro area / EU aggregates,
# - millions of national currency otherwise

# Read Data -------------------------------------------------------------------
# Load the GDI table from the published CSV through a url() connection,
# keeping text columns as character vectors instead of factors.
gdi <- read.csv(
  file = url("https://jozef.io/post/data/ESA2010_GDI.csv"),
  stringsAsFactors = FALSE
)

# Subset rows whose 2016 value is known (not `NA`) and below 1 million --------
# Row mask evaluated inside the data frame: the is.na() test comes first so
# that unknown 2016 values give FALSE (not NA) in the mask.
rowidx <- with(gdi, !is.na(Y.2016) & Y.2016 < 1000000)
# Keep every column; drop = FALSE guarantees a data.frame result.
gdi[rowidx, , drop = FALSE]

# Subset the first 2 and the 27th row and the 1st, 22nd and 23rd column -------

# Logical vectors TRUE for rows/columns to subset, FALSE for those to omit ----
# Masks of length 35 (rows) and 23 (columns): TRUE only at rows 1, 2, 27
# and at columns 1, 22, 23; FALSE everywhere else.
st1 <- gdi[
  seq_len(35) %in% c(1, 2, 27),
  seq_len(23) %in% c(1, 22, 23),
  drop = FALSE
]

# Numeric vectors of row/column numbers to subset -----------------------------
# Positive positions select exactly the listed rows and columns.
st2 <- gdi[c(1, 2, 27), c(1, 22, 23), drop = FALSE]

# Negative numeric vectors of row/column numbers to omit ----------------------
# Negated positions are dropped: rows 3-26 and 28-35 and columns 2-21 are
# omitted, which leaves rows 1, 2, 27 and columns 1, 22, 23.
st3 <- gdi[-c(3:26, 28:35), -(2:21), drop = FALSE]

# Character vectors of row/column names to subset -----------------------------
# Rows are addressed by row name and columns by column name. The row names
# are not very meaningful here: they are just "1", "2", "27".
st4 <- gdi[
  c("1", "2", "27"),
  c("country", "Y.2015", "Y.2016"),
  drop = FALSE
]

# All of the above give identical results -------------------------------------
# Compare each of the other variants against st1; TRUE only if all four match.
all(vapply(list(st2, st3, st4), identical, logical(1), st1))


# Answers to the Exercises ----------------------------------------------------

# |- Exercise 1. --------------------------------------------------------------
# We can examine with str() and/or class():
# Single cell with the default simplification spelled out: a length-1 vector.
str(gdi[3, 3, drop = TRUE])
# Same cell without simplification: a data.frame with 1 row and 1 column.
str(gdi[3, 3, drop = FALSE])

# |- Exercise 2. --------------------------------------------------------------
# Column 3 without row 3; a single column simplifies to a plain vector.
gdi[-3, 3, drop = TRUE]
# Row 3 without column 3; several columns remain, so this is a data.frame.
gdi[3, -3]
# Column 3 without row 3 again, this time kept as a one-column data.frame.
gdi[-3, 3, drop = FALSE]

# |- Exercise 3. --------------------------------------------------------------
# No is.na() guard this time: comparing NA with a number gives NA, so the
# mask carries an NA for every unknown 2016 value.
rowidx <- with(gdi, Y.2016 < 1000000)
rowidx
# Indexing rows with a mask that contains NAs gives a not very nice result.
gdi[rowidx, c(1, 23), drop = FALSE]

# |- Bonus Question 1 ---------------------------------------------------------
# List-style single-bracket access versus matrix-style access with
# drop = FALSE: check whether both give the same one-column data.frame.
identical(gdi["Y.2016"], gdi[, "Y.2016", drop = FALSE])
# A data.frame is stored as a list of its columns, and [] on a list
# returns a sub-list.
typeof(gdi)

# |- Bonus Question 2 ---------------------------------------------------------
# Double-bracket access versus matrix-style access with the default drop:
# check whether both give the same bare column vector.
identical(gdi[["Y.2016"]], gdi[, "Y.2016"])
# A data.frame is stored as a list of its columns, and [[]] extracts the
# contents of a single list element.
typeof(gdi)