Second Weekend straight, obsessed with R — Part III

Copy and paste the following code into RStudio or your R console:

# Data frames and matrices ----
# Data frames store 2-dimensional data; each column can have its own type.
# String literals below use plain ASCII double quotes: typographic (curly)
# quotes are not valid string delimiters in R and make the script fail to
# parse.
set.seed(100) # fix the RNG state so the sampled colours are reproducible

# Integer column: 1 to 17 followed by one missing value.
a <- c(1:17, NA)
a

# Character column: the pattern "a", "b", "c", "d", NA, "f" three times over.
b <- rep(c("a", "b", "c", "d", NA, "f"), times = 3)
b

# Factor column: 18 colours drawn at random with replacement.
# The variable is called `c`; calls such as c(1, 2) keep working because R
# looks for a function when a name is used in call position.
c <- factor(sample(c("red", "blue", "green"), 18, replace = TRUE))
c

# Combine the three vectors into a data frame with columns v1, v2 and v3.
df1 <- data.frame(v1 = a, v2 = b, v3 = c)
df1
typeof(df1) # "list": a data frame is stored as a list of columns
class(df1)  # "data.frame"

# Inspecting the shape and the column names of df1
dim(df1)  # number of rows, then number of columns
nrow(df1)
ncol(df1)
colnames(df1)
# Rename the columns (ASCII quotes; curly quotes are a syntax error in R).
colnames(df1) <- c("A", "B", "C")
colnames(df1)

head(df1)    # first rows (six by default)
tail(df1, 3) # last three rows
# View() opens a spreadsheet-style viewer and needs a display, so it is only
# called when the session is interactive.
if (interactive()) {
  View(df1)
}

# Transposing a data frame returns a matrix, not a data frame.
mat <- t(df1)
mat
class(mat)

# Per-column summary and compact structure of the data frame
summary(df1)
str(df1)

# Make sure column B is stored as character. Equivalent ways to read the
# column on the right-hand side:
#   as.character(df1$B)
#   as.character(df1[, 2])
#   as.character(df1[, "B"])
df1[["B"]] <- as.character(df1[["B"]])
str(df1)

# Selecting multiple columns ----
# By position ...
df1[, c(1, 2)]
# ... or by name. Positions 1 and 2 are columns "A" and "B", so the name-based
# form must list the same two columns to be equivalent.
df1[, c("A", "B")]

# Selecting multiple rows ----
# First five rows of the first two columns
df1[1:5, c(1, 2)]

# Filtering rows on a condition: four techniques (the thresholds differ so
# that each call prints a different result).

# 1. which() turns the condition into row positions and drops NA results.
df1[which(df1$A >= 12), ]

# 2. The same idea with the positions stored in a variable first.
index <- which(df1$A > 8)
index
df1[index, ]

# 3. A logical mask. A comparison with NA yields NA, and an NA in a logical
#    row index produces a row filled with NA, so missing values are excluded
#    explicitly to match the which() variants above.
df1[!is.na(df1$A) & df1$A > 9, ]

# 4. subset() filters rows and picks columns in one call; rows where the
#    condition is NA are dropped.
subset(df1, subset = A >= 10, select = c(1, 2))

# Dropping missing values ----
# na.omit() returns a copy without any row that has an NA in any column;
# df1 itself is not modified, as printing both shows.
df2 <- na.omit(df1)
df2
df1

# To drop rows based on one column only, build a logical mask that is TRUE
# wherever column A is present.
index <- !is.na(df1[["A"]])
index
# Rows of df1 whose A value is not missing (other columns may still hold NA)
df1[index, ]
# The same filter written inline, without the intermediate variable
df1[!is.na(df1[["A"]]), ]

# A mini challenge ----
# Rebuild the data frame from the vectors a, b and c. The columns take the
# variable names (a, b, c), and stringsAsFactors = TRUE stores the character
# vector b as a factor.
df1 <- data.frame(a, b, c, stringsAsFactors = TRUE)
df1
str(df1)

# Task: remove all missing values and keep only those rows that have either
# "green" or "blue" in column c.

# Step 1: keep the rows whose colour is green or blue. %in% never returns NA,
# so the mask is safe to use directly as a row index.
index <- df1$c %in% c("green", "blue")
index
df2 <- df1[index, ]
df2

# Step 2: drop every remaining row that has an NA in any column.
df2 <- na.omit(df2)
df2

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google+ photo

You are commenting using your Google+ account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s