Question 1 - count number of zeroes with a for loop
# Tally the zeroes in a random binomial sample one element at a time.
counter <- 0
# 20 binomial draws; `size` is recycled over 0:5 across the draws
vector <- rbinom(prob = 0.5, size = 0:5, n = 20)
# as a for loop: walk over the values themselves and bump the tally on each 0
for (draw in vector) {
  if (draw == 0) {
    counter <- counter + 1
  }
}
print(counter)
## [1] 4
Question 2 - count number of zeroes with subsetting
# Mark the zeroes with a logical mask, subset with it, and count what remains.
is_zero <- vector == 0
length(vector[is_zero])
## [1] 4
Question 3 - make_matrix function
##############
# function: make_matrix
# makes a matrix in which each value is its row number x its column number
# input: n_row is number of rows desired (default: one draw from runif(1, 1, 10))
# n_col is number of columns desired (default: one draw from runif(1, 1, 10))
# the dimensions are handed to matrix(), which drops any fractional part;
# a dimension of 0 gives an empty matrix instead of an error
# output: prints the matrix and returns it invisibly; the matrix has
# number of rows = n_row and number of columns = n_col and each position
# equals row number x column number (stored as integers)
#---------
make_matrix <- function(n_row = runif(1, 1, 10), n_col = runif(1, 1, 10)) {
  # matrix() fixes the dimensions; the 0L fill is only a placeholder
  template <- matrix(0L, nrow = n_row, ncol = n_col)
  # row() and col() hold each cell's row and column index, so their product
  # fills every cell at once and is well defined when a dimension is 0
  new_matrix <- row(template) * col(template)
  # print() shows the matrix and hands it back invisibly as the return value
  print(new_matrix)
}
#####################
make_matrix()
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 1 2 3 4 5 6 7 8 9
## [2,] 2 4 6 8 10 12 14 16 18
## [3,] 3 6 9 12 15 18 21 24 27
## [4,] 4 8 12 16 20 24 28 32 36
## [5,] 5 10 15 20 25 30 35 40 45
Question 4 - Randomization test
# Simulated data: three groups of 10 normal draws (default sd) with means 1, 2 and 3.
names <- paste0("group", 1:3)
var1 <- rnorm(10, mean = 1)
var2 <- rnorm(10, mean = 2)
var3 <- rnorm(10, mean = 3)
# Long format: one row per observation, labelled with the group it came from.
df <- data.frame(
  group = rep(names, each = length(var1)),
  response_var = c(var1, var2, var3)
)
###############
# function: shuffle_means
# draws a fresh normal sample (sd=1) for each requested mean and averages it
# input: n_obs number of observations drawn per group
# means is vector of means to use, one per group (any number of groups;
# the default gives three groups)
# output: an unnamed numeric vector with the sample mean of each group,
# in the same order as means
#----------
shuffle_means <- function(n_obs = 10, means = c(1, 2, 3)) {
  # One sample per entry of `means`, drawn in order, so the default call
  # consumes random numbers in the same sequence as three separate rnorm() calls
  vapply(
    means,
    function(group_mean) mean(rnorm(n = n_obs, mean = group_mean)),
    numeric(1),
    USE.NAMES = FALSE
  )
}
#####################
# testing the function
shuffle_means()
## [1] 1.329321 1.595706 2.714350
# repeat the function 100 times and put it into a dataframe
# The replicates are collected first and the data frame is built once,
# rather than being copied by rbind() on every pass through a loop.
n_replicates <- 100
# vapply() returns one column per replicate (3 x 100); t() turns that into
# one row per replicate. numeric(3) matches the three default group means.
replicate_means <- t(vapply(
  seq_len(n_replicates),
  function(i) shuffle_means(),
  numeric(3)
))
df2 <- data.frame(
  replicate = seq_len(n_replicates),
  group1 = replicate_means[, 1],
  group2 = replicate_means[, 2],
  group3 = replicate_means[, 3]
)
# graphing the results
# One histogram per group of the simulated means in df2. The dashed orange
# line marks the average of that column, which is also quoted in the title.
avg_group1 <- mean(df2$group1)
plot1 <- ggplot(df2, aes(x = group1)) +
  geom_histogram(color = "black", fill = "cornflowerblue") +
  geom_vline(xintercept = avg_group1, linetype = "dashed", color = "orange") +
  xlim(0, 5) +
  ggtitle(paste0("For group 1, average mean is ", round(avg_group1, digits = 3), ", specified mean is 1"))

avg_group2 <- mean(df2$group2)
plot2 <- ggplot(df2, aes(x = group2)) +
  geom_histogram(color = "black", fill = "lightblue") +
  geom_vline(xintercept = avg_group2, linetype = "dashed", color = "orange") +
  xlim(0, 5) +
  ggtitle(paste0("For group 2, average mean is ", round(avg_group2, digits = 3), ", specified mean is 2"))

avg_group3 <- mean(df2$group3)
plot3 <- ggplot(df2, aes(x = group3)) +
  geom_histogram(color = "black", fill = "lavender") +
  geom_vline(xintercept = avg_group3, linetype = "dashed", color = "orange") +
  xlim(0, 5) +
  ggtitle(paste0("For group 3, average mean is ", round(avg_group3, digits = 3), ", specified mean is 3"))

# Arrange the three histograms together for comparison
grid.arrange(plot1, plot2, plot3)

# The average of the 100 simulated group means does not always equal the specified mean: the dashed vertical line in each histogram is not always at the specified value.