Slides and example for third session
This commit is contained in:
@@ -0,0 +1,78 @@
|
||||
# 01_preprocessing.R
|
||||
#
|
||||
# Cleaning up data for toy data set Methods Seminar SS2024
|
||||
#
|
||||
# Input:  data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv
|
||||
# Output: data/results/data_rdm-ms-ss2024_cleaned.csv
|
||||
#         data/results/data_rdm-ms-ss2024_cleaned.RData
|
||||
#
|
||||
# created: 2024-06-03
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
|
||||
|
||||
# Read raw data ----
# The first three lines of the export are skipped when reading the data; the
# variable names are taken separately from the first line of the same file.
raw_file <- "data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv"

# Empty fields are treated as missing values. The argument is spelled out as
# `na.strings` instead of relying on partial argument matching.
dat <- read.table(raw_file, sep = ",", skip = 3, stringsAsFactors = TRUE,
                  na.strings = "")

names(dat) <- readLines(raw_file, n = 1) |>
  strsplit(split = ",") |>
  unlist()
|
||||
|
||||
# Clean up variables ----
# Re-create the categorical variables as factors with an explicit level order;
# values that are not among the listed levels become NA.
dat$ResponseId <- factor(dat$ResponseId)

dat$sex <- factor(dat$sex, levels = c("m", "f", "d", "not indicated"))

dat$data_sharing_1 <- factor(dat$data_sharing_1, levels = c("No", "Yes"))

dat$career_level_1 <- factor(
  dat$career_level_1,
  levels = c("Student", "PhD student", "Postdoc", "Senior researcher",
             "Professor", "Other")
)

# All five statement items share the same five-point agreement scale, so they
# are converted in one step.
dat[paste0("rdm_stmnt_", 1:5)] <- lapply(
  dat[paste0("rdm_stmnt_", 1:5)],
  factor,
  levels = c("Strongly disagree", "Disagree", "Neither agree nor disagree",
             "Agree", "Strongly agree")
)
|
||||
|
||||
## Fix data_sharing_2
# The column contains the text entry "1 out of 4" and is therefore read as a
# factor (stringsAsFactors = TRUE). Convert to character first: assigning a
# value that is not an existing factor level produces NA, and as.numeric() on a
# factor returns the internal level codes instead of the recorded numbers.
dat$data_sharing_2 <- as.character(dat$data_sharing_2)
# %in% never returns NA, so missing responses are left untouched
dat$data_sharing_2[dat$data_sharing_2 %in% "1 out of 4"] <- "1"
dat$data_sharing_2 <- as.numeric(dat$data_sharing_2)
|
||||
|
||||
# Create numeric statement variables
# The statement items are factors whose levels run from "Strongly disagree" to
# "Strongly agree", so as.numeric() returns the level codes 1 to 5.
# Each item is converted from its own column; item 4 was previously
# overwritten with the responses to item 5.
dat$rdm_stmnt_1 <- as.numeric(dat$rdm_stmnt_1)
dat$rdm_stmnt_2 <- as.numeric(dat$rdm_stmnt_2)
dat$rdm_stmnt_3 <- as.numeric(dat$rdm_stmnt_3)
dat$rdm_stmnt_4 <- as.numeric(dat$rdm_stmnt_4)
dat$rdm_stmnt_5 <- as.numeric(dat$rdm_stmnt_5)
|
||||
|
||||
# Write the cleaned data set to disk ----
# Semicolon-separated text file without row names and without quoting
write.table(
  dat,
  file      = "data/results/data_rdm-ms-ss2024_cleaned.csv",
  sep       = ";",
  quote     = FALSE,
  row.names = FALSE
)

# R data file containing the data frame `dat`
save(dat, file = "data/results/data_rdm-ms-ss2024_cleaned.RData")
|
||||
|
||||
Reference in New Issue
Block a user