79 lines
3.0 KiB
R
79 lines
3.0 KiB
R
|
# 01_preprocessing.R
|
||
|
#
|
||
|
# Cleaning up data for toy data set Methods Seminar SS2024
|
||
|
#
|
||
|
# Input:  data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv
# Output: data/results/data_rdm-ms-ss2024_cleaned.csv
#         data/results/data_rdm-ms-ss2024_cleaned.RData
|
||
|
#
|
||
|
# created: 2024-06-03
|
||
|
|
||
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
|
||
|
|
||
|
# Read the raw survey export.
# The first three lines of the file are skipped when reading the data; the
# column names are taken from the first line separately. Empty fields are
# treated as missing, and text columns are read as factors.
raw_file <- "data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv"

dat <- read.table(raw_file, sep = ",", skip = 3, stringsAsFactors = TRUE,
                  na.strings = "")

col_names <- readLines(raw_file, n = 1) |>
  strsplit(split = ",") |>
  unlist()

# Guard against a header line that does not split into one name per column
# (a shorter vector would silently leave NA column names).
stopifnot(length(col_names) == ncol(dat))
names(dat) <- col_names
|
||
|
|
||
|
# Clean up variables
# Re-create the categorical variables as factors with an explicit level
# order. factor() maps any value that is not among the listed levels to NA.
dat$ResponseId <- factor(dat$ResponseId)

category_levels <- list(
  sex            = c("m", "f", "d", "not indicated"),
  data_sharing_1 = c("No", "Yes"),
  career_level_1 = c("Student", "PhD student", "Postdoc",
                     "Senior researcher", "Professor", "Other")
)
for (var in names(category_levels)) {
  dat[[var]] <- factor(dat[[var]], levels = category_levels[[var]])
}

# All five statement items share the same agreement scale.
agreement_levels <- c("Strongly disagree", "Disagree",
                      "Neither agree nor disagree", "Agree",
                      "Strongly agree")
stmnt_vars <- paste0("rdm_stmnt_", 1:5)
dat[stmnt_vars] <- lapply(dat[stmnt_vars], factor, levels = agreement_levels)
|
||
|
|
||
|
## Fix data_sharing_2
# The free-text answer "1 out of 4" is recoded to 1 and the column is made
# numeric. The data are read with stringsAsFactors = TRUE, so this column is a
# factor when it contains text: as.numeric() on a factor returns the internal
# level codes rather than the recorded numbers, and assigning a value that is
# not an existing level produces NA. Going through character avoids both.
ds2 <- as.character(dat$data_sharing_2)
ds2[ds2 %in% "1 out of 4"] <- "1"   # %in% never yields NA for missing answers
dat$data_sharing_2 <- as.numeric(ds2)
|
||
|
|
||
|
# Create numeric statement variables
# as.numeric() on a factor returns the integer level codes, so each statement
# is coded 1 ("Strongly disagree") to 5 ("Strongly agree") following the level
# order set above. Each item is converted from its own column only; the
# previous version overwrote rdm_stmnt_4 with the values of rdm_stmnt_5.
stmnt_vars <- paste0("rdm_stmnt_", 1:5)
dat[stmnt_vars] <- lapply(dat[stmnt_vars], as.numeric)
|
||
|
|
||
|
# Save cleaned data set
# Written twice: as semicolon-separated text (no row names, no quoting) and
# as an .RData file containing the object `dat`.
write.table(
  x = dat,
  file = "data/results/data_rdm-ms-ss2024_cleaned.csv",
  quote = FALSE,
  sep = ";",
  row.names = FALSE
)

save(list = "dat", file = "data/results/data_rdm-ms-ss2024_cleaned.RData")
|
||
|
|