# 01_preprocessing.R
#
# Cleaning up data for toy data set Methods Seminar SS2024
#
# Input:  data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv
# Output: data/results/data_rdm-ms-ss2024_cleaned.csv
#         data/results/data_rdm-ms-ss2024_cleaned.RData
#
# created: 2024-06-03

# All paths below are relative, so run this script with the project folder
# (the one containing data/) as working directory instead of hard-coding it.
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")

raw_file <- "data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv"

# Read data ----

# The first three lines of the file are skipped when reading the data; the
# very first line is read separately and supplies the column names.
dat <- read.table(raw_file, sep = ",", skip = 3, stringsAsFactors = TRUE,
                  na.strings = "")
names(dat) <- readLines(raw_file, n = 1) |>
  strsplit(split = ",") |>
  unlist()

# Clean up variables ----

dat$ResponseId <- factor(dat$ResponseId)
dat$sex <- factor(dat$sex, levels = c("m", "f", "d", "not indicated"))
dat$data_sharing_1 <- factor(dat$data_sharing_1, levels = c("No", "Yes"))
dat$career_level_1 <- factor(
  dat$career_level_1,
  levels = c("Student", "PhD student", "Postdoc", "Senior researcher",
             "Professor", "Other")
)

## Fix data_sharing_2
# One response was entered as text. Work on the character values: calling
# as.numeric() directly on a factor returns the internal level codes, not the
# numbers shown in the data.
data_sharing_2 <- as.character(dat$data_sharing_2)
data_sharing_2[data_sharing_2 %in% "1 out of 4"] <- "1"
dat$data_sharing_2 <- as.numeric(data_sharing_2)

# Create numeric statement variables ----

# All five statements share the same response scale. Ordering the levels from
# disagreement to agreement makes the numeric codes run from 1 (Strongly
# disagree) to 5 (Strongly agree). Each item is recoded from its own column.
agreement_levels <- c("Strongly disagree", "Disagree",
                      "Neither agree nor disagree", "Agree", "Strongly agree")
stmnt_vars <- paste0("rdm_stmnt_", 1:5)
dat[stmnt_vars] <- lapply(
  dat[stmnt_vars],
  \(x) as.numeric(factor(x, levels = agreement_levels))
)

# Save cleaned data set ----

write.table(dat, file = "data/results/data_rdm-ms-ss2024_cleaned.csv",
            sep = ";", row.names = FALSE, quote = FALSE)
save(dat, file = "data/results/data_rdm-ms-ss2024_cleaned.RData")