Slides and example for third session
This commit is contained in:
Binary file not shown.
@@ -0,0 +1,41 @@
|
||||
# codebook_generation_01.R
|
||||
#
|
||||
# Codebook generation example
|
||||
#
|
||||
# Input:  data/results/data_rdm-ms-ss2024_cleaned.RData
|
||||
# Output: data/codebook/codebook_01.csv, data/codebook/codebook_01.xlsx
|
||||
#
|
||||
# created: 2024-06-04
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
|
||||
|
||||
# Load the cleaned survey data; the code below expects the file to provide
# the data frame `dat`.
load("data/results/data_rdm-ms-ss2024_cleaned.RData")

# Survey question for every variable, keyed by variable name. Looking the
# text up by name keeps each question attached to the right variable; a purely
# positional vector silently mislabels variables whenever its order differs
# from names(dat) (in the data, `age` comes before `sex`).
var_text <- c(
  ResponseId     = "Response Id",
  age            = "How old are you? Please enter your age in years.",
  sex            = "Please indicate your sex.",
  data_sharing_1 = "Have you ever published data in a repository?",
  data_sharing_2 = "How many of your data sets have you published so far?",
  rdm_stmnt_1    = "All my analyses are preregistered",
  rdm_stmnt_2    = "Sharing my data is very important to me",
  rdm_stmnt_3    = "I invest more time in research data management than my colleagues",
  rdm_stmnt_4    = "I think research data management is overrated",
  rdm_stmnt_5    = "Sharing data is bad scientific practice",
  career_level_1 = "What is your current career level?",
  career_level_2 = "How long have you been working in science (in years)?"
)

# Fail early if the data contain a variable that has no question text
stopifnot(all(names(dat) %in% names(var_text)))

# One codebook row per variable, in the order of the columns of `dat`
codebook <- data.frame(var_name = names(dat),
                       var_text = unname(var_text[names(dat)]))
|
||||
|
||||
# Add descriptive statistics to the codebook, one value per column of `dat`.

# Class of each variable; a multi-class column is collapsed into one string so
# the result is always a plain character vector.
codebook$type <- vapply(dat, function(x) paste(class(x), collapse = "/"),
                        character(1))

# Number of entries per variable (missing values are counted as well)
codebook$n <- vapply(dat, length, integer(1))

# Mean and standard deviation for numeric variables, NA for all others.
# Both ignore missing values: without na.rm = TRUE a single NA turns the
# standard deviation into NA although a mean is still reported.
codebook$mean <- vapply(dat, function(x) {
  if (is.numeric(x)) mean(x, na.rm = TRUE) else NA_real_
}, numeric(1))
codebook$sd <- vapply(dat, function(x) {
  if (is.numeric(x)) sd(x, na.rm = TRUE) else NA_real_
}, numeric(1))
|
||||
|
||||
# Export the codebook as semicolon-separated text and as an Excel workbook.
# row.names = FALSE keeps the header aligned with the data rows: with the
# default (row names written, header without a field for them) every data row
# has one more field than the header line.
# NOTE(review): quote = FALSE leaves text fields unquoted, so a question text
# containing ";" would add an extra field to its row.
write.table(codebook,
            file = "data/codebook/codebook_01.csv",
            na = "",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)

openxlsx::write.xlsx(codebook, file = "data/codebook/codebook_01.xlsx")
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
var_name;var_text;type;n;mean;sd
|
||||
1;ResponseId;Response Id;factor;13;;
|
||||
2;age;How old are you? Please enter your age in years.;integer;13;29.6923076923077;5.99144689515278
|
||||
3;sex;Please indicate your sex.;factor;13;;
|
||||
4;data_sharing_1;Have you ever published data in a repository?;factor;13;;
|
||||
5;data_sharing_2;How many of your data sets have you published so far?;numeric;13;2.30769230769231;1.65250392761083
|
||||
6;rdm_stmnt_1;All my analyses are preregistered;numeric;13;4.15384615384615;1.14354374979373
|
||||
7;rdm_stmnt_2;Sharing my data is very important to me;numeric;13;4;
|
||||
8;rdm_stmnt_3;I invest more time in research data management than my colleagues;numeric;13;2.84615384615385;0.800640769025436
|
||||
9;rdm_stmnt_4;I think research data management is overrated;numeric;13;1.15384615384615;0.375533808099405
|
||||
10;rdm_stmnt_5;Sharing data is bad scientific practice;numeric;13;1.15384615384615;0.375533808099405
|
||||
11;career_level_1;What is your current career level?;factor;13;;
|
||||
12;career_level_2;How long have you been working in science (in years)?;numeric;13;6.26923076923077;10.1788493632126
|
||||
|
Binary file not shown.
@@ -0,0 +1,68 @@
|
||||
# codebook_generation_02.R
|
||||
#
|
||||
# Codebook generation example
|
||||
#
|
||||
# Input:  data/rawdata/RDM_MS_SS2024_download_2024-06-04.sav
|
||||
# Output: data/codebook/codebook_02.csv, data/codebook/codebook_02.xlsx
|
||||
#
|
||||
# created: 2024-06-04
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
|
||||
|
||||
# Read the raw SPSS export and turn it into a plain data frame
dat <- as.data.frame(haven::read_spss("data/rawdata/RDM_MS_SS2024_download_2024-06-04.sav"))

## Fix data_sharing_2
# Recode the free-text answer "1 out of 4" to the number 1
dat$data_sharing_2[dat$data_sharing_2 == "1 out of 4"] <- 1

# Look at attributes
attributes(dat$sex)

# Create codebook with survey questions
# The question text is read from the "label" attribute of each column.
# exact = TRUE is required because attr() matches names partially by default
# and could otherwise return the "labels" (value labels) attribute of a column
# that has no "label". Columns without a label get NA, so the result is always
# a character vector with one entry per variable.
codebook <- data.frame(
  variable = names(dat),
  label = vapply(dat, function(x) {
    question <- attr(x, "label", exact = TRUE)
    if (is.null(question)) NA_character_ else as.character(question)
  }, character(1))
)
|
||||
|
||||
# Clean up data frame
|
||||
# Pass every column through sjlabelled::unlabel() and rebuild the data frame
# from the resulting list of columns
dat <- as.data.frame(lapply(dat, sjlabelled::unlabel))

# Inspect the resulting column classes
sapply(dat, class)

# Variables that hold numbers and are converted to numeric
num_vars <- c("age", "career_level_2", "data_sharing_2")
dat[num_vars] <- lapply(dat[num_vars], as.numeric)
|
||||
|
||||
# Categorical variables and the numeric codes used for their answers.
# Each one becomes a factor whose level labels are the names of the column's
# "labels" attribute.
value_codes <- list(sex            = 1:4,
                    data_sharing_1 = 1:2,
                    career_level_1 = 1:6)

for (fct_var in names(value_codes)) {
  dat[[fct_var]] <- factor(dat[[fct_var]],
                           levels = value_codes[[fct_var]],
                           labels = names(attr(dat[[fct_var]], "labels")))
}
|
||||
|
||||
# Add descriptive statistics to codebook
|
||||
|
||||
# Number of entries per variable (missing values are counted as well)
codebook$n <- vapply(dat, length, integer(1))

# Class of each variable; a multi-class column is collapsed into one string so
# the result is always a plain character vector.
codebook$type <- vapply(dat, function(x) paste(class(x), collapse = "/"),
                        character(1))

# Mean and standard deviation for numeric variables, NA for all others.
# Both ignore missing values: without na.rm = TRUE a single NA turns the
# standard deviation into NA although a mean is still reported.
codebook$mean <- vapply(dat, function(x) {
  if (is.numeric(x)) mean(x, na.rm = TRUE) else NA_real_
}, numeric(1))
codebook$sd <- vapply(dat, function(x) {
  if (is.numeric(x)) sd(x, na.rm = TRUE) else NA_real_
}, numeric(1))
|
||||
|
||||
# props <- function(x) {
|
||||
# if (is.factor(x)) {
|
||||
# proportions(summary(x))
|
||||
# } else {
|
||||
# NA
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# codebook$prop <- lapply(dat, props)
|
||||
|
||||
# Export the codebook as semicolon-separated text and as an Excel workbook.
# row.names = FALSE keeps the header aligned with the data rows: the row names
# of `codebook` are the variable names, so writing them would repeat the
# `variable` column in a leading field that has no header.
# NOTE(review): quote = FALSE leaves text fields unquoted, so a label
# containing ";" would add an extra field to its row.
write.table(codebook,
            file = "data/codebook/codebook_02.csv",
            na = "",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)

openxlsx::write.xlsx(codebook, file = "data/codebook/codebook_02.xlsx")
|
||||
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
variable;label;n;type;mean;sd
|
||||
ResponseId;ResponseId;Response ID;13;character;;
|
||||
age;age;How old are you? Please enter your age in years.;13;numeric;29.6923076923077;5.99144689515278
|
||||
sex;sex;Please indicate your sex.;13;factor;;
|
||||
data_sharing_1;data_sharing_1;Have you ever published data in a repository?;13;factor;;
|
||||
data_sharing_2;data_sharing_2;How many of your data sets have you published so far?;13;numeric;1.38461538461538;1.85015591858549
|
||||
rdm_stmnt_1;rdm_stmnt_1;Please indicate how much you agree with the following statements - All my analyses are preregistered;13;numeric;4.15384615384615;1.14354374979373
|
||||
rdm_stmnt_2;rdm_stmnt_2;Please indicate how much you agree with the following statements - Sharing my data is very important to me;13;numeric;4;
|
||||
rdm_stmnt_3;rdm_stmnt_3;Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues;13;numeric;2.84615384615385;0.800640769025436
|
||||
rdm_stmnt_4;rdm_stmnt_4;Please indicate how much you agree with the following statements - I think research data management is overrated;13;numeric;1.84615384615385;0.987096233585649
|
||||
rdm_stmnt_5;rdm_stmnt_5;Please indicate how much you agree with the following statements - Sharing data is bad scientific practice;13;numeric;1.15384615384615;0.375533808099405
|
||||
career_level_1;career_level_1;What is your current career level?;13;factor;;
|
||||
career_level_2;career_level_2;How long have you been working in science (in years)?;13;numeric;6.26923076923077;10.1788493632126
|
||||
|
Binary file not shown.
@@ -0,0 +1,10 @@
|
||||
#' ---
|
||||
#' title: Codebook for Data Set "RDM MS SS 2024"
|
||||
#' author: Nora Wickelmaier
|
||||
#' ---
|
||||
|
||||
#+ echo = FALSE
|
||||
# Read the raw SPSS file; the path is relative, so it depends on the directory
# this script is run from.
dat <- haven::read_spss("../rawdata/RDM_MS_SS2024_download_2024-06-04.sav")
|
||||
|
||||
# Pass the imported data to codebook::codebook()
# NOTE(review): presumably this produces the codebook report body - confirm.
codebook::codebook(dat)
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
###############################################################################
|
||||
This file contains an overview of the variables from a toy data set collected
|
||||
at the methods seminar SS 2024. The raw data contained in
|
||||
"RDM_MS_SS2024_download_2024-06-03_v1.csv" contain additional variables
|
||||
created by Qualtrics. The variables have been preprocessed and are stored in
|
||||
"data_rdm-ms-ss2024_cleaned.csv".
|
||||
###############################################################################
|
||||
|
||||
|
||||
ResponseId. <Qualtrics ID of subject>
|
||||
-------------------------------------------------------------------------------
|
||||
random sequence of numbers, letters, and underscore
|
||||
|
||||
|
||||
sex. Please indicate your sex.
|
||||
-------------------------------------------------------------------------------
|
||||
-1. m
|
||||
-2. f
|
||||
-3. d
|
||||
-4. not indicated
|
||||
|
||||
|
||||
age. How old are you? Please enter your age in years.
|
||||
-------------------------------------------------------------------------------
|
||||
numerical input
|
||||
|
||||
|
||||
data_sharing_1. Have you ever published data in a repository?
|
||||
-------------------------------------------------------------------------------
|
||||
-1. No
|
||||
-2. Yes
|
||||
|
||||
|
||||
data_sharing_2. How many of your data sets have you published so far?
|
||||
-------------------------------------------------------------------------------
|
||||
numerical input
|
||||
|
||||
|
||||
rdm_stmnt. Please indicate how much you agree with the following statements:
|
||||
|
||||
rdm_stmnt_1. All my analyses are preregistered
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_2. Sharing my data is very important to me
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_3. I invest more time in research data management than my colleagues
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_4. I think research data management is overrated
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_5. Sharing data is bad scientific practice
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
|
||||
career_level_1. What is your current career level?
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Student
|
||||
-2. PhD student
|
||||
-3. Postdoc
|
||||
-4. Senior researcher
|
||||
-5. Professor
|
||||
-6. Other
|
||||
|
||||
|
||||
career_level_2. How long have you been working in science (in years)?
|
||||
-------------------------------------------------------------------------------
|
||||
numerical input
|
||||
Reference in New Issue
Block a user