Slides and example for third session

This commit is contained in:
2024-06-07 13:47:03 +02:00
parent 102834032c
commit f1f7f35988
25 changed files with 1388 additions and 0 deletions
@@ -0,0 +1,41 @@
# codebook_generation_01.R
#
# Codebook generation example
#
# Input: data/results/data_rdm-ms-ss2024_cleaned.RData
# Output: data/codebook/codebook_01.csv, data/codebook/codebook_01.xlsx
#
# created: 2024-06-04
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
# Load the cleaned survey data; the rest of the script works on the object
# `dat` that this file is expected to provide.
load("data/results/data_rdm-ms-ss2024_cleaned.RData")

# Question wording per variable, keyed by variable name. Looking the texts up
# by name (instead of relying on position) guarantees that a text cannot end
# up next to the wrong variable when the column order of `dat` differs from
# the order used here -- previously the texts of `age` and `sex` were swapped.
var_text <- c(
  ResponseId     = "Response Id",
  sex            = "Please indicate your sex.",
  age            = "How old are you? Please enter your age in years.",
  data_sharing_1 = "Have you ever published data in a repository?",
  data_sharing_2 = "How many of your data sets have you published so far?",
  rdm_stmnt_1    = "All my analyses are preregistered",
  rdm_stmnt_2    = "Sharing my data is very important to me",
  rdm_stmnt_3    = "I invest more time in research data management than my colleagues",
  rdm_stmnt_4    = "I think research data management is overrated",
  rdm_stmnt_5    = "Sharing data is bad scientific practice",
  career_level_1 = "What is your current career level?",
  career_level_2 = "How long have you been working in science (in years)?"
)

# Stop with a readable message if the data contain an undocumented variable
undocumented <- setdiff(names(dat), names(var_text))
if (length(undocumented) > 0) {
  stop("No question text defined for: ",
       paste(undocumented, collapse = ", "), call. = FALSE)
}

codebook <- data.frame(var_name = names(dat),
                       var_text = unname(var_text[names(dat)]))

# Apply a summary statistic to numeric variables only; everything else gets NA.
# Missing values are dropped for every statistic (sd() used to return NA for
# variables with missing answers because na.rm was only set for mean()).
describe_numeric <- function(x, stat) {
  if (is.numeric(x)) stat(x, na.rm = TRUE) else NA_real_
}

# Descriptive information per variable; vapply() keeps each column an atomic
# vector of a known type. `n` is the length of the variable, i.e. it includes
# missing values.
codebook$type <- vapply(dat, function(x) paste(class(x), collapse = "/"),
                        character(1))
codebook$n    <- vapply(dat, length, integer(1))
codebook$mean <- vapply(dat, describe_numeric, numeric(1), stat = mean)
codebook$sd   <- vapply(dat, describe_numeric, numeric(1), stat = sd)

# row.names = FALSE: otherwise every data row gets an extra leading field
# that has no counterpart in the header line.
write.table(codebook,
            file = "data/codebook/codebook_01.csv",
            na = "",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)

openxlsx::write.xlsx(codebook, file = "data/codebook/codebook_01.xlsx")
@@ -0,0 +1,13 @@
var_name;var_text;type;n;mean;sd
1;ResponseId;Response Id;factor;13;;
2;age;How old are you? Please enter your age in years.;integer;13;29.6923076923077;5.99144689515278
3;sex;Please indicate your sex.;factor;13;;
4;data_sharing_1;Have you ever published data in a repository?;factor;13;;
5;data_sharing_2;How many of your data sets have you published so far?;numeric;13;2.30769230769231;1.65250392761083
6;rdm_stmnt_1;All my analyses are preregistered;numeric;13;4.15384615384615;1.14354374979373
7;rdm_stmnt_2;Sharing my data is very important to me;numeric;13;4;
8;rdm_stmnt_3;I invest more time in research data management than my colleagues;numeric;13;2.84615384615385;0.800640769025436
9;rdm_stmnt_4;I think research data management is overrated;numeric;13;1.15384615384615;0.375533808099405
10;rdm_stmnt_5;Sharing data is bad scientific practice;numeric;13;1.15384615384615;0.375533808099405
11;career_level_1;What is your current career level?;factor;13;;
12;career_level_2;How long have you been working in science (in years)?;numeric;13;6.26923076923077;10.1788493632126
1 var_name;var_text;type;n;mean;sd
2 1;ResponseId;Response Id;factor;13;;
3 2;age;How old are you? Please enter your age in years.;integer;13;29.6923076923077;5.99144689515278
4 3;sex;Please indicate your sex.;factor;13;;
5 4;data_sharing_1;Have you ever published data in a repository?;factor;13;;
6 5;data_sharing_2;How many of your data sets have you published so far?;numeric;13;2.30769230769231;1.65250392761083
7 6;rdm_stmnt_1;All my analyses are preregistered;numeric;13;4.15384615384615;1.14354374979373
8 7;rdm_stmnt_2;Sharing my data is very important to me;numeric;13;4;
9 8;rdm_stmnt_3;I invest more time in research data management than my colleagues;numeric;13;2.84615384615385;0.800640769025436
10 9;rdm_stmnt_4;I think research data management is overrated;numeric;13;1.15384615384615;0.375533808099405
11 10;rdm_stmnt_5;Sharing data is bad scientific practice;numeric;13;1.15384615384615;0.375533808099405
12 11;career_level_1;What is your current career level?;factor;13;;
13 12;career_level_2;How long have you been working in science (in years)?;numeric;13;6.26923076923077;10.1788493632126
@@ -0,0 +1,68 @@
# codebook_generation_01.R
#
# Codebook generation example
#
# Input: data/rawdata/RDM_MS_SS2024_download_2024-06-04.sav
# Output: data/codebook/codebook_02.csv, data/codebook/codebook_02.xlsx
#
# created: 2024-06-04
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
# Read the raw SPSS export. The script later reads two attributes from the
# columns: "label" (question text) and "labels" (value labels).
dat <- as.data.frame(haven::read_spss("data/rawdata/RDM_MS_SS2024_download_2024-06-04.sav"))

## Fix data_sharing_2
# One free-text answer is recoded so the variable can be made numeric below
dat$data_sharing_2[dat$data_sharing_2 == "1 out of 4"] <- 1

# Look at attributes
attributes(dat$sex)

# Return the question text of a variable, or NA if it has none.
# exact = TRUE matters: attr() matches names partially by default, so a
# variable without a "label" attribute would otherwise return its "labels".
question_text <- function(x) {
  txt <- attr(x, "label", exact = TRUE)
  if (is.null(txt)) NA_character_ else as.character(txt)
}

# Create codebook with survey questions
codebook <- data.frame(variable = names(dat),
                       label    = vapply(dat, question_text, character(1),
                                         USE.NAMES = FALSE))

# Clean up data frame
dat <- as.data.frame(lapply(dat, sjlabelled::unlabel))

sapply(dat, class)   # Look at classes

dat$age            <- as.numeric(dat$age)
dat$career_level_2 <- as.numeric(dat$career_level_2)
dat$data_sharing_2 <- as.numeric(dat$data_sharing_2)

# Turn a variable that still carries a "labels" attribute into a factor.
# Codes and level names both come from that attribute, so they cannot get out
# of sync with a separately hard-coded range of codes.
labelled_to_factor <- function(x) {
  value_labels <- attr(x, "labels", exact = TRUE)
  stopifnot(!is.null(value_labels))
  factor(x, levels = unname(value_labels), labels = names(value_labels))
}

factor_vars <- c("sex", "data_sharing_1", "career_level_1")
dat[factor_vars] <- lapply(dat[factor_vars], labelled_to_factor)

# Apply a summary statistic to numeric variables only; everything else gets NA.
# Missing values are dropped for every statistic (sd() used to return NA for
# variables with missing answers because na.rm was only set for mean()).
describe_numeric <- function(x, stat) {
  if (is.numeric(x)) stat(x, na.rm = TRUE) else NA_real_
}

# Add descriptive statistics to codebook
# `n` is the length of the variable, i.e. it includes missing values.
codebook$n    <- vapply(dat, length, integer(1))
codebook$type <- vapply(dat, function(x) paste(class(x), collapse = "/"),
                        character(1))
codebook$mean <- vapply(dat, describe_numeric, numeric(1), stat = mean)
codebook$sd   <- vapply(dat, describe_numeric, numeric(1), stat = sd)

# props <- function(x) {
#   if (is.factor(x)) {
#     proportions(summary(x))
#   } else {
#     NA
#   }
# }
#
# codebook$prop <- lapply(dat, props)

# row.names = FALSE: otherwise every data row starts with the row name (a
# duplicate of `variable`) and has one field more than the header line.
write.table(codebook,
            file = "data/codebook/codebook_02.csv",
            na = "",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)

openxlsx::write.xlsx(codebook, file = "data/codebook/codebook_02.xlsx")
@@ -0,0 +1,13 @@
variable;label;n;type;mean;sd
ResponseId;ResponseId;Response ID;13;character;;
age;age;How old are you? Please enter your age in years.;13;numeric;29.6923076923077;5.99144689515278
sex;sex;Please indicate your sex.;13;factor;;
data_sharing_1;data_sharing_1;Have you ever published data in a repository?;13;factor;;
data_sharing_2;data_sharing_2;How many of your data sets have you published so far?;13;numeric;1.38461538461538;1.85015591858549
rdm_stmnt_1;rdm_stmnt_1;Please indicate how much you agree with the following statements - All my analyses are preregistered;13;numeric;4.15384615384615;1.14354374979373
rdm_stmnt_2;rdm_stmnt_2;Please indicate how much you agree with the following statements - Sharing my data is very important to me;13;numeric;4;
rdm_stmnt_3;rdm_stmnt_3;Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues;13;numeric;2.84615384615385;0.800640769025436
rdm_stmnt_4;rdm_stmnt_4;Please indicate how much you agree with the following statements - I think research data management is overrated;13;numeric;1.84615384615385;0.987096233585649
rdm_stmnt_5;rdm_stmnt_5;Please indicate how much you agree with the following statements - Sharing data is bad scientific practice;13;numeric;1.15384615384615;0.375533808099405
career_level_1;career_level_1;What is your current career level?;13;factor;;
career_level_2;career_level_2;How long have you been working in science (in years)?;13;numeric;6.26923076923077;10.1788493632126
1 variable;label;n;type;mean;sd
2 ResponseId;ResponseId;Response ID;13;character;;
3 age;age;How old are you? Please enter your age in years.;13;numeric;29.6923076923077;5.99144689515278
4 sex;sex;Please indicate your sex.;13;factor;;
5 data_sharing_1;data_sharing_1;Have you ever published data in a repository?;13;factor;;
6 data_sharing_2;data_sharing_2;How many of your data sets have you published so far?;13;numeric;1.38461538461538;1.85015591858549
7 rdm_stmnt_1;rdm_stmnt_1;Please indicate how much you agree with the following statements - All my analyses are preregistered;13;numeric;4.15384615384615;1.14354374979373
8 rdm_stmnt_2;rdm_stmnt_2;Please indicate how much you agree with the following statements - Sharing my data is very important to me;13;numeric;4;
9 rdm_stmnt_3;rdm_stmnt_3;Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues;13;numeric;2.84615384615385;0.800640769025436
10 rdm_stmnt_4;rdm_stmnt_4;Please indicate how much you agree with the following statements - I think research data management is overrated;13;numeric;1.84615384615385;0.987096233585649
11 rdm_stmnt_5;rdm_stmnt_5;Please indicate how much you agree with the following statements - Sharing data is bad scientific practice;13;numeric;1.15384615384615;0.375533808099405
12 career_level_1;career_level_1;What is your current career level?;13;factor;;
13 career_level_2;career_level_2;How long have you been working in science (in years)?;13;numeric;6.26923076923077;10.1788493632126
@@ -0,0 +1,10 @@
#' ---
#' title: Codebook for Data Set "RDM MS SS 2024"
#' author: Nora Wickelmaier
#' ---
#+ echo = FALSE
# Path of the raw SPSS export, given relative to the folder of this script
sav_file <- "../rawdata/RDM_MS_SS2024_download_2024-06-04.sav"
# Read the data and pass them to the codebook package to generate the report
dat <- haven::read_spss(sav_file)
codebook::codebook(dat)
@@ -0,0 +1,94 @@
###############################################################################
This file contains an overview of the variables from a toy data set collected
at the methods seminar SS 2024. The raw data contained in
"RDM_MS_SS2024_download_2024-06-03_v1.csv" include additional variables
created by Qualtrics. The variables have been preprocessed and are stored in
"data_rdm-ms-ss2024_cleaned.csv".
###############################################################################
ResponseId. <Qualtrics ID of subject>
-------------------------------------------------------------------------------
random sequence of numbers, letters, and underscore
sex. Please indicate your sex.
-------------------------------------------------------------------------------
-1. m
-2. f
-3. d
-4. not indicated
age. How old are you? Please enter your age in years.
-------------------------------------------------------------------------------
numerical input
data_sharing_1. Have you ever published data in a repository?
-------------------------------------------------------------------------------
-1. No
-2. Yes
data_sharing_2. How many of your data sets have you published so far?
-------------------------------------------------------------------------------
numerical input
rdm_stmnt. Please indicate how much you agree with the following statements:
rdm_stmnt_1. All my analyses are preregistered
-------------------------------------------------------------------------------
-1. Strongly disagree
-2. Disagree
-3. Neither agree nor disagree
-4. Agree
-5. Strongly agree
rdm_stmnt_2. Sharing my data is very important to me
-------------------------------------------------------------------------------
-1. Strongly disagree
-2. Disagree
-3. Neither agree nor disagree
-4. Agree
-5. Strongly agree
rdm_stmnt_3. I invest more time in research data management than my colleagues
-------------------------------------------------------------------------------
-1. Strongly disagree
-2. Disagree
-3. Neither agree nor disagree
-4. Agree
-5. Strongly agree
rdm_stmnt_4. I think research data management is overrated
-------------------------------------------------------------------------------
-1. Strongly disagree
-2. Disagree
-3. Neither agree nor disagree
-4. Agree
-5. Strongly agree
rdm_stmnt_5. Sharing data is bad scientific practice
-------------------------------------------------------------------------------
-1. Strongly disagree
-2. Disagree
-3. Neither agree nor disagree
-4. Agree
-5. Strongly agree
career_level_1. What is your current career level?
-------------------------------------------------------------------------------
-1. Student
-2. PhD student
-3. Postdoc
-4. Senior researcher
-5. Professor
-6. Other
career_level_2. How long have you been working in science (in years)?
-------------------------------------------------------------------------------
numerical input