Slides and example for third session

This commit is contained in:
Nora Wickelmaier 2024-06-07 13:47:03 +02:00
parent 102834032c
commit f1f7f35988
25 changed files with 1388 additions and 0 deletions

View File

@ -0,0 +1,844 @@
\documentclass[aspectratio=169]{beamer}
\usepackage{listings}
% Declare exactly one input encoding: only one can be active at a time, so
% listing utf8 and latin1 together is contradictory.
% NOTE(review): assumes the .tex source is saved as UTF-8 -- confirm.
\usepackage[utf8]{inputenc}
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
\addbibresource{../literature/lit.bib}
\usepackage{fancyvrb}
\usepackage{fontawesome5} % get icons
\usepackage{multirow}
\usepackage{color, colortbl}
\usepackage{tikz}
\usetikzlibrary{fit}
\usepackage[edges]{forest}
\lstset{language=R,%
backgroundcolor=\color{iwmgray!15!white},
basicstyle=\ttfamily\color{iwmgray},
frame=none,
commentstyle=\slshape\color{iwmgreen},
keywordstyle=\bfseries\color{iwmgray},
identifierstyle=\color{iwmpurple},
stringstyle=\color{iwmblue},
numbers=none,%left,numberstyle=\tiny,
basewidth={.5em, .4em},
showstringspaces=false,
emphstyle=\color{red!50!white}}
\makeatletter \def\newblock{\beamer@newblock} \makeatother
\beamertemplatenavigationsymbolsempty
\setbeamertemplate{itemize items}[circle]
\setbeamertemplate{section in toc}[circle]
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
\setbeamercolor{block body}{bg=iwmorange!50!white}
\setbeamercolor{block title}{fg=white, bg=iwmorange}
% Definitions for biblatex
\setbeamercolor{bibliography entry note}{fg=iwmgray}
\setbeamercolor{bibliography entry author}{fg=iwmgray}
\setbeamertemplate{bibliography item}{}
\definecolor{iwmorange}{RGB}{255,105,0}
\definecolor{iwmgray}{RGB}{67,79,79}
\definecolor{iwmblue}{RGB}{60,180,220}
\definecolor{iwmgreen}{RGB}{145,200,110}
\definecolor{iwmpurple}{RGB}{120,0,75}
\setbeamercolor{title}{fg=iwmorange}
\setbeamercolor{frametitle}{fg=iwmorange}
\setbeamercolor{structure}{fg=iwmorange}
\setbeamercolor{normal text}{fg=iwmgray}
\setbeamercolor{author}{fg=iwmgray}
\setbeamercolor{date}{fg=iwmgray}
\newcommand{\vect}[1]{\mathbf{#1}}
\newcommand{\mat}[1]{\mathbf{#1}}
\newcommand{\gvect}[1]{\boldsymbol{#1}}
\newcommand{\gmat}[1]{\boldsymbol{#1}}
\AtBeginSection[]{
\frame{
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
\setbeamertemplate{headline}{
\begin{beamercolorbox}{section in head}
\vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
\end{beamercolorbox}
}
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
\title{Data organisation for effective research data management}
\author{Nora Wickelmaier}
\date{June 10, 2024}
\begin{document}
\begin{frame}{}
\thispagestyle{empty}
\titlepage
\end{frame}
\begin{frame}{Data request}
\begin{center}
\includegraphics[scale = .55]{../figures/email_data_request_2024_01}
\end{center}
\end{frame}
\begin{frame}{Data folder for the data requested}
\begin{center}
\includegraphics[scale = .6]{../figures/email_data_request_2024_03}
\end{center}
\end{frame}
\begin{frame}{What is bad about this data organisation?}
% slido
\centering
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 3}
\url{https://app.sli.do/event/3S1Bn3Tjknuk5J5WiqAYzG}
\end{frame}
\begin{frame}[<+->]{Bad things about this data organisation}
\begin{itemize}
\item Raw and processed data are in the same folder
\item File naming does not sort in a sensible way: Best order would be first
by subject, then by session
\item Data and data scripts are in the same folder
\item Data scripts are not numbered, unclear in what order they need to be
executed
\item There are plot files (PDFs) between the data and code files
\item It is unclear which are the final and processed data files
\item The final data files are not stored in an interoperable format: There
is only an \texttt{.RData} file that (probably) contains the final data
which was used for further analyses
\item There is no documentation whatsoever
\item \dots
\end{itemize}
\end{frame}
\begin{frame}{Topics for this semester}
\centering
% Session overview. The row of the current session turns bold on the second
% slide; \textbf<2>{...} is beamer's overlay-aware form of \textbf, so the
% cell text does not have to be written twice.
\begin{tabular}{ll}
\hline
Date & Topic \\
\hline
2024-05-13 & Introduction to data management \\
2024-05-27 & Workflow \\
\textbf<2>{2024-06-10} & \textbf<2>{Data organisation} \\
2024-06-24 & Data sharing \\
2024-07-08 & Clean coding \\
2024-07-22 & Version control \\
\hline
\end{tabular}
\end{frame}
% * different data sources
% * content README file
% * best arrangement of data
% * redundancy
% * anonymizing/pseudonymizing data
\section{Folder organisation}
\begin{frame}[<+->]{Some general rules}
\begin{itemize}
\item One project, one folder
\item Add README file at top level
\item Raw data are in a separate folder (and stay separate!)
\item Have a code folder
\item It is often a good idea to separate your data analysis from papers,
talks, etc. (especially if you want to publish your data)
\item Have designated folders where stuff is written to (e.\,g.,
\texttt{results}, \texttt{figures}, \texttt{processed}, etc.)
\end{itemize}
\end{frame}
\begin{frame}[fragile]{Folder organisation}
{One possible example!}
% Folder tree: project root and its first-level entries.
% The "every path" style adds "draw", so each plain \path below is rendered
% as a visible connector line.
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
% first level
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
% one connector per first-level entry, listed top to bottom
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (file.west);
\end{tikzpicture}
\vfill
\end{frame}
\begin{frame}[fragile]{Folder organisation}
{Analysis folder}
% Folder tree: project root, first-level entries, and the contents of the
% analysis folder. The "every path" style adds "draw", so each plain \path
% below is rendered as a visible connector line.
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
% first level
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
% one connector per first-level entry, listed top to bottom
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (file.west);
% second level
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
\path (n2.east) -- (o1.west);
\path (n2.east) -- (o2.west);
\path (n2.east) -- (o3.west);
\path (n2.east) -- (o4.west);
\path (n2.east) -- (o5.west);
\end{tikzpicture}
\vfill
\end{frame}
\begin{frame}[fragile]{Folder organisation}
{Analysis folder}
% Folder tree: project root, first-level entries, contents of the analysis
% folder, and the scripts listed for its code folder. The "every path" style
% adds "draw", so each plain \path below is rendered as a visible connector.
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
% first level
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
% one connector per first-level entry, listed top to bottom
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (file.west);
% second level
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
\path (n2.east) -- (o1.west);
\path (n2.east) -- (o2.west);
\path (n2.east) -- (o3.west);
\path (n2.east) -- (o4.west);
\path (n2.east) -- (o5.west);
% third level
\node[text width = 5cm] (p1) at (12, 0) {\faIcon[regular]{file} \verb+01_preprocessing.R+};
\node[text width = 5cm] (p2) at (12, -0.7) {\faIcon[regular]{file} \verb+02_descriptives.R+};
\node[text width = 5cm] (p3) at (12, -1.4) {\faIcon[regular]{file} \verb+03_modeling.R+};
\node[text width = 5cm] (p4) at (12, -2.1) {\faIcon[regular]{file} \verb+04_plots.R+};
\path (o1.center) -- (p1.west);
\path (o1.center) -- (p2.west);
\path (o1.center) -- (p3.west);
\path (o1.center) -- (p4.west);
\end{tikzpicture}
\vfill
\end{frame}
\begin{frame}[fragile]{Folder organisation}
{Analysis folder}
% Folder tree: project root, first-level entries, contents of the analysis
% folder, and the files listed for its data folder. The "every path" style
% adds "draw", so each plain \path below is rendered as a visible connector.
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
% first level
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
% one connector per first-level entry, listed top to bottom
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (file.west);
% second level
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
\path (n2.east) -- (o1.west);
\path (n2.east) -- (o2.west);
\path (n2.east) -- (o3.west);
\path (n2.east) -- (o4.west);
\path (n2.east) -- (o5.west);
% third level
\node[text width = 5cm] (p1) at (12, 0) {\faIcon[regular]{file} \verb+subj1_ses01.txt+};
\node[text width = 5cm] (p2) at (12, -0.7) {\faIcon[regular]{file} \verb+subj1_ses02.txt+};
\node[text width = 5cm] (p3) at (12, -1.4) {\faIcon[regular]{file} \verb+subj2_ses01.txt+};
\node[text width = 5cm] (p4) at (12, -2.1) {\faIcon[regular]{file} \verb+subj2_ses02.txt+};
\node[text width = 5cm] (p5) at (12, -2.8) {\faIcon[regular]{file} \dots};
\path (o2.center) -- (p1.west);
\path (o2.center) -- (p2.west);
\path (o2.center) -- (p3.west);
\path (o2.center) -- (p4.west);
\path (o2.center) -- (p5.west);
\end{tikzpicture}
\vfill
\end{frame}
\begin{frame}[fragile]{Folder organisation}
{Analysis folder}
% Folder tree: project root, first-level entries, contents of the analysis
% folder, and the files listed for its results folder. The "every path" style
% adds "draw", so each plain \path below is rendered as a visible connector.
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
% first level
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
% one connector per first-level entry, listed top to bottom
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (file.west);
% second level
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
\path (n2.east) -- (o1.west);
\path (n2.east) -- (o2.west);
\path (n2.east) -- (o3.west);
\path (n2.east) -- (o4.west);
\path (n2.east) -- (o5.west);
% third level
\node[text width = 5cm] (p1) at (12, -0.7) {\faIcon[regular]{file}
\verb+data_all-subj.csv+};
\node[text width = 5cm] (p2) at (12, -1.4) {\faIcon[regular]{file}
\verb+data_all-subj.RData+};
\node[text width = 5cm] (p3) at (12, -2.1) {\faIcon[regular]{file}
\verb+eval_model1.csv+};
\node[text width = 5cm] (p4) at (12, -2.8) {\faIcon[regular]{file}
\verb+eval_model2.csv+};
\path (o4.center) -- (p1.west);
\path (o4.center) -- (p2.west);
\path (o4.center) -- (p3.west);
\path (o4.center) -- (p4.west);
\end{tikzpicture}
\vfill
\pause
The analysis folder you might want to share on OSF, GitHub, etc.
\end{frame}
\begin{frame}[fragile]{Folder organisation}
{Dissemination folder}
% Folder tree: project root, first-level entries, and the contents of the
% dissemination folder. The "every path" style adds "draw", so each plain
% \path below is rendered as a visible connector line.
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
% first level
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+analysis+};
\node[text width = 3.2cm] (n3) at (3.6, -1.4) {\faIcon{folder} \verb+dissemination+};
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+material+};
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
% one connector per first-level entry, listed top to bottom
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (file.west);
% second level
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+paper+};
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+talks+};
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
\node (o5) at (8.5, -2.8) {\faIcon{folder} \verb+tables+};
\path (n3.east) -- (o1.west);
\path (n3.east) -- (o2.west);
\path (n3.east) -- (o3.west);
\path (n3.east) -- (o4.west);
\path (n3.east) -- (o5.west);
\end{tikzpicture}
\vfill
\pause
Having separate folders for figures and tables helps you keep track of them
for your paper and talks
\end{frame}
\begin{frame}[fragile]{Figures and tables}
\begin{itemize}
\item Most of us (including me!) are not at a stage where we are
writing our papers or talks as reproducible documents
\pause
\item It is still a good idea to create tables and figures in R and keep the
code easily accessible
\pause
\item One suggestion
\begin{tikzpicture}[
every node/.style = {text width = 4.2cm, align = left},
every path/.style = {thick, draw}
]
% figures
\node (fig) at (0, 0) {\faIcon{folder} \verb+figures+};
\node (n1) at (4, 0) {\faIcon[regular]{file} \verb+h1_barplot.R+};
\node (n2) at (4, -0.7) {\faIcon[regular]{file} \verb+h1_barplot.png+};
\path (fig.center) -- (n1.west);
\path (fig.center) -- (n2.west);
% tables
\node (tab) at (0, -1.5) {\faIcon{folder} \verb+tables+};
\node (o1) at (4, -1.5) {\faIcon[regular]{file} \verb+h1_mean-table.Rmd+};
\node (o2) at (4, -2.2) {\faIcon[regular]{file} \verb+h1_mean-table.docx+};
\path (tab.center) -- (o1.west);
\path (tab.center) -- (o2.west);
\end{tikzpicture}
\pause
\item I export the data for figures and tables from \texttt{analysis/code}
to \texttt{dissemination/results} so the dissemination folder is
self-contained
\end{itemize}
\end{frame}
\begin{frame}[fragile]{Several data sources}
\begin{itemize}
\item When you have several different data sources like questionnaires and
eye-tracking data, keep them in separate folders
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node (data) at (0, 0) {\faIcon{folder} \verb+data+};
\node (n1) at (4, 0) {\faIcon{folder} \verb+eyetracking+};
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+qualtrics+};
\path (data.center) -- (n1.west);
\path (data.center) -- (n2.west);
\end{tikzpicture}
\pause
\item Process them separately, e.\,g., with
\verb+01a_preprocessing_eyetracking.R+ and
\verb+01b_preprocessing_surveys.R+ and then \verb+02_combine-data.R+
\begin{tikzpicture}[
every node/.style = {text width = 5cm, align = left},
every path/.style = {thick, draw}
]
\node (results) at (0, 0) {\faIcon{folder} \verb+results+};
\node (n1) at (4, 0) {\faIcon[regular]{file} \verb+data_eyetracking.csv+};
\node (n2) at (4, -0.7) {\faIcon[regular]{file} \verb+data_surveys.csv+};
\node (n3) at (4, -1.4) {\faIcon[regular]{file} \verb+data_complete.csv+};
\path (results.center) -- (n1.west);
\path (results.center) -- (n2.west);
\path (results.center) -- (n3.west);
\end{tikzpicture}
\end{itemize}
\end{frame}
\begin{frame}{Toy example with 11 questions}
Thank you everybody for filling out our little toy survey in Qualtrics!
\vfill
\tiny
\begin{tabular}{lllll}
\hline
ResponseId & age & sex & data\_sharing\_1 & data\_sharing\_2 \\
\hline
R\_225ffqhb7qRaIGO:1 & Min. :24.00 & m : 2 & No :7 & Min. :1.000 \\
R\_2F9fXxf3NedHqZl:1 & 1st Qu.:26.50 & f :11 & Yes:7 & 1st Qu.:1.000 \\
R\_2foYj4iSgaBTkEO:1 & Median :28.00 & d : 1 & & Median :2.000 \\
R\_2J9B4aLaasQ1m81:1 & Mean :29.86 & not indicated: 0 & & Mean :2.214 \\
R\_2P1TMDNlwm0gSIk:1 & 3rd Qu.:30.00 & & & 3rd Qu.:2.000 \\
R\_2pXfOSq8DBImG6R:1 & Max. :43.00 & & & Max. :6.000 \\
(Other) :8 & & & & \\
\hline
\end{tabular}
\vspace{.5cm}
\begin{tabular}{lllllll}
\hline
rdm\_stmnt\_1 & rdm\_stmnt\_2 & rdm\_stmnt\_3 & rdm\_stmnt\_4 & rdm\_stmnt\_5 & career\_level\_1 & career\_level\_2 \\
\hline
Min. :2.000 & Min. :2 & Min. :2.000 & Min. :1.000 & Min. :1.000 & Student : 0 & Min. : 1.000 \\
1st Qu.:3.250 & 1st Qu.:4 & 1st Qu.:2.250 & 1st Qu.:1.000 & 1st Qu.:1.000 & PhD student :11 & 1st Qu.: 1.625 \\
Median :4.500 & Median :4 & Median :3.000 & Median :1.000 & Median :1.000 & Postdoc : 1 & Median : 2.500 \\
Mean :4.071 & Mean :4 & Mean :2.857 & Mean :1.143 & Mean :1.143 & Senior researcher: 0 & Mean : 5.964 \\
3rd Qu.:5.000 & 3rd Qu.:5 & 3rd Qu.:3.000 & 3rd Qu.:1.000 & 3rd Qu.:1.000 & Professor : 1 & 3rd Qu.: 4.500 \\
Max. :5.000 & Max. :5 & Max. :5.000 & Max. :2.000 & Max. :2.000 & Other : 1 & Max. :38.000 \\
& NA's :1 & & & & & \\
\hline
\end{tabular}
\end{frame}
% print(xtable::xtable(summary(dat[, 1:5])), include.rownames = FALSE)
% print(xtable::xtable(summary(dat[, 6:12])), include.rownames = FALSE)
\begin{frame}[fragile]{Folder structure for toy example}
{One possible structure!}
\begin{tikzpicture}[
every node/.style = {text width = 4.3cm, align = left},
every path/.style = {thick, draw}
]
\node (ex) at (0, 0) {\faIcon{folder} \verb+example+};
\node (n1) at (3, 0) {\faIcon{folder} \verb+code+};
\node (n2) at (3, -0.7) {\faIcon{folder} \verb+data+};
\node (n3) at (3, -1.4) {\faIcon[regular]{file} \verb+README.md+};
\path (ex.center) -- (n1.west);
\path (ex.center) -- (n2.west);
\path (ex.center) -- (n3.west);
\node (o1) at (7, 0.7) {\faIcon[regular]{file} \verb+01_preprocessing.R+};
\node (o2) at (7, -0.7) {\faIcon{folder} \verb+codebook+};
\node (o3) at (7, -1.4) {\faIcon{folder} \verb+rawdata+};
\node (o4) at (7, -2.1) {\faIcon{folder} \verb+results+};
\path (n1.center) -- (o1.west);
\path (n2.center) -- (o2.west);
\path (n2.center) -- (o3.west);
\path (n2.center) -- (o4.west);
\node (p1) at (11, -0.7) {\faIcon[regular]{file} \verb+codebook_01.R+};
\node (p2) at (11, -1.4) {\faIcon[regular]{file} \verb+codebook_01.xlsx+};
\node (p3) at (11, -2.1) {\dots};
\path (o2.center) -- (p1.west);
\path (o2.center) -- (p2.west);
\path (o2.center) -- (p3.west);
\end{tikzpicture}
\end{frame}
\section{Metadata}
\begin{frame}{Metadata answers questions}
% Six questions answered by metadata; the question word of each item is set
% in bold with \textbf instead of the obsolete {\bf ...} switch.
\begin{itemize}
\item \textbf{Who} created the data?
\item \textbf{Why} was the data created?
\item \textbf{When} was the data created?
\item \textbf{Where} is the data?
\item \textbf{How} was the data created?
\item \textbf{What} is the content of the data?
\end{itemize}
\vfill
\hfill{\tiny \citet{Wilbrandt2023}}
\end{frame}
\begin{frame}{Metadata}
\begin{block}{Metadata}
\dots is data about data.\\
\dots can be \emph{descriptive}, \emph{structural}, or \emph{administrative}.
\end{block}
\vfill
\begin{columns}
\begin{column}[t]{.5\textwidth}
Contains information on origin and background of data like
\begin{itemize}
\item Who, when, why, how, \dots
\item Used resources
\item Used abbreviations, units, names
\item Licenses
\item \dots
\end{itemize}
\end{column}
\begin{column}[t]{.5\textwidth}
Data can be anything like
\begin{itemize}
\item Book content
\item Pictures or audio files
\item Website content or a blog post
\item Journal paper
\item Research data
\item \dots
\end{itemize}
\end{column}
\end{columns}
\vfill
\end{frame}
\begin{frame}{Metadata examples}
{Photo}
\begin{center}
\includegraphics[scale = .31]{../figures/metadata_photo}
\end{center}
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
\end{frame}
\begin{frame}{Metadata examples}
{Book}
\begin{center}
\includegraphics[scale = .36]{../figures/metadata_book}
\end{center}
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
\end{frame}
\begin{frame}{Metadata examples}
{Webpage}
\begin{center}
\includegraphics[scale = .27]{../figures/metadata_webpage}
\end{center}
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
\end{frame}
\begin{frame}{Metadata examples}
{WORD document}
\begin{center}
\includegraphics[scale = .23]{../figures/metadata_word_document}
\end{center}
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
\end{frame}
\begin{frame}{Metadata for research data}
\begin{tikzpicture}
\node[font=\Large] (n1) at (0,0) {\bf \color{iwmorange} Study};
\node[font=\large] (i1) at (0,-1) {$\bullet$ Persons};
\node[font=\large] (i2) at (.36,-1.5) {$\bullet$ Background};
\node[font=\large] (i3) at (.03,-2) {$\bullet$ Funding};
\node[font=\large] (i4) at (-.38,-2.5) {$\bullet$ \dots};
\node[draw=iwmorange, thick, fit={(n1) (i1) (i2) (i3) (i4)}, inner sep=10pt] (box) {};
\node[font=\Large] (n2) at (5,0) {\bf \color{iwmorange} Data set};
\node[font=\large] (j1) at (4.3,-1) {$\bullet$ Files};
\node[font=\large] (j2) at (4.57,-1.5) {$\bullet$ Sources};
\node[font=\large] (j3) at (4.65,-2) {$\bullet$ Methods};
\node[font=\large] (j4) at (4.18,-2.5) {$\bullet$ \dots};
\node[draw=iwmorange, thick, fit={(n2) (j1) (j2) (j3) (j4)}, inner sep=10pt] (box) {};
\node[font=\Large] (n3) at (10,0) {\bf \color{iwmorange} Variables};
\node[font=\large] (k1) at (9.7,-1) {$\bullet$ Data type};
\node[font=\large] (k2) at (9.69,-1.5) {$\bullet$ Scale unit};
\node[font=\large] (k3) at (9.85,-2) {$\bullet$ Value range};
\node[font=\large] (k4) at (9.12,-2.5) {$\bullet$ \dots};
\node[draw=iwmorange, thick, fit={(n3) (k1) (k2) (k3) (k4)}, inner sep=10pt] (box) {};
\draw[-latex, thick] (n1) -- (n2);
\draw[-latex, thick] (n2) -- (n3);
\end{tikzpicture}
\vfill
\hfill\tiny \url{https://datamanagement.hms.harvard.edu/collect/readme-files}
\end{frame}
\section{README files}
\begin{frame}{README files}
\begin{itemize}
\item Can be used to give information about all levels in a research
project: study/project, data set, variables; either in one README or in
several ones
\item Should provide a clear and concise description of all relevant details
about data collection, processing, and analysis
\item README files are created for different purposes:
\begin{itemize}
\item to document changes to files or file names within a folder
\item to explain file naming conventions, practices, etc.\ ``in
general'' for future reference
\item to specifically accompany files/data being deposited in a
repository
\end{itemize}
\item Creating a README file at the beginning of your research process,
and updating it consistently throughout your research, will help you
to compile a final README file when your data is ready for deposit
\item Find a template here:
\url{https://cornell.app.box.com/v/ReadmeTemplate}
\end{itemize}
\vfill
\hfill\tiny \url{https://datamanagement.hms.harvard.edu/collect/readme-files}
\end{frame}
\begin{frame}{Study/project}{README on top level}
\begin{itemize}
\item Project name and purpose
\item Funding information (process number!)
\item Ethics approved? LEK number!
\item Person(s) responsible for conducting the study
\item One or several studies? Information about them
\item Time/Duration of project
\item \dots
\end{itemize}
\end{frame}
\begin{frame}{Data set}{README accompanying data set(s)}
\begin{itemize}
\item One or more data sets?
\item Time of data collection
\item Person(s) responsible for data collection
\item File organisation
\item Naming conventions
\item Preprocessing methods
\item Anything that is special about the data set(s)
\item Number of subjects
\item Variables
\item \dots
\end{itemize}
\end{frame}
\begin{frame}{Variables}{README accompanying a specific data set}
\begin{itemize}
\item You can use a README (or text file called \texttt{codebook.txt} or
similar) to document your variables
\item Especially, if you only have a few variables, this is an easy and fast
way to document them
\item If you are working with extensive surveys or questionnaires, it might
be a good time investment to create a more elaborate codebook
\end{itemize}
\vfill
\end{frame}
\section{Codebooks}
\begin{frame}{What information about variables should a codebook include?}
% slido
\centering
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 3}
\url{https://app.sli.do/event/3S1Bn3Tjknuk5J5WiqAYzG}
\end{frame}
\begin{frame}{A codebook should include}
\begin{tabular}{lp{11cm}}
\hline
Variable name & Usually some abbreviation like \texttt{pna01} \\
Variable label & Brief description to identify variable \\
Question text & If applicable, exact wording from survey question \\
Values & Values variable can take (e.\,g., 1 to 5) \\
Value labels & If applicable, textual descriptions of the values \\
Statistics & For example, range, mean, standard deviation for
numeric variables; frequencies and percentages for categorical variables \\
Missing data & If applicable, values and labels of missing data \\
Notes & Additional notes, remarks, or comments; for measures or
questions from copyrighted instruments, the notes field can be used to
cite the source \\
\hline
\end{tabular}
\vfill
\hfill\tiny \url{https://www.icpsr.umich.edu/web/ICPSR/cms/1983}
\end{frame}
\begin{frame}{Codebooks}
\begin{itemize}
\item There are many different ways to create a codebook
\item It can be a README, some other plain text file, a table (stored as CSV
or XLSX), a WORD document, or PDF
\item For a short questionnaire, it can be sufficient to export it as a PDF
\item Let's walk through a couple of options\dots
\end{itemize}
\vfill
\end{frame}
\begin{frame}{Option 1 -- Toy example with 11 questions}
{Simple PDF}
\begin{columns}
\begin{column}{.5\textwidth}
\begin{center}
\vspace{-.4cm}
Export from Qualtrics\\
\includegraphics[scale = .3]{../figures/codebook_1.png}
\end{center}
\end{column}
\begin{column}{.6\textwidth}
\begin{itemize}
\item For a simple questionnaire like this, the WORD document exported
from Qualtrics and converted to PDF might be sufficient as a codebook
\item For longer questionnaires, the WORD document can still be a good
starting point to create a more elaborate codebook
\end{itemize}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[fragile]{Option 2 -- Toy example with 11 questions}
{Plain text file}
\begin{center}
\vspace{-.3cm}
\footnotesize
\begin{lstlisting}[language = bash, identifierstyle=\color{iwmgray}]
sex. Please indicate your sex.
-------------------------------------------------------------------------------
-1. m
-2. f
-3. d
-4. not indicated
age. How old are you? Please enter your age in years.
-------------------------------------------------------------------------------
numerical input
data_sharing_1. Have you ever published data in a repository?
-------------------------------------------------------------------------------
-1. No
-2. Yes
\end{lstlisting}
\end{center}
\end{frame}
\begin{frame}[fragile]{Option 3 -- Toy example with 11 questions}
{Creating a simple codebook in R ``by hand''}
\footnotesize
% R listing shown on the slide: builds a codebook data frame with one row per
% variable (name, question text, class, length, mean, sd) and writes it to an
% XLSX file. sd() is called with na.rm = TRUE, like mean(), so that numeric
% variables containing missing values still get a standard deviation.
\begin{lstlisting}
load("results/data_rdm-ms-ss2024_cleaned.RData")
codebook <- data.frame(var_name = names(dat),
var_text = c("Response Id", "Please indicate your sex.",
"How old are you? Please enter your age in years.",
...
"Sharing data is bad scientific practice",
"What is your current career level?",
"How long have you been working in science (in years)?"))
codebook$type <- sapply(dat, class)
codebook$n <- sapply(dat, length)
codebook$mean <- sapply(dat,
function(x) ifelse(is.numeric(x), mean(x, na.rm = TRUE), NA))
codebook$sd <- sapply(dat,
function(x) ifelse(is.numeric(x), sd(x, na.rm = TRUE), NA))
openxlsx::write.xlsx(codebook, file = "codebook/codebook_01.xlsx")
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]{Option 3 -- Toy example with 11 questions}
{Creating a simple codebook in R ``by hand''}
\begin{center}
\includegraphics[scale = .6]{../figures/codebook_2.png}
\end{center}
\end{frame}
\begin{frame}[fragile]{Option 4 -- Toy example with 11 questions}
{Using the codebook package in R}
\begin{itemize}
\item When you export a Qualtrics questionnaire as an SPSS file and import it
into R using the haven package, you can use RMarkdown to create an
elaborate HTML codebook
\item It works best for classical questionnaire items
\item In our example, the survey is not formatted well enough for the
generated codebook to be completely correct
\end{itemize}
\footnotesize
\begin{lstlisting}
#' ---
#' title: Codebook for Data Set "RDM MS SS 2024"
#' author: Nora Wickelmaier
#' ---
#+ echo = FALSE
dat <- haven::read_spss("../rawdata/RDM_MS_SS2024_download_2024-06-04.sav")
codebook::codebook(dat)
\end{lstlisting}
\end{frame}
\appendix
%%\begin{frame}[allowframebreaks]{References}
\begin{frame}{References}
%\renewcommand{\bibfont}{\small}
\printbibliography
\vfill
\end{frame}
\end{document}

View File

@ -0,0 +1,47 @@
# Toy data set for the methods seminar on data management SS2024
## Responsible person
Nora Wickelmaier
Referentin Forschungsmethoden und Forschungsdatenmanagement
Leibniz-Institut für Wissensmedien (IWM)
n.wickelmaier@iwm-tuebingen.de
## Folder structure and naming conventions
```
/example/
|
|- /code/
|- /data/
    |- /codebook/
    |- /rawdata/
    |- /results/
```
The `code` folder contains analysis scripts written in R. The scripts are
numbered, indicating the order they should be executed in.
The `data` folder contains all folders associated with data and its
documentation.
The `codebook` folder contains different codebook options and R scripts that
create these codebooks. If the codebook is created by an R script, the script
and the codebook are named identically, e.g., `codebook_01.R` and
`codebook_01.xlsx`.
The `rawdata` folder contains the downloads from Qualtrics. In Qualtrics, the
variables have been selected and ordered and then downloaded, without the
additional columns Qualtrics adds by default. The naming convention for the
downloaded files is
```
RDM_MS_SS2024_download_<YYYY-MM-DD>.<fileending>
```
No other files than the downloads from Qualtrics should go into this folder!
The `results` folder contains processed data. The scripts in `/code/` process
the data from `/rawdata/` and save the files containing the processed data to
`/results/`. Data can be exported as CSV files or RData files. If different file
formats contain the same data, they should be named identically, e.g.,
`data_rdm-ms-ss2024_cleaned.csv` and `data_rdm-ms-ss2024_cleaned.RData`.

View File

@ -0,0 +1,78 @@
# 01_preprocessing.R
#
# Cleaning up data for toy data set Methods Seminar SS2024
#
# Input: RDM_MS_SS2024_download_2024-06-07.csv
# Output: results/data_rdm-ms-ss2024_cleaned.csv
# results/data_rdm-ms-ss2024_cleaned.RData
#
# created: 2024-06-03

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")

# The Qualtrics CSV download starts with three header rows (variable names,
# question texts, import ids): skip them when reading the responses and take
# the column names from the first row of the file
dat <- read.table("data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv",
                  sep = ",", skip = 3, stringsAsFactors = TRUE,
                  na.strings = "")
names(dat) <-
  readLines("data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv", 1) |>
  strsplit(split = ",") |>
  unlist()

# Clean up variables
dat$ResponseId     <- factor(dat$ResponseId)
dat$sex            <- factor(dat$sex,
                             levels = c("m", "f", "d", "not indicated"))
dat$data_sharing_1 <- factor(dat$data_sharing_1,
                             levels = c("No", "Yes"))
dat$career_level_1 <- factor(dat$career_level_1,
                             levels = c("Student", "PhD student", "Postdoc",
                                        "Senior researcher", "Professor",
                                        "Other"))

## Fix data_sharing_2
# The column is read as a factor because of the free-text answer
# "1 out of 4". Go through character before converting to numeric:
# as.numeric() applied directly to a factor returns the internal level codes,
# not the numbers that were entered.
dat$data_sharing_2 <- as.character(dat$data_sharing_2)
dat$data_sharing_2[dat$data_sharing_2 %in% "1 out of 4"] <- "1"
dat$data_sharing_2 <- as.numeric(dat$data_sharing_2)

# Create numeric statement variables
# All five statements share the same response scale; the position of a
# response in `agreement` becomes its numeric score (1 = Strongly disagree,
# ..., 5 = Strongly agree). Every item is converted from its own column.
agreement <- c("Strongly disagree", "Disagree",
               "Neither agree nor disagree", "Agree",
               "Strongly agree")
for (item in paste0("rdm_stmnt_", 1:5)) {
  dat[[item]] <- as.numeric(factor(dat[[item]], levels = agreement))
}

# Save cleaned data set
write.table(dat, file = "data/results/data_rdm-ms-ss2024_cleaned.csv", sep = ";",
            row.names = FALSE, quote = FALSE)
save(dat, file = "data/results/data_rdm-ms-ss2024_cleaned.RData")

View File

@ -0,0 +1,41 @@
# codebook_generation_01.R
#
# Codebook generation example: question texts are entered by hand
#
# Input: results/data_rdm-ms-ss2024_cleaned.RData
# Output: codebook/codebook_01.csv
# codebook/codebook_01.xlsx
#
# created: 2024-06-04

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")

load("data/results/data_rdm-ms-ss2024_cleaned.RData")

# Question texts keyed by variable name, so that each text stays attached to
# its variable regardless of the column order in `dat`
var_text <- c(
  ResponseId     = "Response Id",
  age            = "How old are you? Please enter your age in years.",
  sex            = "Please indicate your sex.",
  data_sharing_1 = "Have you ever published data in a repository?",
  data_sharing_2 = "How many of your data sets have you published so far?",
  rdm_stmnt_1    = "All my analyses are preregistered",
  rdm_stmnt_2    = "Sharing my data is very important to me",
  rdm_stmnt_3    = "I invest more time in research data management than my colleagues",
  rdm_stmnt_4    = "I think research data management is overrated",
  rdm_stmnt_5    = "Sharing data is bad scientific practice",
  career_level_1 = "What is your current career level?",
  career_level_2 = "How long have you been working in science (in years)?"
)

codebook <- data.frame(var_name = names(dat),
                       var_text = unname(var_text[names(dat)]))

# Descriptive statistics; mean and sd are only defined for numeric variables
# and ignore missing responses
codebook$type <- sapply(dat, class)
codebook$n    <- sapply(dat, length)
codebook$mean <- sapply(dat, function(x) {
  if (is.numeric(x)) mean(x, na.rm = TRUE) else NA
})
codebook$sd   <- sapply(dat, function(x) {
  if (is.numeric(x)) sd(x, na.rm = TRUE) else NA
})

# row.names = FALSE keeps the header line aligned with the data lines
write.table(codebook,
            file = "data/codebook/codebook_01.csv",
            na = "",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)
openxlsx::write.xlsx(codebook, file = "data/codebook/codebook_01.xlsx")

View File

@ -0,0 +1,13 @@
var_name;var_text;type;n;mean;sd
1;ResponseId;Response Id;factor;13;;
2;age;How old are you? Please enter your age in years.;integer;13;29.6923076923077;5.99144689515278
3;sex;Please indicate your sex.;factor;13;;
4;data_sharing_1;Have you ever published data in a repository?;factor;13;;
5;data_sharing_2;How many of your data sets have you published so far?;numeric;13;2.30769230769231;1.65250392761083
6;rdm_stmnt_1;All my analyses are preregistered;numeric;13;4.15384615384615;1.14354374979373
7;rdm_stmnt_2;Sharing my data is very important to me;numeric;13;4;
8;rdm_stmnt_3;I invest more time in research data management than my colleagues;numeric;13;2.84615384615385;0.800640769025436
9;rdm_stmnt_4;I think research data management is overrated;numeric;13;1.15384615384615;0.375533808099405
10;rdm_stmnt_5;Sharing data is bad scientific practice;numeric;13;1.15384615384615;0.375533808099405
11;career_level_1;What is your current career level?;factor;13;;
12;career_level_2;How long have you been working in science (in years)?;numeric;13;6.26923076923077;10.1788493632126
1 var_name;var_text;type;n;mean;sd
2 1;ResponseId;Response Id;factor;13;;
3 2;age;How old are you? Please enter your age in years.;integer;13;29.6923076923077;5.99144689515278
4 3;sex;Please indicate your sex.;factor;13;;
5 4;data_sharing_1;Have you ever published data in a repository?;factor;13;;
6 5;data_sharing_2;How many of your data sets have you published so far?;numeric;13;2.30769230769231;1.65250392761083
7 6;rdm_stmnt_1;All my analyses are preregistered;numeric;13;4.15384615384615;1.14354374979373
8 7;rdm_stmnt_2;Sharing my data is very important to me;numeric;13;4;
9 8;rdm_stmnt_3;I invest more time in research data management than my colleagues;numeric;13;2.84615384615385;0.800640769025436
10 9;rdm_stmnt_4;I think research data management is overrated;numeric;13;1.15384615384615;0.375533808099405
11 10;rdm_stmnt_5;Sharing data is bad scientific practice;numeric;13;1.15384615384615;0.375533808099405
12 11;career_level_1;What is your current career level?;factor;13;;
13 12;career_level_2;How long have you been working in science (in years)?;numeric;13;6.26923076923077;10.1788493632126

View File

@ -0,0 +1,68 @@
# codebook_generation_02.R
#
# Codebook generation example: question texts are taken from the variable
# labels stored in the SPSS download
#
# Input: rawdata/RDM_MS_SS2024_download_2024-06-04.sav
# Output: codebook/codebook_02.csv
# codebook/codebook_02.xlsx
#
# created: 2024-06-04

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")

dat <- as.data.frame(haven::read_spss("data/rawdata/RDM_MS_SS2024_download_2024-06-04.sav"))

## Fix data_sharing_2
dat$data_sharing_2[dat$data_sharing_2 == "1 out of 4"] <- 1

# Look at attributes
attributes(dat$sex)

# Create codebook with survey questions
# unname() keeps the variable names from becoming row names of the codebook
codebook <- data.frame(variable = names(dat),
                       label = unname(sapply(dat, function(x) attr(x, "label"))))

# Clean up data frame
dat <- as.data.frame(lapply(dat, sjlabelled::unlabel))
sapply(dat, class) # Look at classes

dat$age            <- as.numeric(dat$age)
dat$career_level_2 <- as.numeric(dat$career_level_2)
dat$data_sharing_2 <- as.numeric(dat$data_sharing_2)

# Categorical variables: numeric codes become factors that use the value
# labels from the "labels" attribute as level names
dat$sex <- factor(dat$sex,
                  levels = 1:4,
                  labels = names(attr(dat$sex, "labels")))
dat$data_sharing_1 <- factor(dat$data_sharing_1,
                             levels = 1:2,
                             labels = names(attr(dat$data_sharing_1, "labels")))
dat$career_level_1 <- factor(dat$career_level_1,
                             levels = 1:6,
                             labels = names(attr(dat$career_level_1, "labels")))

# Add descriptive statistics to codebook
# mean and sd are only defined for numeric variables and ignore missing
# responses
codebook$n    <- sapply(dat, length)
codebook$type <- sapply(dat, class)
codebook$mean <- sapply(dat, function(x) {
  if (is.numeric(x)) mean(x, na.rm = TRUE) else NA
})
codebook$sd   <- sapply(dat, function(x) {
  if (is.numeric(x)) sd(x, na.rm = TRUE) else NA
})

# props <- function(x) {
# if (is.factor(x)) {
# proportions(summary(x))
# } else {
# NA
# }
# }
#
# codebook$prop <- lapply(dat, props)

# row.names = FALSE keeps the header line aligned with the data lines
write.table(codebook,
            file = "data/codebook/codebook_02.csv",
            na = "",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)
openxlsx::write.xlsx(codebook, file = "data/codebook/codebook_02.xlsx")

View File

@ -0,0 +1,13 @@
variable;label;n;type;mean;sd
ResponseId;ResponseId;Response ID;13;character;;
age;age;How old are you? Please enter your age in years.;13;numeric;29.6923076923077;5.99144689515278
sex;sex;Please indicate your sex.;13;factor;;
data_sharing_1;data_sharing_1;Have you ever published data in a repository?;13;factor;;
data_sharing_2;data_sharing_2;How many of your data sets have you published so far?;13;numeric;1.38461538461538;1.85015591858549
rdm_stmnt_1;rdm_stmnt_1;Please indicate how much you agree with the following statements - All my analyses are preregistered;13;numeric;4.15384615384615;1.14354374979373
rdm_stmnt_2;rdm_stmnt_2;Please indicate how much you agree with the following statements - Sharing my data is very important to me;13;numeric;4;
rdm_stmnt_3;rdm_stmnt_3;Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues;13;numeric;2.84615384615385;0.800640769025436
rdm_stmnt_4;rdm_stmnt_4;Please indicate how much you agree with the following statements - I think research data management is overrated;13;numeric;1.84615384615385;0.987096233585649
rdm_stmnt_5;rdm_stmnt_5;Please indicate how much you agree with the following statements - Sharing data is bad scientific practice;13;numeric;1.15384615384615;0.375533808099405
career_level_1;career_level_1;What is your current career level?;13;factor;;
career_level_2;career_level_2;How long have you been working in science (in years)?;13;numeric;6.26923076923077;10.1788493632126
1 variable;label;n;type;mean;sd
2 ResponseId;ResponseId;Response ID;13;character;;
3 age;age;How old are you? Please enter your age in years.;13;numeric;29.6923076923077;5.99144689515278
4 sex;sex;Please indicate your sex.;13;factor;;
5 data_sharing_1;data_sharing_1;Have you ever published data in a repository?;13;factor;;
6 data_sharing_2;data_sharing_2;How many of your data sets have you published so far?;13;numeric;1.38461538461538;1.85015591858549
7 rdm_stmnt_1;rdm_stmnt_1;Please indicate how much you agree with the following statements - All my analyses are preregistered;13;numeric;4.15384615384615;1.14354374979373
8 rdm_stmnt_2;rdm_stmnt_2;Please indicate how much you agree with the following statements - Sharing my data is very important to me;13;numeric;4;
9 rdm_stmnt_3;rdm_stmnt_3;Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues;13;numeric;2.84615384615385;0.800640769025436
10 rdm_stmnt_4;rdm_stmnt_4;Please indicate how much you agree with the following statements - I think research data management is overrated;13;numeric;1.84615384615385;0.987096233585649
11 rdm_stmnt_5;rdm_stmnt_5;Please indicate how much you agree with the following statements - Sharing data is bad scientific practice;13;numeric;1.15384615384615;0.375533808099405
12 career_level_1;career_level_1;What is your current career level?;13;factor;;
13 career_level_2;career_level_2;How long have you been working in science (in years)?;13;numeric;6.26923076923077;10.1788493632126

View File

@ -0,0 +1,10 @@
#' ---
#' title: Codebook for Data Set "RDM MS SS 2024"
#' author: Nora Wickelmaier
#' ---

#+ echo = FALSE
# Read the SPSS download and hand it to the codebook package
sav_file <- "../rawdata/RDM_MS_SS2024_download_2024-06-04.sav"
dat <- haven::read_spss(sav_file)
codebook::codebook(dat)

View File

@ -0,0 +1,94 @@
###############################################################################
This file contains an overview of the variables from a toy data set collected
at the methods seminar SS 2024. The raw data contained in
"RDM_MS_SS2024_download_2024-06-03_v1.csv" include additional variables
created by Qualtrics. The variables have been preprocessed and are stored in
"data_rdm-ms-ss2024_cleaned.csv".
###############################################################################
ResponseId. <Qualtrics ID of subject>
-------------------------------------------------------------------------------
random sequence of numbers, letters, and underscore
sex. Please indicate your sex.
-------------------------------------------------------------------------------
-1. m
-2. f
-3. d
-4. not indicated
age. How old are you? Please enter your age in years.
-------------------------------------------------------------------------------
numerical input
data_sharing_1. Have you ever published data in a repository?
-------------------------------------------------------------------------------
-1. No
-2. Yes
data_sharing_2. How many of your data sets have you published so far?
-------------------------------------------------------------------------------
numerical input
rdm_stmnt. Please indicate how much you agree with the following statements:
rdm_stmnt_1. All my analyses are preregistered
-------------------------------------------------------------------------------
-1. Strongly disagree
-2. Disagree
-3. Neither agree nor disagree
-4. Agree
-5. Strongly agree
rdm_stmnt_2. Sharing my data is very important to me
-------------------------------------------------------------------------------
-1. Strongly disagree
-2. Disagree
-3. Neither agree nor disagree
-4. Agree
-5. Strongly agree
rdm_stmnt_3. I invest more time in research data management than my colleagues
-------------------------------------------------------------------------------
-1. Strongly disagree
-2. Disagree
-3. Neither agree nor disagree
-4. Agree
-5. Strongly agree
rdm_stmnt_4. I think research data management is overrated
-------------------------------------------------------------------------------
-1. Strongly disagree
-2. Disagree
-3. Neither agree nor disagree
-4. Agree
-5. Strongly agree
rdm_stmnt_5. Sharing data is bad scientific practice
-------------------------------------------------------------------------------
-1. Strongly disagree
-2. Disagree
-3. Neither agree nor disagree
-4. Agree
-5. Strongly agree
career_level_1. What is your current career level?
-------------------------------------------------------------------------------
-1. Student
-2. PhD student
-3. Postdoc
-4. Senior researcher
-5. Professor
-6. Other
career_level_2. How long have you been working in science (in years)?
-------------------------------------------------------------------------------
numerical input

View File

@ -0,0 +1,16 @@
ResponseId,age,sex,data_sharing_1,data_sharing_2,rdm_stmnt_1,rdm_stmnt_2,rdm_stmnt_3,rdm_stmnt_4,rdm_stmnt_5,career_level_1,career_level_2
Response ID,How old are you? Please enter your age in years.,Please indicate your sex.,Have you ever published data in a repository?,How many of your data sets have you published so far?,Please indicate how much you agree with the following statements - All my analyses are preregistered,Please indicate how much you agree with the following statements - Sharing my data is very important to me,Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues,Please indicate how much you agree with the following statements - I think research data management is overrated,Please indicate how much you agree with the following statements - Sharing data is bad scientific practice,What is your current career level?,How long have you been working in science (in years)?
"{""ImportId"":""_recordId""}","{""ImportId"":""QID3_TEXT""}","{""ImportId"":""QID1""}","{""ImportId"":""QID4""}","{""ImportId"":""QID7_TEXT""}","{""ImportId"":""QID2_1""}","{""ImportId"":""QID2_2""}","{""ImportId"":""QID2_3""}","{""ImportId"":""QID2_4""}","{""ImportId"":""QID2_5""}","{""ImportId"":""QID8""}","{""ImportId"":""QID9_TEXT""}"
R_8q7OpSkcuPT7SbI,42,f,No,1,Neither agree nor disagree,Agree,Strongly agree,Strongly disagree,Strongly disagree,Other,14
R_8Io4pbk0A1a37VL,28,f,Yes,1,Strongly agree,,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,1
R_2J9B4aLaasQ1m81,28,f,Yes,1 out of 4,Strongly agree,Strongly agree,Disagree,Disagree,Strongly disagree,PhD student,3
R_80kqWr3W48SgiUZ,43,f,Yes,6,Agree,Agree,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,3
R_8QpI8T0rjTjaPPr,30,f,Yes,4,Strongly agree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,5
R_8QoVv6THz1Qjtuz,28,f,Yes,1,Disagree,Disagree,Disagree,Agree,Strongly disagree,Professor,38
R_2F9fXxf3NedHqZl,25,d,No,0,Agree,Strongly agree,Disagree,Neither agree nor disagree,Disagree,PhD student,2
R_2foYj4iSgaBTkEO,24,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
R_83T6Oak5vI6GNJ7,30,f,Yes,1,Strongly agree,Agree,Neither agree nor disagree,Neither agree nor disagree,Strongly disagree,Postdoc,7
R_2Vz26rWsOLYwqnD,25,m,Yes,3,Agree,Agree,Neither agree nor disagree,Disagree,Disagree,PhD student,2
R_8HcBgUUm1BXFfhv,29,m,No,0,Strongly agree,Disagree,Disagree,Strongly disagree,Strongly disagree,PhD student,3
R_2P1TMDNlwm0gSIk,26,f,No,0,Disagree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1.5
R_225ffqhb7qRaIGO,28,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
1 ResponseId age sex data_sharing_1 data_sharing_2 rdm_stmnt_1 rdm_stmnt_2 rdm_stmnt_3 rdm_stmnt_4 rdm_stmnt_5 career_level_1 career_level_2
2 Response ID How old are you? Please enter your age in years. Please indicate your sex. Have you ever published data in a repository? How many of your data sets have you published so far? Please indicate how much you agree with the following statements - All my analyses are preregistered Please indicate how much you agree with the following statements - Sharing my data is very important to me Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues Please indicate how much you agree with the following statements - I think research data management is overrated Please indicate how much you agree with the following statements - Sharing data is bad scientific practice What is your current career level? How long have you been working in science (in years)?
3 {"ImportId":"_recordId"} {"ImportId":"QID3_TEXT"} {"ImportId":"QID1"} {"ImportId":"QID4"} {"ImportId":"QID7_TEXT"} {"ImportId":"QID2_1"} {"ImportId":"QID2_2"} {"ImportId":"QID2_3"} {"ImportId":"QID2_4"} {"ImportId":"QID2_5"} {"ImportId":"QID8"} {"ImportId":"QID9_TEXT"}
4 R_8q7OpSkcuPT7SbI 42 f No 1 Neither agree nor disagree Agree Strongly agree Strongly disagree Strongly disagree Other 14
5 R_8Io4pbk0A1a37VL 28 f Yes 1 Strongly agree Neither agree nor disagree Disagree Strongly disagree PhD student 1
6 R_2J9B4aLaasQ1m81 28 f Yes 1 out of 4 Strongly agree Strongly agree Disagree Disagree Strongly disagree PhD student 3
7 R_80kqWr3W48SgiUZ 43 f Yes 6 Agree Agree Neither agree nor disagree Disagree Strongly disagree PhD student 3
8 R_8QpI8T0rjTjaPPr 30 f Yes 4 Strongly agree Agree Neither agree nor disagree Strongly disagree Strongly disagree PhD student 5
9 R_8QoVv6THz1Qjtuz 28 f Yes 1 Disagree Disagree Disagree Agree Strongly disagree Professor 38
10 R_2F9fXxf3NedHqZl 25 d No 0 Agree Strongly agree Disagree Neither agree nor disagree Disagree PhD student 2
11 R_2foYj4iSgaBTkEO 24 f No 0 Strongly agree Strongly agree Neither agree nor disagree Strongly disagree Strongly disagree PhD student 1
12 R_83T6Oak5vI6GNJ7 30 f Yes 1 Strongly agree Agree Neither agree nor disagree Neither agree nor disagree Strongly disagree Postdoc 7
13 R_2Vz26rWsOLYwqnD 25 m Yes 3 Agree Agree Neither agree nor disagree Disagree Disagree PhD student 2
14 R_8HcBgUUm1BXFfhv 29 m No 0 Strongly agree Disagree Disagree Strongly disagree Strongly disagree PhD student 3
15 R_2P1TMDNlwm0gSIk 26 f No 0 Disagree Agree Neither agree nor disagree Strongly disagree Strongly disagree PhD student 1.5
16 R_225ffqhb7qRaIGO 28 f No 0 Strongly agree Strongly agree Neither agree nor disagree Strongly disagree Strongly disagree PhD student 1

View File

@ -0,0 +1,66 @@
* Encoding: UTF-8.
TITLE "RDM_MS_SS2024".
SUBTITLE "".
VARIABLE LABELS
ResponseId "Response ID"
age "How old are you? Please enter your age in years."
sex "Please indicate your sex."
data_sharing_1 "Have you ever published data in a repository?"
data_sharing_2 "How many of your data sets have you published so far?"
rdm_stmnt_1 "Please indicate how much you agree with the following statements - All my analyses are preregistered"
rdm_stmnt_2 "Please indicate how much you agree with the following statements - Sharing my data is very important to me"
rdm_stmnt_3 "Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues"
rdm_stmnt_4 "Please indicate how much you agree with the following statements - I think research data management is overrated"
rdm_stmnt_5 "Please indicate how much you agree with the following statements - Sharing data is bad scientific practice"
career_level_1 "What is your current career level?"
career_level_2 "How long have you been working in science (in years)?"
.
VALUE LABELS
/sex
1 "m"
2 "f"
3 "d"
4 "not indicated"
/data_sharing_1
1 "No"
2 "Yes"
/rdm_stmnt_1
1 "Strongly disagree"
2 "Disagree"
3 "Neither agree nor disagree"
4 "Agree"
5 "Strongly agree"
/rdm_stmnt_2
1 "Strongly disagree"
2 "Disagree"
3 "Neither agree nor disagree"
4 "Agree"
5 "Strongly agree"
/rdm_stmnt_3
1 "Strongly disagree"
2 "Disagree"
3 "Neither agree nor disagree"
4 "Agree"
5 "Strongly agree"
/rdm_stmnt_4
1 "Strongly disagree"
2 "Disagree"
3 "Neither agree nor disagree"
4 "Agree"
5 "Strongly agree"
/rdm_stmnt_5
1 "Strongly disagree"
2 "Disagree"
3 "Neither agree nor disagree"
4 "Agree"
5 "Strongly agree"
/career_level_1
1 "Student"
2 "PhD student"
3 "Postdoc"
4 "Senior researcher"
5 "Professor"
6 "Other"
.
CACHE.
EXECUTE.

View File

@ -0,0 +1,17 @@
ResponseId,age,sex,data_sharing_1,data_sharing_2,rdm_stmnt_1,rdm_stmnt_2,rdm_stmnt_3,rdm_stmnt_4,rdm_stmnt_5,career_level_1,career_level_2
Response ID,How old are you? Please enter your age in years.,Please indicate your sex.,Have you ever published data in a repository?,How many of your data sets have you published so far?,Please indicate how much you agree with the following statements - All my analyses are preregistered,Please indicate how much you agree with the following statements - Sharing my data is very important to me,Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues,Please indicate how much you agree with the following statements - I think research data management is overrated,Please indicate how much you agree with the following statements - Sharing data is bad scientific practice,What is your current career level?,How long have you been working in science (in years)?
"{""ImportId"":""_recordId""}","{""ImportId"":""QID3_TEXT""}","{""ImportId"":""QID1""}","{""ImportId"":""QID4""}","{""ImportId"":""QID7_TEXT""}","{""ImportId"":""QID2_1""}","{""ImportId"":""QID2_2""}","{""ImportId"":""QID2_3""}","{""ImportId"":""QID2_4""}","{""ImportId"":""QID2_5""}","{""ImportId"":""QID8""}","{""ImportId"":""QID9_TEXT""}"
R_8q7OpSkcuPT7SbI,42,f,No,1,Neither agree nor disagree,Agree,Strongly agree,Strongly disagree,Strongly disagree,Other,14
R_8Io4pbk0A1a37VL,28,f,Yes,1,Strongly agree,,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,1
R_2J9B4aLaasQ1m81,28,f,Yes,1 out of 4,Strongly agree,Strongly agree,Disagree,Disagree,Strongly disagree,PhD student,3
R_80kqWr3W48SgiUZ,43,f,Yes,6,Agree,Agree,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,3
R_8QpI8T0rjTjaPPr,30,f,Yes,4,Strongly agree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,5
R_8QoVv6THz1Qjtuz,28,f,Yes,1,Disagree,Disagree,Disagree,Agree,Strongly disagree,Professor,38
R_2F9fXxf3NedHqZl,25,d,No,0,Agree,Strongly agree,Disagree,Neither agree nor disagree,Disagree,PhD student,2
R_2foYj4iSgaBTkEO,24,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
R_83T6Oak5vI6GNJ7,30,f,Yes,1,Strongly agree,Agree,Neither agree nor disagree,Neither agree nor disagree,Strongly disagree,Postdoc,7
R_2Vz26rWsOLYwqnD,25,m,Yes,3,Agree,Agree,Neither agree nor disagree,Disagree,Disagree,PhD student,2
R_8HcBgUUm1BXFfhv,29,m,No,0,Strongly agree,Disagree,Disagree,Strongly disagree,Strongly disagree,PhD student,3
R_2P1TMDNlwm0gSIk,26,f,No,0,Disagree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1.5
R_225ffqhb7qRaIGO,28,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
R_2pXfOSq8DBImG6R,32,f,No,0,Neither agree nor disagree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,2
1 ResponseId age sex data_sharing_1 data_sharing_2 rdm_stmnt_1 rdm_stmnt_2 rdm_stmnt_3 rdm_stmnt_4 rdm_stmnt_5 career_level_1 career_level_2
2 Response ID How old are you? Please enter your age in years. Please indicate your sex. Have you ever published data in a repository? How many of your data sets have you published so far? Please indicate how much you agree with the following statements - All my analyses are preregistered Please indicate how much you agree with the following statements - Sharing my data is very important to me Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues Please indicate how much you agree with the following statements - I think research data management is overrated Please indicate how much you agree with the following statements - Sharing data is bad scientific practice What is your current career level? How long have you been working in science (in years)?
3 {"ImportId":"_recordId"} {"ImportId":"QID3_TEXT"} {"ImportId":"QID1"} {"ImportId":"QID4"} {"ImportId":"QID7_TEXT"} {"ImportId":"QID2_1"} {"ImportId":"QID2_2"} {"ImportId":"QID2_3"} {"ImportId":"QID2_4"} {"ImportId":"QID2_5"} {"ImportId":"QID8"} {"ImportId":"QID9_TEXT"}
4 R_8q7OpSkcuPT7SbI 42 f No 1 Neither agree nor disagree Agree Strongly agree Strongly disagree Strongly disagree Other 14
5 R_8Io4pbk0A1a37VL 28 f Yes 1 Strongly agree Neither agree nor disagree Disagree Strongly disagree PhD student 1
6 R_2J9B4aLaasQ1m81 28 f Yes 1 out of 4 Strongly agree Strongly agree Disagree Disagree Strongly disagree PhD student 3
7 R_80kqWr3W48SgiUZ 43 f Yes 6 Agree Agree Neither agree nor disagree Disagree Strongly disagree PhD student 3
8 R_8QpI8T0rjTjaPPr 30 f Yes 4 Strongly agree Agree Neither agree nor disagree Strongly disagree Strongly disagree PhD student 5
9 R_8QoVv6THz1Qjtuz 28 f Yes 1 Disagree Disagree Disagree Agree Strongly disagree Professor 38
10 R_2F9fXxf3NedHqZl 25 d No 0 Agree Strongly agree Disagree Neither agree nor disagree Disagree PhD student 2
11 R_2foYj4iSgaBTkEO 24 f No 0 Strongly agree Strongly agree Neither agree nor disagree Strongly disagree Strongly disagree PhD student 1
12 R_83T6Oak5vI6GNJ7 30 f Yes 1 Strongly agree Agree Neither agree nor disagree Neither agree nor disagree Strongly disagree Postdoc 7
13 R_2Vz26rWsOLYwqnD 25 m Yes 3 Agree Agree Neither agree nor disagree Disagree Disagree PhD student 2
14 R_8HcBgUUm1BXFfhv 29 m No 0 Strongly agree Disagree Disagree Strongly disagree Strongly disagree PhD student 3
15 R_2P1TMDNlwm0gSIk 26 f No 0 Disagree Agree Neither agree nor disagree Strongly disagree Strongly disagree PhD student 1.5
16 R_225ffqhb7qRaIGO 28 f No 0 Strongly agree Strongly agree Neither agree nor disagree Strongly disagree Strongly disagree PhD student 1
17 R_2pXfOSq8DBImG6R 32 f No 0 Neither agree nor disagree Agree Neither agree nor disagree Strongly disagree Strongly disagree PhD student 2

View File

@ -0,0 +1,66 @@
* Encoding: UTF-8.
TITLE "RDM_MS_SS2024".
SUBTITLE "".
VARIABLE LABELS
ResponseId "Response ID"
age "How old are you? Please enter your age in years."
sex "Please indicate your sex."
data_sharing_1 "Have you ever published data in a repository?"
data_sharing_2 "How many of your data sets have you published so far?"
rdm_stmnt_1 "Please indicate how much you agree with the following statements - All my analyses are preregistered"
rdm_stmnt_2 "Please indicate how much you agree with the following statements - Sharing my data is very important to me"
rdm_stmnt_3 "Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues"
rdm_stmnt_4 "Please indicate how much you agree with the following statements - I think research data management is overrated"
rdm_stmnt_5 "Please indicate how much you agree with the following statements - Sharing data is bad scientific practice"
career_level_1 "What is your current career level?"
career_level_2 "How long have you been working in science (in years)?"
.
VALUE LABELS
/sex
1 "m"
2 "f"
3 "d"
4 "not indicated"
/data_sharing_1
1 "No"
2 "Yes"
/rdm_stmnt_1
1 "Strongly disagree"
2 "Disagree"
3 "Neither agree nor disagree"
4 "Agree"
5 "Strongly agree"
/rdm_stmnt_2
1 "Strongly disagree"
2 "Disagree"
3 "Neither agree nor disagree"
4 "Agree"
5 "Strongly agree"
/rdm_stmnt_3
1 "Strongly disagree"
2 "Disagree"
3 "Neither agree nor disagree"
4 "Agree"
5 "Strongly agree"
/rdm_stmnt_4
1 "Strongly disagree"
2 "Disagree"
3 "Neither agree nor disagree"
4 "Agree"
5 "Strongly agree"
/rdm_stmnt_5
1 "Strongly disagree"
2 "Disagree"
3 "Neither agree nor disagree"
4 "Agree"
5 "Strongly agree"
/career_level_1
1 "Student"
2 "PhD student"
3 "Postdoc"
4 "Senior researcher"
5 "Professor"
6 "Other"
.
CACHE.
EXECUTE.

View File

@ -0,0 +1,15 @@
ResponseId;age;sex;data_sharing_1;data_sharing_2;rdm_stmnt_1;rdm_stmnt_2;rdm_stmnt_3;rdm_stmnt_4;rdm_stmnt_5;career_level_1;career_level_2
R_8q7OpSkcuPT7SbI;42;f;No;2;3;4;5;1;1;Other;14
R_8Io4pbk0A1a37VL;28;f;Yes;2;5;NA;3;1;1;PhD student;1
R_2J9B4aLaasQ1m81;28;f;Yes;2;5;5;2;1;1;PhD student;3
R_80kqWr3W48SgiUZ;43;f;Yes;6;4;4;3;1;1;PhD student;3
R_8QpI8T0rjTjaPPr;30;f;Yes;5;5;4;3;1;1;PhD student;5
R_8QoVv6THz1Qjtuz;28;f;Yes;2;2;2;2;1;1;Professor;38
R_2F9fXxf3NedHqZl;25;d;No;1;4;5;2;2;2;PhD student;2
R_2foYj4iSgaBTkEO;24;f;No;1;5;5;3;1;1;PhD student;1
R_83T6Oak5vI6GNJ7;30;f;Yes;2;5;4;3;1;1;Postdoc;7
R_2Vz26rWsOLYwqnD;25;m;Yes;4;4;4;3;2;2;PhD student;2
R_8HcBgUUm1BXFfhv;29;m;No;1;5;2;2;1;1;PhD student;3
R_2P1TMDNlwm0gSIk;26;f;No;1;2;4;3;1;1;PhD student;1.5
R_225ffqhb7qRaIGO;28;f;No;1;5;5;3;1;1;PhD student;1
R_2pXfOSq8DBImG6R;32;f;No;1;3;4;3;1;1;PhD student;2
1 ResponseId age sex data_sharing_1 data_sharing_2 rdm_stmnt_1 rdm_stmnt_2 rdm_stmnt_3 rdm_stmnt_4 rdm_stmnt_5 career_level_1 career_level_2
2 R_8q7OpSkcuPT7SbI 42 f No 2 3 4 5 1 1 Other 14
3 R_8Io4pbk0A1a37VL 28 f Yes 2 5 NA 3 1 1 PhD student 1
4 R_2J9B4aLaasQ1m81 28 f Yes 2 5 5 2 1 1 PhD student 3
5 R_80kqWr3W48SgiUZ 43 f Yes 6 4 4 3 1 1 PhD student 3
6 R_8QpI8T0rjTjaPPr 30 f Yes 5 5 4 3 1 1 PhD student 5
7 R_8QoVv6THz1Qjtuz 28 f Yes 2 2 2 2 1 1 Professor 38
8 R_2F9fXxf3NedHqZl 25 d No 1 4 5 2 2 2 PhD student 2
9 R_2foYj4iSgaBTkEO 24 f No 1 5 5 3 1 1 PhD student 1
10 R_83T6Oak5vI6GNJ7 30 f Yes 2 5 4 3 1 1 Postdoc 7
11 R_2Vz26rWsOLYwqnD 25 m Yes 4 4 4 3 2 2 PhD student 2
12 R_8HcBgUUm1BXFfhv 29 m No 1 5 2 2 1 1 PhD student 3
13 R_2P1TMDNlwm0gSIk 26 f No 1 2 4 3 1 1 PhD student 1.5
14 R_225ffqhb7qRaIGO 28 f No 1 5 5 3 1 1 PhD student 1
15 R_2pXfOSq8DBImG6R 32 f No 1 3 4 3 1 1 PhD student 2

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

BIN
figures/codebook_1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

BIN
figures/codebook_2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB