Slides and example for third session
This commit is contained in:
parent
102834032c
commit
f1f7f35988
844
03_data_organisation/03_data_organisation.tex
Normal file
844
03_data_organisation/03_data_organisation.tex
Normal file
@ -0,0 +1,844 @@
|
||||
\documentclass[aspectratio=169]{beamer}
|
||||
|
||||
\usepackage{listings}
|
||||
% Declare a single input encoding: when several encodings are passed to
% inputenc, the last one listed becomes the active one, so a trailing
% latin1 would override utf8. NOTE(review): assumes this file is saved as
% UTF-8 -- confirm before merging.
\usepackage[utf8]{inputenc}
|
||||
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
|
||||
\addbibresource{../literature/lit.bib}
|
||||
|
||||
\usepackage{fancyvrb}
|
||||
\usepackage{fontawesome5} % get icons
|
||||
\usepackage{multirow}
|
||||
\usepackage{color, colortbl}
|
||||
|
||||
\usepackage{tikz}
|
||||
\usetikzlibrary{fit}
|
||||
\usepackage[edges]{forest}
|
||||
|
||||
\lstset{language=R,%
|
||||
backgroundcolor=\color{iwmgray!15!white},
|
||||
basicstyle=\ttfamily\color{iwmgray},
|
||||
frame=none,
|
||||
commentstyle=\slshape\color{iwmgreen},
|
||||
keywordstyle=\bfseries\color{iwmgray},
|
||||
identifierstyle=\color{iwmpurple},
|
||||
stringstyle=\color{iwmblue},
|
||||
numbers=none,%left,numberstyle=\tiny,
|
||||
basewidth={.5em, .4em},
|
||||
showstringspaces=false,
|
||||
emphstyle=\color{red!50!white}}
|
||||
|
||||
\makeatletter \def\newblock{\beamer@newblock} \makeatother
|
||||
|
||||
\beamertemplatenavigationsymbolsempty
|
||||
\setbeamertemplate{itemize items}[circle]
|
||||
\setbeamertemplate{section in toc}[circle]
|
||||
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
|
||||
\setbeamercolor{block body}{bg=iwmorange!50!white}
|
||||
\setbeamercolor{block title}{fg=white, bg=iwmorange}
|
||||
% Definitions for biblatex
|
||||
\setbeamercolor{bibliography entry note}{fg=iwmgray}
|
||||
\setbeamercolor{bibliography entry author}{fg=iwmgray}
|
||||
\setbeamertemplate{bibliography item}{}
|
||||
|
||||
\definecolor{iwmorange}{RGB}{255,105,0}
|
||||
\definecolor{iwmgray}{RGB}{67,79,79}
|
||||
\definecolor{iwmblue}{RGB}{60,180,220}
|
||||
\definecolor{iwmgreen}{RGB}{145,200,110}
|
||||
\definecolor{iwmpurple}{RGB}{120,0,75}
|
||||
|
||||
\setbeamercolor{title}{fg=iwmorange}
|
||||
\setbeamercolor{frametitle}{fg=iwmorange}
|
||||
\setbeamercolor{structure}{fg=iwmorange}
|
||||
\setbeamercolor{normal text}{fg=iwmgray}
|
||||
\setbeamercolor{author}{fg=iwmgray}
|
||||
\setbeamercolor{date}{fg=iwmgray}
|
||||
|
||||
\newcommand{\vect}[1]{\mathbf{#1}}
|
||||
\newcommand{\mat}[1]{\mathbf{#1}}
|
||||
\newcommand{\gvect}[1]{\boldsymbol{#1}}
|
||||
\newcommand{\gmat}[1]{\boldsymbol{#1}}
|
||||
|
||||
\AtBeginSection[]{
|
||||
\frame{
|
||||
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
|
||||
|
||||
\setbeamertemplate{headline}{
|
||||
\begin{beamercolorbox}{section in head}
|
||||
\vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
|
||||
\end{beamercolorbox}
|
||||
}
|
||||
|
||||
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
|
||||
|
||||
\title{Data organisation for effective research data management}
|
||||
\author{Nora Wickelmaier}
|
||||
\date{June 10, 2024}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\begin{frame}{}
|
||||
\thispagestyle{empty}
|
||||
\titlepage
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Data request}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .55]{../figures/email_data_request_2024_01}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Data folder for the data requested}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .6]{../figures/email_data_request_2024_03}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{What is bad about this data organisation?}
|
||||
% slido
|
||||
\centering
|
||||
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 3}
|
||||
|
||||
\url{https://app.sli.do/event/3S1Bn3Tjknuk5J5WiqAYzG}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[<+->]{Bad things about this data organisation}
|
||||
\begin{itemize}
|
||||
\item Raw and processed data are in the same folder
|
||||
\item File naming does not sort in a sensible way: Best order would be first
|
||||
by subject, then by session
|
||||
\item Data and data scripts are in the same folder
|
||||
\item Data scripts are not numbered, unclear in what order they need to be
|
||||
executed
|
||||
\item There are plot files (PDFs) between the data and code files
|
||||
\item It is unclear which are the final and processed data files
|
||||
\item The final data files are not stored in an interoperable format: There
|
||||
is only an \texttt{.RData} file that (probably) contains the final data
|
||||
which was used for further analyses
|
||||
\item There is no documentation whatsoever
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Topics for this semester}
|
||||
\centering
|
||||
\begin{tabular}{ll}
|
||||
\hline
|
||||
Date & Topic \\
|
||||
\hline
|
||||
2024-05-13 & Introduction to data management \\
|
||||
2024-05-27 & Workflow \\
|
||||
% Current session: shown in normal weight on slide 1 and in bold on slide 2.
% The overlay-aware \textbf<2>{...} replaces the obsolete {\bf ...} switch and
% avoids writing each cell's text twice.
\textbf<2>{2024-06-10} & \textbf<2>{Data organisation}\\
|
||||
2024-06-24 & Data sharing \\
|
||||
2024-07-08 & Clean coding \\
|
||||
2024-07-22 & Version control \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{frame}
|
||||
|
||||
% * different data sources
|
||||
% * content README file
|
||||
% * best arrangement of data
|
||||
% * redundancy
|
||||
% * anonymizing/pseudonymizing data
|
||||
|
||||
\section{Folder organisation}
|
||||
|
||||
\begin{frame}[<+->]{Some general rules}
|
||||
\begin{itemize}
|
||||
\item One project, one folder
|
||||
\item Add README file at top level
|
||||
\item Raw data are in a separate folder (and stay separate!)
|
||||
\item Have a code folder
|
||||
\item It is often a good idea to separate your data analysis from papers,
|
||||
talks, etc. (especially if you want to publish your data)
|
||||
\item Have designated folders where stuff is written to (e.\,g.,
|
||||
\texttt{results}, \texttt{figures}, \texttt{processed}, etc.)
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{One possible example!}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
% n4 (dissemination) is a first-level entry as well, so it gets a connector
% to the project root just like its siblings.
\path (top.east) -- (n4.west);
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (file.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{Analysis folder}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
% n4 (dissemination) is a first-level entry as well, so it gets a connector
% to the project root just like its siblings.
\path (top.east) -- (n4.west);
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
|
||||
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (n2.east) -- (o1.west);
|
||||
\path (n2.east) -- (o2.west);
|
||||
\path (n2.east) -- (o3.west);
|
||||
\path (n2.east) -- (o4.west);
|
||||
\path (n2.east) -- (o5.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{Analysis folder}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
% n4 (dissemination) is a first-level entry as well, so it gets a connector
% to the project root just like its siblings.
\path (top.east) -- (n4.west);
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
|
||||
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (n2.east) -- (o1.west);
|
||||
\path (n2.east) -- (o2.west);
|
||||
\path (n2.east) -- (o3.west);
|
||||
\path (n2.east) -- (o4.west);
|
||||
\path (n2.east) -- (o5.west);
|
||||
% third level
|
||||
\node[text width = 5cm] (p1) at (12, 0) {\faIcon[regular]{file} \verb+01_preprocessing.R+};
|
||||
\node[text width = 5cm] (p2) at (12, -0.7) {\faIcon[regular]{file} \verb+02_descriptives.R+};
|
||||
\node[text width = 5cm] (p3) at (12, -1.4) {\faIcon[regular]{file} \verb+03_modeling.R+};
|
||||
\node[text width = 5cm] (p4) at (12, -2.1) {\faIcon[regular]{file} \verb+04_plots.R+};
|
||||
\path (o1.center) -- (p1.west);
|
||||
\path (o1.center) -- (p2.west);
|
||||
\path (o1.center) -- (p3.west);
|
||||
\path (o1.center) -- (p4.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{Analysis folder}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
% n4 (dissemination) is a first-level entry as well, so it gets a connector
% to the project root just like its siblings.
\path (top.east) -- (n4.west);
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
|
||||
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (n2.east) -- (o1.west);
|
||||
\path (n2.east) -- (o2.west);
|
||||
\path (n2.east) -- (o3.west);
|
||||
\path (n2.east) -- (o4.west);
|
||||
\path (n2.east) -- (o5.west);
|
||||
% third level
|
||||
\node[text width = 5cm] (p1) at (12, 0) {\faIcon[regular]{file} \verb+subj1_ses01.txt+};
|
||||
\node[text width = 5cm] (p2) at (12, -0.7) {\faIcon[regular]{file} \verb+subj1_ses02.txt+};
|
||||
\node[text width = 5cm] (p3) at (12, -1.4) {\faIcon[regular]{file} \verb+subj2_ses01.txt+};
|
||||
\node[text width = 5cm] (p4) at (12, -2.1) {\faIcon[regular]{file} \verb+subj2_ses02.txt+};
|
||||
\node[text width = 5cm] (p5) at (12, -2.8) {\faIcon[regular]{file} \dots};
|
||||
\path (o2.center) -- (p1.west);
|
||||
\path (o2.center) -- (p2.west);
|
||||
\path (o2.center) -- (p3.west);
|
||||
\path (o2.center) -- (p4.west);
|
||||
\path (o2.center) -- (p5.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{Analysis folder}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
% n4 (dissemination) is a first-level entry as well, so it gets a connector
% to the project root just like its siblings.
\path (top.east) -- (n4.west);
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
|
||||
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (n2.east) -- (o1.west);
|
||||
\path (n2.east) -- (o2.west);
|
||||
\path (n2.east) -- (o3.west);
|
||||
\path (n2.east) -- (o4.west);
|
||||
\path (n2.east) -- (o5.west);
|
||||
% third level
|
||||
\node[text width = 5cm] (p1) at (12, -0.7) {\faIcon[regular]{file}
|
||||
\verb+data_all-subj.csv+};
|
||||
\node[text width = 5cm] (p2) at (12, -1.4) {\faIcon[regular]{file}
|
||||
\verb+data_all-subj.RData+};
|
||||
\node[text width = 5cm] (p3) at (12, -2.1) {\faIcon[regular]{file}
|
||||
\verb+eval_model1.csv+};
|
||||
\node[text width = 5cm] (p4) at (12, -2.8) {\faIcon[regular]{file}
|
||||
\verb+eval_model2.csv+};
|
||||
\path (o4.center) -- (p1.west);
|
||||
\path (o4.center) -- (p2.west);
|
||||
\path (o4.center) -- (p3.west);
|
||||
\path (o4.center) -- (p4.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\pause
|
||||
You might want to share the analysis folder on OSF, GitHub, etc.
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{Dissemination folder}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node[text width = 3.2cm] (n3) at (3.6, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
\path (top.east) -- (n3.west);
% n4 (material) is a first-level entry as well, so it gets a connector to the
% project root just like its siblings.
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+paper+};
|
||||
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+talks+};
|
||||
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
|
||||
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\node (o5) at (8.5, -2.8) {\faIcon{folder} \verb+tables+};
|
||||
\path (n3.east) -- (o1.west);
|
||||
\path (n3.east) -- (o2.west);
|
||||
\path (n3.east) -- (o3.west);
|
||||
\path (n3.east) -- (o4.west);
|
||||
\path (n3.east) -- (o5.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\pause
|
||||
Having separate folders for figures and tables helps you keep track of them
|
||||
for your paper and talks
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Figures and tables}
|
||||
\begin{itemize}
|
||||
\item Most of us (including me!) are not at a stage where we are
|
||||
writing our papers or talks as reproducible documents
|
||||
\pause
|
||||
\item It is still a good idea to create tables and figures in R and keep the
|
||||
code easily accessible
|
||||
\pause
|
||||
\item One suggestion
|
||||
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4.2cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
% figures
|
||||
\node (fig) at (0, 0) {\faIcon{folder} \verb+figures+};
|
||||
\node (n1) at (4, 0) {\faIcon[regular]{file} \verb+h1_barplot.R+};
|
||||
\node (n2) at (4, -0.7) {\faIcon[regular]{file} \verb+h1_barplot.png+};
|
||||
\path (fig.center) -- (n1.west);
|
||||
\path (fig.center) -- (n2.west);
|
||||
% tables
|
||||
\node (tab) at (0, -1.5) {\faIcon{folder} \verb+tables+};
|
||||
\node (o1) at (4, -1.5) {\faIcon[regular]{file} \verb+h1_mean-table.Rmd+};
|
||||
\node (o2) at (4, -2.2) {\faIcon[regular]{file} \verb+h1_mean-table.docx+};
|
||||
\path (tab.center) -- (o1.west);
|
||||
\path (tab.center) -- (o2.west);
|
||||
\end{tikzpicture}
|
||||
\pause
|
||||
\item I export the data for figures and tables from \texttt{analysis/code}
|
||||
to \texttt{dissemination/results} so the dissemination folder is
|
||||
self-contained
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Several data sources}
|
||||
\begin{itemize}
|
||||
\item When you have several different data sources like questionnaires and
|
||||
eye-tracking data, keep them in separate folders
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node (data) at (0, 0) {\faIcon{folder} \verb+data+};
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+eyetracking+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+qualtrics+};
|
||||
\path (data.center) -- (n1.west);
|
||||
\path (data.center) -- (n2.west);
|
||||
\end{tikzpicture}
|
||||
\pause
|
||||
\item Process them separately, e.\,g., with
|
||||
\verb+01a_preprocessing_eyetracking.R+ and
|
||||
\verb+01b_preprocessing_surveys.R+ and then \verb+02_combine-data.R+
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 5cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node (results) at (0, 0) {\faIcon{folder} \verb+results+};
|
||||
\node (n1) at (4, 0) {\faIcon[regular]{file} \verb+data_eyetracking.csv+};
|
||||
\node (n2) at (4, -0.7) {\faIcon[regular]{file} \verb+data_surveys.csv+};
|
||||
\node (n3) at (4, -1.4) {\faIcon[regular]{file} \verb+data_complete.csv+};
|
||||
\path (results.center) -- (n1.west);
|
||||
\path (results.center) -- (n2.west);
|
||||
\path (results.center) -- (n3.west);
|
||||
\end{tikzpicture}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Toy example with 11 questions}
|
||||
Thank you everybody for filling out our little toy survey in Qualtrics!
|
||||
\vfill
|
||||
\tiny
|
||||
\begin{tabular}{lllll}
|
||||
\hline
|
||||
ResponseId & age & sex & data\_sharing\_1 & data\_sharing\_2 \\
|
||||
\hline
|
||||
R\_225ffqhb7qRaIGO:1 & Min. :24.00 & m : 2 & No :7 & Min. :1.000 \\
|
||||
R\_2F9fXxf3NedHqZl:1 & 1st Qu.:26.50 & f :11 & Yes:7 & 1st Qu.:1.000 \\
|
||||
R\_2foYj4iSgaBTkEO:1 & Median :28.00 & d : 1 & & Median :2.000 \\
|
||||
R\_2J9B4aLaasQ1m81:1 & Mean :29.86 & not indicated: 0 & & Mean :2.214 \\
|
||||
R\_2P1TMDNlwm0gSIk:1 & 3rd Qu.:30.00 & & & 3rd Qu.:2.000 \\
|
||||
R\_2pXfOSq8DBImG6R:1 & Max. :43.00 & & & Max. :6.000 \\
|
||||
(Other) :8 & & & & \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
|
||||
\vspace{.5cm}
|
||||
\begin{tabular}{lllllll}
|
||||
\hline
|
||||
rdm\_stmnt\_1 & rdm\_stmnt\_2 & rdm\_stmnt\_3 & rdm\_stmnt\_4 & rdm\_stmnt\_5 & career\_level\_1 & career\_level\_2 \\
|
||||
\hline
|
||||
Min. :2.000 & Min. :2 & Min. :2.000 & Min. :1.000 & Min. :1.000 & Student : 0 & Min. : 1.000 \\
|
||||
1st Qu.:3.250 & 1st Qu.:4 & 1st Qu.:2.250 & 1st Qu.:1.000 & 1st Qu.:1.000 & PhD student :11 & 1st Qu.: 1.625 \\
|
||||
Median :4.500 & Median :4 & Median :3.000 & Median :1.000 & Median :1.000 & Postdoc : 1 & Median : 2.500 \\
|
||||
Mean :4.071 & Mean :4 & Mean :2.857 & Mean :1.143 & Mean :1.143 & Senior researcher: 0 & Mean : 5.964 \\
|
||||
3rd Qu.:5.000 & 3rd Qu.:5 & 3rd Qu.:3.000 & 3rd Qu.:1.000 & 3rd Qu.:1.000 & Professor : 1 & 3rd Qu.: 4.500 \\
|
||||
Max. :5.000 & Max. :5 & Max. :5.000 & Max. :2.000 & Max. :2.000 & Other : 1 & Max. :38.000 \\
|
||||
& NA's :1 & & & & & \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{frame}
|
||||
|
||||
% print(xtable::xtable(summary(dat[, 1:5])), include.rownames = FALSE)
|
||||
% print(xtable::xtable(summary(dat[, 6:12])), include.rownames = FALSE)
|
||||
|
||||
\begin{frame}[fragile]{Folder structure for toy example}
|
||||
{One possible structure!}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4.3cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node (ex) at (0, 0) {\faIcon{folder} \verb+example+};
|
||||
\node (n1) at (3, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (n2) at (3, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (n3) at (3, -1.4) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (ex.center) -- (n1.west);
|
||||
\path (ex.center) -- (n2.west);
|
||||
\path (ex.center) -- (n3.west);
|
||||
|
||||
\node (o1) at (7, 0.7) {\faIcon[regular]{file} \verb+01_preprocessing.R+};
|
||||
\node (o2) at (7, -0.7) {\faIcon{folder} \verb+codebook+};
|
||||
\node (o3) at (7, -1.4) {\faIcon{folder} \verb+rawdata+};
|
||||
\node (o4) at (7, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\path (n1.center) -- (o1.west);
|
||||
\path (n2.center) -- (o2.west);
|
||||
\path (n2.center) -- (o3.west);
|
||||
\path (n2.center) -- (o4.west);
|
||||
|
||||
\node (p1) at (11, -0.7) {\faIcon[regular]{file} \verb+codebook_01.R+};
|
||||
\node (p2) at (11, -1.4) {\faIcon[regular]{file} \verb+codebook_01.xlsx+};
|
||||
\node (p3) at (11, -2.1) {\dots};
|
||||
|
||||
\path (o2.center) -- (p1.west);
|
||||
\path (o2.center) -- (p2.west);
|
||||
\path (o2.center) -- (p3.west);
|
||||
\end{tikzpicture}
|
||||
|
||||
\end{frame}
|
||||
|
||||
\section{Metadata}
|
||||
|
||||
\begin{frame}{Metadata answers questions}
|
||||
\begin{itemize}
|
||||
\item {\bf Who} created the data?
|
||||
\item {\bf Why} was the data created?
|
||||
\item {\bf When} was the data created?
|
||||
\item {\bf Where} is the data?
|
||||
\item {\bf How} was the data created?
|
||||
\item {\bf What} is the content of the data?
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\hfill{\tiny \citet{Wilbrandt2023}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata}
|
||||
\begin{block}{Metadata}
|
||||
\dots is data about data.\\
|
||||
\dots can be \emph{descriptive}, \emph{structural}, or \emph{administrative}.
|
||||
\end{block}
|
||||
\vfill
|
||||
\begin{columns}
|
||||
\begin{column}[t]{.5\textwidth}
|
||||
Contains information on origin and background of data like
|
||||
\begin{itemize}
|
||||
\item Who, when, why, how, \dots
|
||||
\item Used resources
|
||||
\item Used abbreviations, units, names
|
||||
\item Licenses
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\begin{column}[t]{.5\textwidth}
|
||||
Data can be anything like
|
||||
\begin{itemize}
|
||||
\item Book content
|
||||
\item Pictures or audio files
|
||||
\item Website content or a blog post
|
||||
\item Journal paper
|
||||
\item Research data
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata examples}
|
||||
{Photo}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .31]{../figures/metadata_photo}
|
||||
\end{center}
|
||||
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata examples}
|
||||
{Book}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .36]{../figures/metadata_book}
|
||||
\end{center}
|
||||
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata examples}
|
||||
{Webpage}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .27]{../figures/metadata_webpage}
|
||||
\end{center}
|
||||
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata examples}
|
||||
{WORD document}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .23]{../figures/metadata_word_document}
|
||||
\end{center}
|
||||
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata for research data}
|
||||
\begin{tikzpicture}
|
||||
\node[font=\Large] (n1) at (0,0) {\bf \color{iwmorange} Study};
|
||||
|
||||
\node[font=\large] (i1) at (0,-1) {$\bullet$ Persons};
|
||||
\node[font=\large] (i2) at (.36,-1.5) {$\bullet$ Background};
|
||||
\node[font=\large] (i3) at (.03,-2) {$\bullet$ Funding};
|
||||
\node[font=\large] (i4) at (-.38,-2.5) {$\bullet$ \dots};
|
||||
\node[draw=iwmorange, thick, fit={(n1) (i1) (i2) (i3) (i4)}, inner sep=10pt] (box) {};
|
||||
|
||||
\node[font=\Large] (n2) at (5,0) {\bf \color{iwmorange} Data set};
|
||||
|
||||
\node[font=\large] (j1) at (4.3,-1) {$\bullet$ Files};
|
||||
\node[font=\large] (j2) at (4.57,-1.5) {$\bullet$ Sources};
|
||||
\node[font=\large] (j3) at (4.65,-2) {$\bullet$ Methods};
|
||||
\node[font=\large] (j4) at (4.18,-2.5) {$\bullet$ \dots};
|
||||
\node[draw=iwmorange, thick, fit={(n2) (j1) (j2) (j3) (j4)}, inner sep=10pt] (box) {};
|
||||
|
||||
\node[font=\Large] (n3) at (10,0) {\bf \color{iwmorange} Variables};
|
||||
|
||||
\node[font=\large] (k1) at (9.7,-1) {$\bullet$ Data type};
|
||||
\node[font=\large] (k2) at (9.69,-1.5) {$\bullet$ Scale unit};
|
||||
\node[font=\large] (k3) at (9.85,-2) {$\bullet$ Value range};
|
||||
\node[font=\large] (k4) at (9.12,-2.5) {$\bullet$ \dots};
|
||||
\node[draw=iwmorange, thick, fit={(n3) (k1) (k2) (k3) (k4)}, inner sep=10pt] (box) {};
|
||||
|
||||
\draw[-latex, thick] (n1) -- (n2);
|
||||
\draw[-latex, thick] (n2) -- (n3);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\hfill\tiny \url{https://datamanagement.hms.harvard.edu/collect/readme-files}
|
||||
\end{frame}
|
||||
|
||||
\section{README files}
|
||||
|
||||
\begin{frame}{README files}
|
||||
\begin{itemize}
|
||||
\item Can be used to give information about all levels in a research
|
||||
project: study/project, data set, variables; either in one README or in
|
||||
several ones
|
||||
\item Should provide a clear and concise description of all relevant details
|
||||
about data collection, processing, and analysis
|
||||
\item README files are created for different purposes:
|
||||
\begin{itemize}
|
||||
\item to document changes to files or file names within a folder
|
||||
\item to explain file naming conventions, practices, etc.\ ``in
|
||||
general'' for future reference
|
||||
\item to specifically accompany files/data being deposited in a
|
||||
repository
|
||||
\end{itemize}
|
||||
\item Creating a README file at the beginning of your research process,
|
||||
and updating it consistently throughout your research, will help you
|
||||
to compile a final README file when your data is ready for deposit
|
||||
\item Find a template here:
|
||||
\url{https://cornell.app.box.com/v/ReadmeTemplate}
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\hfill\tiny \url{https://datamanagement.hms.harvard.edu/collect/readme-files}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Study/project}{README on top level}
|
||||
\begin{itemize}
|
||||
\item Project name and purpose
|
||||
\item Funding information (process number!)
|
||||
\item Ethics approved? LEK number!
|
||||
\item Person(s) responsible for conducting the study
|
||||
\item One or several studies? Information about them
|
||||
\item Time/Duration of project
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Data set}{README accompanying data set(s)}
|
||||
\begin{itemize}
|
||||
\item One or more data sets?
|
||||
\item Time of data collection
|
||||
\item Person(s) responsible for data collection
|
||||
\item File organisation
|
||||
\item Naming conventions
|
||||
\item Preprocessing methods
|
||||
\item Anything that is special about the data set(s)
|
||||
\item Number of subjects
|
||||
\item Variables
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Variables}{README accompanying a specific data set}
|
||||
\begin{itemize}
|
||||
\item You can use a README (or text file called \texttt{codebook.txt} or
|
||||
similar) to document your variables
|
||||
\item Especially if you only have a few variables, this is an easy and fast
|
||||
way to document them
|
||||
\item If you are working with extensive surveys or questionnaires, it might
|
||||
be a good time investment to create a more elaborate codebook
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\section{Codebooks}
|
||||
|
||||
\begin{frame}{What information about variables should a codebook include?}
|
||||
% slido
|
||||
\centering
|
||||
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 3}
|
||||
|
||||
\url{https://app.sli.do/event/3S1Bn3Tjknuk5J5WiqAYzG}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{A codebook should include}
|
||||
\begin{tabular}{lp{11cm}}
|
||||
\hline
|
||||
Variable name & Usually some abbreviation like \texttt{pna01} \\
|
||||
Variable label & Brief description to identify variable \\
|
||||
Question text & If applicable, exact wording from survey question \\
|
||||
Values & Values variable can take (e.\,g., 1 to 5) \\
|
||||
Value labels & If applicable, textual descriptions of the values \\
|
||||
Statistics & For example, range, mean, standard deviation for
|
||||
numeric variables; frequencies and percentages for categorical variables \\
|
||||
Missing data & If applicable, values and labels of missing data \\
|
||||
Notes & Additional notes, remarks, or comments; for measures or
|
||||
questions from copyrighted instruments, the notes field can be used to
|
||||
cite the source \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\vfill
|
||||
|
||||
\hfill\tiny \url{https://www.icpsr.umich.edu/web/ICPSR/cms/1983}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Codebooks}
|
||||
\begin{itemize}
|
||||
\item There are many different ways to create a codebook
|
||||
\item It can be a README, some other plain text file, a table (stored as CSV
|
||||
or XLSX), a WORD document, or PDF
|
||||
\item For a short questionnaire, it can be sufficient to export it as a PDF
|
||||
\item Let's walk through a couple of options\dots
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Option 1 -- Toy example with 11 questions}
|
||||
{Simple PDF}
|
||||
\begin{columns}
|
||||
\begin{column}{.5\textwidth}
|
||||
\begin{center}
|
||||
\vspace{-.4cm}
|
||||
Export from Qualtrics\\
|
||||
\includegraphics[scale = .3]{../figures/codebook_1.png}
|
||||
\end{center}
|
||||
\end{column}
|
||||
\begin{column}{.6\textwidth}
|
||||
\begin{itemize}
|
||||
\item For a simple questionnaire like this, the WORD document exported
from Qualtrics and converted to PDF might be sufficient as a codebook
|
||||
\item For longer questionnaires, the WORD document can still be a good
|
||||
starting point to create a more elaborate codebook
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Option 2 -- Toy example with 11 questions}
|
||||
{Plain text file}
|
||||
\begin{center}
|
||||
\vspace{-.3cm}
|
||||
\footnotesize
|
||||
\begin{lstlisting}[language = bash, identifierstyle=\color{iwmgray}]
|
||||
sex. Please indicate your sex.
|
||||
-------------------------------------------------------------------------------
|
||||
-1. m
|
||||
-2. f
|
||||
-3. d
|
||||
-4. not indicated
|
||||
|
||||
age. How old are you? Please enter your age in years.
|
||||
-------------------------------------------------------------------------------
|
||||
numerical input
|
||||
|
||||
data_sharing_1. Have you ever published data in a repository?
|
||||
-------------------------------------------------------------------------------
|
||||
-1. No
|
||||
-2. Yes
|
||||
\end{lstlisting}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Option 3 -- Toy example with 11 questions}
|
||||
{Creating a simple codebook in R ``by hand''}
|
||||
\footnotesize
|
||||
\begin{lstlisting}
|
||||
load("results/data_rdm-ms-ss2024_cleaned.RData")
|
||||
codebook <- data.frame(var_name = names(dat),
|
||||
var_text = c("Response Id",
             "How old are you? Please enter your age in years.",
             "Please indicate your sex.",
|
||||
...
|
||||
"Sharing data is bad scientific practice",
|
||||
"What is your current career level?",
|
||||
"How long have you been working in science (in years)?"))
|
||||
|
||||
codebook$type <- sapply(dat, class)
|
||||
codebook$n <- sapply(dat, length)
|
||||
codebook$mean <- sapply(dat,
|
||||
function(x) ifelse(is.numeric(x), mean(x, na.rm = TRUE), NA))
|
||||
codebook$sd   <- sapply(dat,
  function(x) ifelse(is.numeric(x), sd(x, na.rm = TRUE), NA))
|
||||
|
||||
openxlsx::write.xlsx(codebook, file = "codebook/codebook_01.xlsx")
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Option 3 -- Toy example with 11 questions}
|
||||
{Creating a simple codebook in R ``by hand''}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .6]{../figures/codebook_2.png}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Option 4 -- Toy example with 11 questions}
|
||||
{Using the codebook package in R}
|
||||
\begin{itemize}
|
||||
\item When you export a Qualtrics questionnaire as SPSS file and import it
|
||||
into R using the haven package, you can use RMarkdown to create an
|
||||
elaborate HTML codebook
|
||||
\item It works best for classical questionnaire items
|
||||
\item In our example, the survey is not formatted well enough for the
|
||||
generated codebook to be completely correct
|
||||
\end{itemize}
|
||||
\footnotesize
|
||||
\begin{lstlisting}
|
||||
#' ---
|
||||
#' title: Codebook for Data Set "RDM MS SS 2024"
|
||||
#' author: Nora Wickelmaier
|
||||
#' ---
|
||||
|
||||
#+ echo = FALSE
|
||||
dat <- haven::read_spss("../rawdata/RDM_MS_SS2024_download_2024-06-04.sav")
|
||||
codebook::codebook(dat)
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
|
||||
\appendix
|
||||
%%\begin{frame}[allowframebreaks]{References}
|
||||
\begin{frame}{References}
|
||||
%\renewcommand{\bibfont}{\small}
|
||||
\printbibliography
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\end{document}
|
||||
|
47
03_data_organisation/example/README.md
Normal file
47
03_data_organisation/example/README.md
Normal file
@ -0,0 +1,47 @@
|
||||
# Toy data set for the methods seminar on data management SS2024
|
||||
|
||||
## Responsible person
|
||||
|
||||
Nora Wickelmaier
|
||||
Referentin Forschungsmethoden und Forschungsdatenmanagement
|
||||
Leibniz-Institut für Wissensmedien (IWM)
|
||||
n.wickelmaier@iwm-tuebingen.de
|
||||
|
||||
## Folder structure and naming conventions
|
||||
|
||||
```
|
||||
/example/
|
||||
|
|
||||
|- /code/
|
||||
|- /data/
|
||||
|- /codebook/
|
||||
|- /rawdata/
|
||||
|- /results/
|
||||
```
|
||||
|
||||
The `code` folder contains analysis scripts written in R. The scripts are
|
||||
numbered, indicating the order they should be executed in.
|
||||
|
||||
The `data` folder contains all folders associated with data and its
|
||||
documentation.
|
||||
|
||||
The `codebook` folder contains different codebook options and R scripts that
|
||||
create these codebooks. If the codebook is created by an R script, the script
|
||||
and the codebook are named identically, e.g., `codebook_01.R` and
|
||||
`codebook_01.xlsx`.
|
||||
|
||||
The `rawdata` folder contains the downloads from Qualtrics. In Qualtrics, the
|
||||
variables have been selected and ordered and then downloaded, without the
|
||||
additional columns Qualtrics adds by default. The naming convention for the
|
||||
downloaded files is
|
||||
```
|
||||
RDM_MS_SS2024_download_<YYYY-MM-DD>.<fileending>
|
||||
```
|
||||
No other files than the downloads from Qualtrics should go into this folder!
|
||||
|
||||
The `results` folder contains processed data. The scripts in `/code/` process
|
||||
the data from `/rawdata/` and save the files containing the processed data to
|
||||
`/results/`. Data can be exported as CSV files or RData files. If different file
|
||||
formats contain the same data, they should be named identically, e.g.,
|
||||
`data_rdm-ms-ss2024_cleaned.csv` and `data_rdm-ms-ss2024_cleaned.RData`.
|
||||
|
78
03_data_organisation/example/code/01_preprocessing.R
Normal file
78
03_data_organisation/example/code/01_preprocessing.R
Normal file
@ -0,0 +1,78 @@
|
||||
# 01_preprocessing.R
|
||||
#
|
||||
# Cleaning up data for toy data set Methods Seminar SS2024
|
||||
#
|
||||
# Input: RDM_MS_SS2024_download_2024-06-07.csv
|
||||
# Output: results/data_rdm-ms-ss2024_cleaned.csv
|
||||
# results/data_rdm-ms-ss2024_cleaned.RData
|
||||
#
|
||||
# created: 2024-06-03
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
|
||||
|
||||
# Read the Qualtrics export. The first three lines of the file (variable names,
# question texts, import ids) are skipped here; the variable names are read
# separately below. Empty fields are treated as missing values.
dat <- read.table("data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv",
                  sep = ",", skip = 3, stringsAsFactors = TRUE, na.strings = "")
|
||||
|
||||
names(dat) <-
|
||||
readLines("data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv", 1) |>
|
||||
strsplit(split = ",") |>
|
||||
unlist()
|
||||
|
||||
# Clean up variables
|
||||
dat$ResponseId <- factor(dat$ResponseId)
|
||||
|
||||
dat$sex <- factor(dat$sex,
|
||||
levels = c("m", "f", "d", "not indicated"))
|
||||
|
||||
dat$data_sharing_1 <- factor(dat$data_sharing_1,
|
||||
levels = c("No", "Yes"))
|
||||
|
||||
dat$career_level_1 <- factor(dat$career_level_1,
|
||||
levels = c("Student", "PhD student", "Postdoc",
|
||||
"Senior researcher", "Professor",
|
||||
"Other"))
|
||||
|
||||
dat$rdm_stmnt_1 <- factor(dat$rdm_stmnt_1,
|
||||
levels = c("Strongly disagree", "Disagree",
|
||||
"Neither agree nor disagree", "Agree",
|
||||
"Strongly agree"))
|
||||
|
||||
dat$rdm_stmnt_2 <- factor(dat$rdm_stmnt_2,
|
||||
levels = c("Strongly disagree", "Disagree",
|
||||
"Neither agree nor disagree", "Agree",
|
||||
"Strongly agree"))
|
||||
|
||||
dat$rdm_stmnt_3 <- factor(dat$rdm_stmnt_3,
|
||||
levels = c("Strongly disagree", "Disagree",
|
||||
"Neither agree nor disagree", "Agree",
|
||||
"Strongly agree"))
|
||||
|
||||
dat$rdm_stmnt_4 <- factor(dat$rdm_stmnt_4,
|
||||
levels = c("Strongly disagree", "Disagree",
|
||||
"Neither agree nor disagree", "Agree",
|
||||
"Strongly agree"))
|
||||
|
||||
dat$rdm_stmnt_5 <- factor(dat$rdm_stmnt_5,
|
||||
levels = c("Strongly disagree", "Disagree",
|
||||
"Neither agree nor disagree", "Agree",
|
||||
"Strongly agree"))
|
||||
|
||||
## Fix data_sharing_2
# One respondent answered "1 out of 4"; recode this entry to 1. The column is
# a factor at this point (read with stringsAsFactors = TRUE), so it has to be
# converted via as.character() first: as.numeric() applied directly to a factor
# returns the internal level codes instead of the recorded numbers.
dat$data_sharing_2 <- as.character(dat$data_sharing_2)
dat$data_sharing_2[dat$data_sharing_2 == "1 out of 4"] <- "1"
dat$data_sharing_2 <- as.numeric(dat$data_sharing_2)
|
||||
|
||||
# Create numeric statement variables
#
# The statement items are factors whose levels were set above in the order
# "Strongly disagree", ..., "Strongly agree", so as.numeric() yields the
# integer codes 1 to 5. All items are converted in one step so that each
# variable is converted from itself (previously rdm_stmnt_4 was overwritten
# with the values of rdm_stmnt_5).
stmnt_vars <- paste0("rdm_stmnt_", 1:5)
dat[stmnt_vars] <- lapply(dat[stmnt_vars], as.numeric)
|
||||
|
||||
# Save cleaned data set
|
||||
write.table(dat, file = "data/results/data_rdm-ms-ss2024_cleaned.csv", sep = ";",
|
||||
row.names = FALSE, quote = FALSE)
|
||||
|
||||
save(dat, file = "data/results/data_rdm-ms-ss2024_cleaned.RData")
|
||||
|
BIN
03_data_organisation/example/data/codebook/RDM_MS_SS2024.docx
Normal file
BIN
03_data_organisation/example/data/codebook/RDM_MS_SS2024.docx
Normal file
Binary file not shown.
41
03_data_organisation/example/data/codebook/codebook_01.R
Normal file
41
03_data_organisation/example/data/codebook/codebook_01.R
Normal file
@ -0,0 +1,41 @@
|
||||
# codebook_01.R
|
||||
#
|
||||
# Codebook generation example
|
||||
#
|
||||
# Input: results/data_rdm-ms-ss2024_cleaned.RData
|
||||
# Output: codebook/codebook_01.csv, codebook/codebook_01.xlsx
|
||||
#
|
||||
# created: 2024-06-04
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
|
||||
|
||||
load("data/results/data_rdm-ms-ss2024_cleaned.RData")
|
||||
|
||||
# Set up the codebook with one row per variable in `dat`.
# The question texts must be listed in the column order of `dat`
# (ResponseId, age, sex, ...); otherwise a text ends up on the wrong variable.
codebook <- data.frame(
  var_name = names(dat),
  var_text = c("Response Id",
               "How old are you? Please enter your age in years.",
               "Please indicate your sex.",
               "Have you ever published data in a repository?",
               "How many of your data sets have you published so far?",
               "All my analyses are preregistered",
               "Sharing my data is very important to me",
               "I invest more time in research data management than my colleagues",
               "I think research data management is overrated",
               "Sharing data is bad scientific practice",
               "What is your current career level?",
               "How long have you been working in science (in years)?")
)
|
||||
|
||||
# Add descriptive statistics; mean and sd are only computed for numeric
# variables and ignore missing values (na.rm = TRUE for both, so that a single
# NA does not turn the whole sd into NA).
codebook$type <- sapply(dat, class)
codebook$n    <- sapply(dat, length)   # number of rows, including NAs
codebook$mean <- sapply(dat, function(x) ifelse(is.numeric(x), mean(x, na.rm = TRUE), NA))
codebook$sd   <- sapply(dat, function(x) ifelse(is.numeric(x), sd(x, na.rm = TRUE), NA))
|
||||
|
||||
# Export as semicolon-separated file. row.names = FALSE keeps the number of
# fields in each data row identical to the number of fields in the header line.
write.table(codebook,
            file = "data/codebook/codebook_01.csv",
            na = "",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)
|
||||
|
||||
openxlsx::write.xlsx(codebook, file = "data/codebook/codebook_01.xlsx")
|
||||
|
13
03_data_organisation/example/data/codebook/codebook_01.csv
Normal file
13
03_data_organisation/example/data/codebook/codebook_01.csv
Normal file
@ -0,0 +1,13 @@
|
||||
var_name;var_text;type;n;mean;sd
|
||||
1;ResponseId;Response Id;factor;13;;
|
||||
2;age;Please indicate your sex.;integer;13;29.6923076923077;5.99144689515278
|
||||
3;sex;How old are you? Please enter your age in years.;factor;13;;
|
||||
4;data_sharing_1;Have you ever published data in a repository?;factor;13;;
|
||||
5;data_sharing_2;How many of your data sets have you published so far?;numeric;13;2.30769230769231;1.65250392761083
|
||||
6;rdm_stmnt_1;All my analyses are preregistered;numeric;13;4.15384615384615;1.14354374979373
|
||||
7;rdm_stmnt_2;Sharing my data is very important to me;numeric;13;4;
|
||||
8;rdm_stmnt_3;I invest more time in research data management than my colleagues;numeric;13;2.84615384615385;0.800640769025436
|
||||
9;rdm_stmnt_4;I think research data management is overrated;numeric;13;1.15384615384615;0.375533808099405
|
||||
10;rdm_stmnt_5;Sharing data is bad scientific practice;numeric;13;1.15384615384615;0.375533808099405
|
||||
11;career_level_1;What is your current career level?;factor;13;;
|
||||
12;career_level_2;How long have you been working in science (in years)?;numeric;13;6.26923076923077;10.1788493632126
|
|
BIN
03_data_organisation/example/data/codebook/codebook_01.xlsx
Normal file
BIN
03_data_organisation/example/data/codebook/codebook_01.xlsx
Normal file
Binary file not shown.
68
03_data_organisation/example/data/codebook/codebook_02.R
Normal file
68
03_data_organisation/example/data/codebook/codebook_02.R
Normal file
@ -0,0 +1,68 @@
|
||||
# codebook_02.R
|
||||
#
|
||||
# Codebook generation example
|
||||
#
|
||||
# Input: rawdata/RDM_MS_SS2024_download_2024-06-04.sav
|
||||
# Output: codebook/codebook_02.csv, codebook/codebook_02.xlsx
|
||||
#
|
||||
# created: 2024-06-04
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
|
||||
|
||||
dat <- as.data.frame(haven::read_spss("data/rawdata/RDM_MS_SS2024_download_2024-06-04.sav"))
|
||||
|
||||
## Fix data_sharing_2
|
||||
dat$data_sharing_2[dat$data_sharing_2 == "1 out of 4"] <- 1
|
||||
|
||||
# Look at attributes
|
||||
attributes(dat$sex)
|
||||
|
||||
# Create codebook with survey questions
|
||||
codebook <- data.frame(variable = names(dat),
|
||||
label = sapply(dat, function(x) attr(x, "label")))
|
||||
|
||||
# Clean up data frame
|
||||
dat <- as.data.frame(lapply(dat, sjlabelled::unlabel))
|
||||
|
||||
sapply(dat, class) # Look at classes
|
||||
|
||||
dat$age <- as.numeric(dat$age)
|
||||
dat$career_level_2 <- as.numeric(dat$career_level_2)
|
||||
dat$data_sharing_2 <- as.numeric(dat$data_sharing_2)
|
||||
|
||||
dat$sex <- factor(dat$sex,
|
||||
levels = 1:4,
|
||||
labels = names(attr(dat$sex, "labels")))
|
||||
dat$data_sharing_1 <- factor(dat$data_sharing_1,
|
||||
levels = 1:2,
|
||||
labels = names(attr(dat$data_sharing_1, "labels")))
|
||||
dat$career_level_1 <- factor(dat$career_level_1,
|
||||
levels = 1:6,
|
||||
labels = names(attr(dat$career_level_1, "labels")))
|
||||
|
||||
# Add descriptive statistics to codebook
|
||||
|
||||
# mean and sd are only computed for numeric variables and ignore missing
# values (na.rm = TRUE for both, so that a single NA does not turn the whole
# sd into NA).
codebook$n    <- sapply(dat, length)   # number of rows, including NAs
codebook$type <- sapply(dat, class)
codebook$mean <- sapply(dat, function(x) ifelse(is.numeric(x), mean(x, na.rm = TRUE), NA))
codebook$sd   <- sapply(dat, function(x) ifelse(is.numeric(x), sd(x, na.rm = TRUE), NA))
|
||||
|
||||
# props <- function(x) {
|
||||
# if (is.factor(x)) {
|
||||
# proportions(summary(x))
|
||||
# } else {
|
||||
# NA
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# codebook$prop <- lapply(dat, props)
|
||||
|
||||
# Export as semicolon-separated file. row.names = FALSE keeps the number of
# fields in each data row identical to the number of fields in the header line
# (the row names would only repeat the `variable` column).
write.table(codebook,
            file = "data/codebook/codebook_02.csv",
            na = "",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)
|
||||
|
||||
openxlsx::write.xlsx(codebook, file = "data/codebook/codebook_02.xlsx")
|
||||
|
||||
|
13
03_data_organisation/example/data/codebook/codebook_02.csv
Normal file
13
03_data_organisation/example/data/codebook/codebook_02.csv
Normal file
@ -0,0 +1,13 @@
|
||||
variable;label;n;type;mean;sd
|
||||
ResponseId;ResponseId;Response ID;13;character;;
|
||||
age;age;How old are you? Please enter your age in years.;13;numeric;29.6923076923077;5.99144689515278
|
||||
sex;sex;Please indicate your sex.;13;factor;;
|
||||
data_sharing_1;data_sharing_1;Have you ever published data in a repository?;13;factor;;
|
||||
data_sharing_2;data_sharing_2;How many of your data sets have you published so far?;13;numeric;1.38461538461538;1.85015591858549
|
||||
rdm_stmnt_1;rdm_stmnt_1;Please indicate how much you agree with the following statements - All my analyses are preregistered;13;numeric;4.15384615384615;1.14354374979373
|
||||
rdm_stmnt_2;rdm_stmnt_2;Please indicate how much you agree with the following statements - Sharing my data is very important to me;13;numeric;4;
|
||||
rdm_stmnt_3;rdm_stmnt_3;Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues;13;numeric;2.84615384615385;0.800640769025436
|
||||
rdm_stmnt_4;rdm_stmnt_4;Please indicate how much you agree with the following statements - I think research data management is overrated;13;numeric;1.84615384615385;0.987096233585649
|
||||
rdm_stmnt_5;rdm_stmnt_5;Please indicate how much you agree with the following statements - Sharing data is bad scientific practice;13;numeric;1.15384615384615;0.375533808099405
|
||||
career_level_1;career_level_1;What is your current career level?;13;factor;;
|
||||
career_level_2;career_level_2;How long have you been working in science (in years)?;13;numeric;6.26923076923077;10.1788493632126
|
|
BIN
03_data_organisation/example/data/codebook/codebook_02.xlsx
Normal file
BIN
03_data_organisation/example/data/codebook/codebook_02.xlsx
Normal file
Binary file not shown.
10
03_data_organisation/example/data/codebook/codebook_03.R
Normal file
10
03_data_organisation/example/data/codebook/codebook_03.R
Normal file
@ -0,0 +1,10 @@
|
||||
#' ---
|
||||
#' title: Codebook for Data Set "RDM MS SS 2024"
|
||||
#' author: Nora Wickelmaier
|
||||
#' ---
|
||||
|
||||
#+ echo = FALSE
|
||||
dat <- haven::read_spss("../rawdata/RDM_MS_SS2024_download_2024-06-04.sav")
|
||||
|
||||
codebook::codebook(dat)
|
||||
|
@ -0,0 +1,94 @@
|
||||
###############################################################################
|
||||
This file contains an overview of the variables from a toy data set collected
|
||||
at the methods seminar SS 2024. The raw data contained in
|
||||
"RDM_MS_SS2024_download_2024-06-03_v1.csv" contain additional variables
|
||||
created by Qualtrics. The variables have been preprocessed and are stored in
|
||||
"data_rdm-ms-ss2024_cleaned.csv".
|
||||
###############################################################################
|
||||
|
||||
|
||||
ResponseId. <Qualtrics ID of subject>
|
||||
-------------------------------------------------------------------------------
|
||||
random sequence of numbers, letters, and underscore
|
||||
|
||||
|
||||
sex. Please indicate your sex.
|
||||
-------------------------------------------------------------------------------
|
||||
-1. m
|
||||
-2. f
|
||||
-3. d
|
||||
-4. not indicated
|
||||
|
||||
|
||||
age. How old are you? Please enter your age in years.
|
||||
-------------------------------------------------------------------------------
|
||||
numerical input
|
||||
|
||||
|
||||
data_sharing_1. Have you ever published data in a repository?
|
||||
-------------------------------------------------------------------------------
|
||||
-1. No
|
||||
-2. Yes
|
||||
|
||||
|
||||
data_sharing_2. How many of your data sets have you published so far?
|
||||
-------------------------------------------------------------------------------
|
||||
numerical input
|
||||
|
||||
|
||||
rdm_stmnt. Please indicate how much you agree with the following statements:
|
||||
|
||||
rdm_stmnt_1. All my analyses are preregistered
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_2. Sharing my data is very important to me
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_3. I invest more time in research data management than my colleagues
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_4. I think research data management is overrated
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_5. Sharing data is bad scientific practice
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
|
||||
career_level_1. What is your current career level?
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Student
|
||||
-2. PhD student
|
||||
-3. Postdoc
|
||||
-4. Senior researcher
|
||||
-5. Professor
|
||||
-6. Other
|
||||
|
||||
|
||||
career_level_2. How long have you been working in science (in years)?
|
||||
-------------------------------------------------------------------------------
|
||||
numerical input
|
@ -0,0 +1,16 @@
|
||||
ResponseId,age,sex,data_sharing_1,data_sharing_2,rdm_stmnt_1,rdm_stmnt_2,rdm_stmnt_3,rdm_stmnt_4,rdm_stmnt_5,career_level_1,career_level_2
|
||||
Response ID,How old are you? Please enter your age in years.,Please indicate your sex.,Have you ever published data in a repository?,How many of your data sets have you published so far?,Please indicate how much you agree with the following statements - All my analyses are preregistered,Please indicate how much you agree with the following statements - Sharing my data is very important to me,Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues,Please indicate how much you agree with the following statements - I think research data management is overrated,Please indicate how much you agree with the following statements - Sharing data is bad scientific practice,What is your current career level?,How long have you been working in science (in years)?
|
||||
"{""ImportId"":""_recordId""}","{""ImportId"":""QID3_TEXT""}","{""ImportId"":""QID1""}","{""ImportId"":""QID4""}","{""ImportId"":""QID7_TEXT""}","{""ImportId"":""QID2_1""}","{""ImportId"":""QID2_2""}","{""ImportId"":""QID2_3""}","{""ImportId"":""QID2_4""}","{""ImportId"":""QID2_5""}","{""ImportId"":""QID8""}","{""ImportId"":""QID9_TEXT""}"
|
||||
R_8q7OpSkcuPT7SbI,42,f,No,1,Neither agree nor disagree,Agree,Strongly agree,Strongly disagree,Strongly disagree,Other,14
|
||||
R_8Io4pbk0A1a37VL,28,f,Yes,1,Strongly agree,,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,1
|
||||
R_2J9B4aLaasQ1m81,28,f,Yes,1 out of 4,Strongly agree,Strongly agree,Disagree,Disagree,Strongly disagree,PhD student,3
|
||||
R_80kqWr3W48SgiUZ,43,f,Yes,6,Agree,Agree,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,3
|
||||
R_8QpI8T0rjTjaPPr,30,f,Yes,4,Strongly agree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,5
|
||||
R_8QoVv6THz1Qjtuz,28,f,Yes,1,Disagree,Disagree,Disagree,Agree,Strongly disagree,Professor,38
|
||||
R_2F9fXxf3NedHqZl,25,d,No,0,Agree,Strongly agree,Disagree,Neither agree nor disagree,Disagree,PhD student,2
|
||||
R_2foYj4iSgaBTkEO,24,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
|
||||
R_83T6Oak5vI6GNJ7,30,f,Yes,1,Strongly agree,Agree,Neither agree nor disagree,Neither agree nor disagree,Strongly disagree,Postdoc,7
|
||||
R_2Vz26rWsOLYwqnD,25,m,Yes,3,Agree,Agree,Neither agree nor disagree,Disagree,Disagree,PhD student,2
|
||||
R_8HcBgUUm1BXFfhv,29,m,No,0,Strongly agree,Disagree,Disagree,Strongly disagree,Strongly disagree,PhD student,3
|
||||
R_2P1TMDNlwm0gSIk,26,f,No,0,Disagree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1.5
|
||||
R_225ffqhb7qRaIGO,28,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
|
|
Binary file not shown.
@ -0,0 +1,66 @@
|
||||
* Encoding: UTF-8.
|
||||
TITLE "RDM_MS_SS2024".
|
||||
SUBTITLE "".
|
||||
VARIABLE LABELS
|
||||
ResponseId "Response ID"
|
||||
age "How old are you? Please enter your age in years."
|
||||
sex "Please indicate your sex."
|
||||
data_sharing_1 "Have you ever published data in a repository?"
|
||||
data_sharing_2 "How many of your data sets have you published so far?"
|
||||
rdm_stmnt_1 "Please indicate how much you agree with the following statements - All my analyses are preregistered"
|
||||
rdm_stmnt_2 "Please indicate how much you agree with the following statements - Sharing my data is very important to me"
|
||||
rdm_stmnt_3 "Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues"
|
||||
rdm_stmnt_4 "Please indicate how much you agree with the following statements - I think research data management is overrated"
|
||||
rdm_stmnt_5 "Please indicate how much you agree with the following statements - Sharing data is bad scientific practice"
|
||||
career_level_1 "What is your current career level?"
|
||||
career_level_2 "How long have you been working in science (in years)?"
|
||||
.
|
||||
VALUE LABELS
|
||||
/sex
|
||||
1 "m"
|
||||
2 "f"
|
||||
3 "d"
|
||||
4 "not indicated"
|
||||
/data_sharing_1
|
||||
1 "No"
|
||||
2 "Yes"
|
||||
/rdm_stmnt_1
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_2
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_3
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_4
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_5
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/career_level_1
|
||||
1 "Student"
|
||||
2 "PhD student"
|
||||
3 "Postdoc"
|
||||
4 "Senior researcher"
|
||||
5 "Professor"
|
||||
6 "Other"
|
||||
.
|
||||
CACHE.
|
||||
EXECUTE.
|
@ -0,0 +1,17 @@
|
||||
ResponseId,age,sex,data_sharing_1,data_sharing_2,rdm_stmnt_1,rdm_stmnt_2,rdm_stmnt_3,rdm_stmnt_4,rdm_stmnt_5,career_level_1,career_level_2
|
||||
Response ID,How old are you? Please enter your age in years.,Please indicate your sex.,Have you ever published data in a repository?,How many of your data sets have you published so far?,Please indicate how much you agree with the following statements - All my analyses are preregistered,Please indicate how much you agree with the following statements - Sharing my data is very important to me,Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues,Please indicate how much you agree with the following statements - I think research data management is overrated,Please indicate how much you agree with the following statements - Sharing data is bad scientific practice,What is your current career level?,How long have you been working in science (in years)?
|
||||
"{""ImportId"":""_recordId""}","{""ImportId"":""QID3_TEXT""}","{""ImportId"":""QID1""}","{""ImportId"":""QID4""}","{""ImportId"":""QID7_TEXT""}","{""ImportId"":""QID2_1""}","{""ImportId"":""QID2_2""}","{""ImportId"":""QID2_3""}","{""ImportId"":""QID2_4""}","{""ImportId"":""QID2_5""}","{""ImportId"":""QID8""}","{""ImportId"":""QID9_TEXT""}"
|
||||
R_8q7OpSkcuPT7SbI,42,f,No,1,Neither agree nor disagree,Agree,Strongly agree,Strongly disagree,Strongly disagree,Other,14
|
||||
R_8Io4pbk0A1a37VL,28,f,Yes,1,Strongly agree,,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,1
|
||||
R_2J9B4aLaasQ1m81,28,f,Yes,1 out of 4,Strongly agree,Strongly agree,Disagree,Disagree,Strongly disagree,PhD student,3
|
||||
R_80kqWr3W48SgiUZ,43,f,Yes,6,Agree,Agree,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,3
|
||||
R_8QpI8T0rjTjaPPr,30,f,Yes,4,Strongly agree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,5
|
||||
R_8QoVv6THz1Qjtuz,28,f,Yes,1,Disagree,Disagree,Disagree,Agree,Strongly disagree,Professor,38
|
||||
R_2F9fXxf3NedHqZl,25,d,No,0,Agree,Strongly agree,Disagree,Neither agree nor disagree,Disagree,PhD student,2
|
||||
R_2foYj4iSgaBTkEO,24,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
|
||||
R_83T6Oak5vI6GNJ7,30,f,Yes,1,Strongly agree,Agree,Neither agree nor disagree,Neither agree nor disagree,Strongly disagree,Postdoc,7
|
||||
R_2Vz26rWsOLYwqnD,25,m,Yes,3,Agree,Agree,Neither agree nor disagree,Disagree,Disagree,PhD student,2
|
||||
R_8HcBgUUm1BXFfhv,29,m,No,0,Strongly agree,Disagree,Disagree,Strongly disagree,Strongly disagree,PhD student,3
|
||||
R_2P1TMDNlwm0gSIk,26,f,No,0,Disagree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1.5
|
||||
R_225ffqhb7qRaIGO,28,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
|
||||
R_2pXfOSq8DBImG6R,32,f,No,0,Neither agree nor disagree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,2
|
|
Binary file not shown.
@ -0,0 +1,66 @@
|
||||
* Encoding: UTF-8.
|
||||
TITLE "RDM_MS_SS2024".
|
||||
SUBTITLE "".
|
||||
VARIABLE LABELS
|
||||
ResponseId "Response ID"
|
||||
age "How old are you? Please enter your age in years."
|
||||
sex "Please indicate your sex."
|
||||
data_sharing_1 "Have you ever published data in a repository?"
|
||||
data_sharing_2 "How many of your data sets have you published so far?"
|
||||
rdm_stmnt_1 "Please indicate how much you agree with the following statements - All my analyses are preregistered"
|
||||
rdm_stmnt_2 "Please indicate how much you agree with the following statements - Sharing my data is very important to me"
|
||||
rdm_stmnt_3 "Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues"
|
||||
rdm_stmnt_4 "Please indicate how much you agree with the following statements - I think research data management is overrated"
|
||||
rdm_stmnt_5 "Please indicate how much you agree with the following statements - Sharing data is bad scientific practice"
|
||||
career_level_1 "What is your current career level?"
|
||||
career_level_2 "How long have you been working in science (in years)?"
|
||||
.
|
||||
VALUE LABELS
|
||||
/sex
|
||||
1 "m"
|
||||
2 "f"
|
||||
3 "d"
|
||||
4 "not indicated"
|
||||
/data_sharing_1
|
||||
1 "No"
|
||||
2 "Yes"
|
||||
/rdm_stmnt_1
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_2
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_3
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_4
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_5
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/career_level_1
|
||||
1 "Student"
|
||||
2 "PhD student"
|
||||
3 "Postdoc"
|
||||
4 "Senior researcher"
|
||||
5 "Professor"
|
||||
6 "Other"
|
||||
.
|
||||
CACHE.
|
||||
EXECUTE.
|
Binary file not shown.
@ -0,0 +1,15 @@
|
||||
ResponseId;age;sex;data_sharing_1;data_sharing_2;rdm_stmnt_1;rdm_stmnt_2;rdm_stmnt_3;rdm_stmnt_4;rdm_stmnt_5;career_level_1;career_level_2
|
||||
R_8q7OpSkcuPT7SbI;42;f;No;2;3;4;5;1;1;Other;14
|
||||
R_8Io4pbk0A1a37VL;28;f;Yes;2;5;NA;3;1;1;PhD student;1
|
||||
R_2J9B4aLaasQ1m81;28;f;Yes;2;5;5;2;1;1;PhD student;3
|
||||
R_80kqWr3W48SgiUZ;43;f;Yes;6;4;4;3;1;1;PhD student;3
|
||||
R_8QpI8T0rjTjaPPr;30;f;Yes;5;5;4;3;1;1;PhD student;5
|
||||
R_8QoVv6THz1Qjtuz;28;f;Yes;2;2;2;2;1;1;Professor;38
|
||||
R_2F9fXxf3NedHqZl;25;d;No;1;4;5;2;2;2;PhD student;2
|
||||
R_2foYj4iSgaBTkEO;24;f;No;1;5;5;3;1;1;PhD student;1
|
||||
R_83T6Oak5vI6GNJ7;30;f;Yes;2;5;4;3;1;1;Postdoc;7
|
||||
R_2Vz26rWsOLYwqnD;25;m;Yes;4;4;4;3;2;2;PhD student;2
|
||||
R_8HcBgUUm1BXFfhv;29;m;No;1;5;2;2;1;1;PhD student;3
|
||||
R_2P1TMDNlwm0gSIk;26;f;No;1;2;4;3;1;1;PhD student;1.5
|
||||
R_225ffqhb7qRaIGO;28;f;No;1;5;5;3;1;1;PhD student;1
|
||||
R_2pXfOSq8DBImG6R;32;f;No;1;3;4;3;1;1;PhD student;2
|
|
BIN
figures/QR Code for Methodenseminar SS 2024 - Session 3.png
Normal file
BIN
figures/QR Code for Methodenseminar SS 2024 - Session 3.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 5.5 KiB |
BIN
figures/codebook_1.png
Normal file
BIN
figures/codebook_1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 47 KiB |
BIN
figures/codebook_2.png
Normal file
BIN
figures/codebook_2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 35 KiB |
BIN
figures/email_data_request_2024_03.png
Normal file
BIN
figures/email_data_request_2024_03.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 63 KiB |
BIN
figures/email_data_request_2024_04.png
Normal file
BIN
figures/email_data_request_2024_04.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 24 KiB |
Loading…
Reference in New Issue
Block a user