Compare commits
7 Commits
a1e720aa95
..
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 2a2df1d6f4 | |||
| 7a2327aba3 | |||
| 66c9711d45 | |||
| 5cdb5fdb8f | |||
| e723230ca5 | |||
| f1f7f35988 | |||
| 102834032c |
@@ -0,0 +1,6 @@
|
||||
"I know more than 3 RStudio short cuts" "6 votes" 38%
|
||||
"I have never updated my R packages" "2 votes" 13%
|
||||
"I know what the ISO 8601 date format is" "1 vote" 6%
|
||||
"I regularly delete duplicate files" "12 votes" 75%
|
||||
"I usually have a clean e-mail inbox" "7 votes" 44%
|
||||
"I use file naming conventions" "6 votes" 38%
|
||||
@@ -0,0 +1,17 @@
|
||||
overcome first barrier of starting action by knowing what to do next
|
||||
A heuristic on how to do work. It standardises the involved steps, so to make sure nothing is left out or not completely done, it also increases speed, and the level of sureness all is done correctly
|
||||
Situationally adaptable steps one can follow to always know what comes next in a work goal
|
||||
standardizing steps
|
||||
Process that optimises and connects all single parts/work steps
|
||||
Predefined steps to take (e.g. during preparing an experiment, collecting, analysing and publishing the data) - need it to make my life easier (not start over every time) and to make the process understandable and reproducible for others (other labs or someone taking over my project)
|
||||
needed that others can follow your steps without getting confused
|
||||
We can easily trace back and replicate the process
|
||||
Successive processing of subtasks
|
||||
version tracking with the possibility to update and share the progress
|
||||
Setting priorities
|
||||
Fixed sequence of work steps to ensure the replicability of the results
|
||||
So I can replicate "the result" the next time I need it
|
||||
a clear, reproducible process of work, to keep things clear
|
||||
ideally an automated process that we follow
|
||||
routines
|
||||
time management
|
||||
@@ -0,0 +1,844 @@
|
||||
\documentclass[aspectratio=169]{beamer}
|
||||
|
||||
\usepackage{listings}
|
||||
% Declare a single input encoding; when two are listed, the later one
% (latin1) ends up active and UTF-8 characters would be misread.
\usepackage[utf8]{inputenc}
|
||||
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
|
||||
\addbibresource{../literature/lit.bib}
|
||||
|
||||
\usepackage{fancyvrb}
|
||||
\usepackage{fontawesome5} % get icons
|
||||
\usepackage{multirow}
|
||||
\usepackage{color, colortbl}
|
||||
|
||||
\usepackage{tikz}
|
||||
\usetikzlibrary{fit}
|
||||
\usepackage[edges]{forest}
|
||||
|
||||
\lstset{language=R,%
|
||||
backgroundcolor=\color{iwmgray!15!white},
|
||||
basicstyle=\ttfamily\color{iwmgray},
|
||||
frame=none,
|
||||
commentstyle=\slshape\color{iwmgreen},
|
||||
keywordstyle=\bfseries\color{iwmgray},
|
||||
identifierstyle=\color{iwmpurple},
|
||||
stringstyle=\color{iwmblue},
|
||||
numbers=none,%left,numberstyle=\tiny,
|
||||
basewidth={.5em, .4em},
|
||||
showstringspaces=false,
|
||||
emphstyle=\color{red!50!white}}
|
||||
|
||||
\makeatletter \def\newblock{\beamer@newblock} \makeatother
|
||||
|
||||
\beamertemplatenavigationsymbolsempty
|
||||
\setbeamertemplate{itemize items}[circle]
|
||||
\setbeamertemplate{section in toc}[circle]
|
||||
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
|
||||
\setbeamercolor{block body}{bg=iwmorange!50!white}
|
||||
\setbeamercolor{block title}{fg=white, bg=iwmorange}
|
||||
% Definitions for biblatex
|
||||
\setbeamercolor{bibliography entry note}{fg=iwmgray}
|
||||
\setbeamercolor{bibliography entry author}{fg=iwmgray}
|
||||
\setbeamertemplate{bibliography item}{}
|
||||
|
||||
\definecolor{iwmorange}{RGB}{255,105,0}
|
||||
\definecolor{iwmgray}{RGB}{67,79,79}
|
||||
\definecolor{iwmblue}{RGB}{60,180,220}
|
||||
\definecolor{iwmgreen}{RGB}{145,200,110}
|
||||
\definecolor{iwmpurple}{RGB}{120,0,75}
|
||||
|
||||
\setbeamercolor{title}{fg=iwmorange}
|
||||
\setbeamercolor{frametitle}{fg=iwmorange}
|
||||
\setbeamercolor{structure}{fg=iwmorange}
|
||||
\setbeamercolor{normal text}{fg=iwmgray}
|
||||
\setbeamercolor{author}{fg=iwmgray}
|
||||
\setbeamercolor{date}{fg=iwmgray}
|
||||
|
||||
\newcommand{\vect}[1]{\mathbf{#1}}
|
||||
\newcommand{\mat}[1]{\mathbf{#1}}
|
||||
\newcommand{\gvect}[1]{\boldsymbol{#1}}
|
||||
\newcommand{\gmat}[1]{\boldsymbol{#1}}
|
||||
|
||||
\AtBeginSection[]{
|
||||
\frame{
|
||||
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
|
||||
|
||||
\setbeamertemplate{headline}{
|
||||
\begin{beamercolorbox}{section in head}
|
||||
\vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
|
||||
\end{beamercolorbox}
|
||||
}
|
||||
|
||||
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
|
||||
|
||||
\title{Data organisation for effective research data management}
|
||||
\author{Nora Wickelmaier}
|
||||
\date{June 10, 2024}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\begin{frame}{}
|
||||
\thispagestyle{empty}
|
||||
\titlepage
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Data request}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .55]{../figures/email_data_request_2024_01}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Data folder for the data requested}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .6]{../figures/email_data_request_2024_03}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{What is bad about this data organisation?}
|
||||
% slido
|
||||
\centering
|
||||
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 3}
|
||||
|
||||
\url{https://app.sli.do/event/3S1Bn3Tjknuk5J5WiqAYzG}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[<+->]{Bad things about this data organisation}
|
||||
\begin{itemize}
|
||||
\item Raw and processed data are in the same folder
|
||||
\item File naming does not sort in a sensible way: Best order would be first
|
||||
by subject, then by session
|
||||
\item Data and data scripts are in the same folder
|
||||
\item Data scripts are not numbered, unclear in what order they need to be
|
||||
executed
|
||||
\item There are plot files (PDFs) between the data and code files
|
||||
\item It is unclear which are the final and processed data files
|
||||
\item The final data files are not stored in an interoperable format: There
|
||||
is only an \texttt{.RData} file that (probably) contains the final data
|
||||
which was used for further analyses
|
||||
\item There is no documentation whatsoever
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Topics for this semester}
|
||||
\centering
|
||||
\begin{tabular}{ll}
|
||||
\hline
|
||||
Date & Topic \\
|
||||
\hline
|
||||
2024-05-13 & Introduction to data management \\
|
||||
2024-05-27 & Workflow \\
|
||||
% Highlight today's session on the second overlay only (overlay-aware \textbf)
\textbf<2>{2024-06-10} & \textbf<2>{Data organisation}\\
|
||||
2024-06-24 & Data sharing \\
|
||||
2024-07-08 & Clean coding \\
|
||||
2024-07-22 & Version control \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{frame}
|
||||
|
||||
% * different data sources
|
||||
% * content README file
|
||||
% * best arrangement of data
|
||||
% * redundancy
|
||||
% * anonymizing/pseudonymizing data
|
||||
|
||||
\section{Folder organisation}
|
||||
|
||||
\begin{frame}[<+->]{Some general rules}
|
||||
\begin{itemize}
|
||||
\item One project, one folder
|
||||
\item Add README file at top level
|
||||
\item Raw data are in a separate folder (and stay separate!)
|
||||
\item Have a code folder
|
||||
\item It is often a good idea to separate your data analysis from papers,
|
||||
talks, etc. (especially if you want to publish your data)
|
||||
\item Have designated folders where stuff is written to (e.\,g.,
|
||||
\texttt{results}, \texttt{figures}, \texttt{processed}, etc.)
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{One possible example!}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
% dissemination (n4) is a first-level folder too, so it gets its own edge
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (file.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{Analysis folder}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
% dissemination (n4) is a first-level folder too, so it gets its own edge
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
|
||||
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (n2.east) -- (o1.west);
|
||||
\path (n2.east) -- (o2.west);
|
||||
\path (n2.east) -- (o3.west);
|
||||
\path (n2.east) -- (o4.west);
|
||||
\path (n2.east) -- (o5.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{Analysis folder}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
% dissemination (n4) is a first-level folder too, so it gets its own edge
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
|
||||
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (n2.east) -- (o1.west);
|
||||
\path (n2.east) -- (o2.west);
|
||||
\path (n2.east) -- (o3.west);
|
||||
\path (n2.east) -- (o4.west);
|
||||
\path (n2.east) -- (o5.west);
|
||||
% third level
|
||||
\node[text width = 5cm] (p1) at (12, 0) {\faIcon[regular]{file} \verb+01_preprocessing.R+};
|
||||
\node[text width = 5cm] (p2) at (12, -0.7) {\faIcon[regular]{file} \verb+02_descriptives.R+};
|
||||
\node[text width = 5cm] (p3) at (12, -1.4) {\faIcon[regular]{file} \verb+03_modeling.R+};
|
||||
\node[text width = 5cm] (p4) at (12, -2.1) {\faIcon[regular]{file} \verb+04_plots.R+};
|
||||
\path (o1.center) -- (p1.west);
|
||||
\path (o1.center) -- (p2.west);
|
||||
\path (o1.center) -- (p3.west);
|
||||
\path (o1.center) -- (p4.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{Analysis folder}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
% dissemination (n4) is a first-level folder too, so it gets its own edge
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
|
||||
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (n2.east) -- (o1.west);
|
||||
\path (n2.east) -- (o2.west);
|
||||
\path (n2.east) -- (o3.west);
|
||||
\path (n2.east) -- (o4.west);
|
||||
\path (n2.east) -- (o5.west);
|
||||
% third level
|
||||
\node[text width = 5cm] (p1) at (12, 0) {\faIcon[regular]{file} \verb+subj1_ses01.txt+};
|
||||
\node[text width = 5cm] (p2) at (12, -0.7) {\faIcon[regular]{file} \verb+subj1_ses02.txt+};
|
||||
\node[text width = 5cm] (p3) at (12, -1.4) {\faIcon[regular]{file} \verb+subj2_ses01.txt+};
|
||||
\node[text width = 5cm] (p4) at (12, -2.1) {\faIcon[regular]{file} \verb+subj2_ses02.txt+};
|
||||
\node[text width = 5cm] (p5) at (12, -2.8) {\faIcon[regular]{file} \dots};
|
||||
\path (o2.center) -- (p1.west);
|
||||
\path (o2.center) -- (p2.west);
|
||||
\path (o2.center) -- (p3.west);
|
||||
\path (o2.center) -- (p4.west);
|
||||
\path (o2.center) -- (p5.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{Analysis folder}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node[text width = 3cm] (n2) at (3.5, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node (n4) at (4, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n3) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
% dissemination (n4) is a first-level folder too, so it gets its own edge
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
|
||||
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\node (o5) at (8.5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (n2.east) -- (o1.west);
|
||||
\path (n2.east) -- (o2.west);
|
||||
\path (n2.east) -- (o3.west);
|
||||
\path (n2.east) -- (o4.west);
|
||||
\path (n2.east) -- (o5.west);
|
||||
% third level
|
||||
\node[text width = 5cm] (p1) at (12, -0.7) {\faIcon[regular]{file}
|
||||
\verb+data_all-subj.csv+};
|
||||
\node[text width = 5cm] (p2) at (12, -1.4) {\faIcon[regular]{file}
|
||||
\verb+data_all-subj.RData+};
|
||||
\node[text width = 5cm] (p3) at (12, -2.1) {\faIcon[regular]{file}
|
||||
\verb+eval_model1.csv+};
|
||||
\node[text width = 5cm] (p4) at (12, -2.8) {\faIcon[regular]{file}
|
||||
\verb+eval_model2.csv+};
|
||||
\path (o4.center) -- (p1.west);
|
||||
\path (o4.center) -- (p2.west);
|
||||
\path (o4.center) -- (p3.west);
|
||||
\path (o4.center) -- (p4.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\pause
|
||||
The analysis folder you might want to share on OSF, GitHub, etc.
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Folder organisation}
|
||||
{Dissemination folder}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+project+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+analysis+};
|
||||
\node[text width = 3.2cm] (n3) at (3.6, -1.4) {\faIcon{folder} \verb+dissemination+};
|
||||
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
% material (n4) is a first-level folder too, so it gets its own edge
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node (o1) at (8.5, 0) {\faIcon{folder} \verb+paper+};
|
||||
\node (o2) at (8.5, -0.7) {\faIcon{folder} \verb+talks+};
|
||||
\node (o3) at (8.5, -1.4) {\faIcon{folder} \verb+figures+};
|
||||
\node (o4) at (8.5, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\node (o5) at (8.5, -2.8) {\faIcon{folder} \verb+tables+};
|
||||
\path (n3.east) -- (o1.west);
|
||||
\path (n3.east) -- (o2.west);
|
||||
\path (n3.east) -- (o3.west);
|
||||
\path (n3.east) -- (o4.west);
|
||||
\path (n3.east) -- (o5.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\pause
|
||||
Having separate folders for figures and tables helps you keep track of them
|
||||
for your paper and talks
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Figures and tables}
|
||||
\begin{itemize}
|
||||
\item Most of us (including me!) are not at a stage where we are
|
||||
writing our papers or talks as reproducible documents
|
||||
\pause
|
||||
\item It is still a good idea to create tables and figures in R and keep the
|
||||
code easily accessible
|
||||
\pause
|
||||
\item One suggestion
|
||||
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4.2cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
% figures
|
||||
\node (fig) at (0, 0) {\faIcon{folder} \verb+figures+};
|
||||
\node (n1) at (4, 0) {\faIcon[regular]{file} \verb+h1_barplot.R+};
|
||||
\node (n2) at (4, -0.7) {\faIcon[regular]{file} \verb+h1_barplot.png+};
|
||||
\path (fig.center) -- (n1.west);
|
||||
\path (fig.center) -- (n2.west);
|
||||
% tables
|
||||
\node (tab) at (0, -1.5) {\faIcon{folder} \verb+tables+};
|
||||
\node (o1) at (4, -1.5) {\faIcon[regular]{file} \verb+h1_mean-table.Rmd+};
|
||||
\node (o2) at (4, -2.2) {\faIcon[regular]{file} \verb+h1_mean-table.docx+};
|
||||
\path (tab.center) -- (o1.west);
|
||||
\path (tab.center) -- (o2.west);
|
||||
\end{tikzpicture}
|
||||
\pause
|
||||
\item I export the data for figures and tables from \texttt{analysis/code}
|
||||
to \texttt{dissemination/results} so the dissemination folder is
|
||||
self-contained
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Several data sources}
|
||||
\begin{itemize}
|
||||
\item When you have several different data sources like questionnaires and
|
||||
eye-tracking data keep them in separate folders
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node (data) at (0, 0) {\faIcon{folder} \verb+data+};
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+eyetracking+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+qualtrics+};
|
||||
\path (data.center) -- (n1.west);
|
||||
\path (data.center) -- (n2.west);
|
||||
\end{tikzpicture}
|
||||
\pause
|
||||
\item Process them separately, e.\,g., with
|
||||
\verb+01a_preprocessing_eyetracking.R+ and
|
||||
\verb+01b_preprocessing_surveys.R+ and then \verb+02_combine-data.R+
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 5cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node (results) at (0, 0) {\faIcon{folder} \verb+results+};
|
||||
\node (n1) at (4, 0) {\faIcon[regular]{file} \verb+data_eyetracking.csv+};
|
||||
\node (n2) at (4, -0.7) {\faIcon[regular]{file} \verb+data_surveys.csv+};
|
||||
\node (n3) at (4, -1.4) {\faIcon[regular]{file} \verb+data_complete.csv+};
|
||||
\path (results.center) -- (n1.west);
|
||||
\path (results.center) -- (n2.west);
|
||||
\path (results.center) -- (n3.west);
|
||||
\end{tikzpicture}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Toy example with 11 questions}
|
||||
Thank you everybody for filling out our little toy survey in Qualtrics!
|
||||
\vfill
|
||||
\tiny
|
||||
\begin{tabular}{lllll}
|
||||
\hline
|
||||
ResponseId & age & sex & data\_sharing\_1 & data\_sharing\_2 \\
|
||||
\hline
|
||||
R\_225ffqhb7qRaIGO:1 & Min. :24.00 & m : 2 & No :7 & Min. :1.000 \\
|
||||
R\_2F9fXxf3NedHqZl:1 & 1st Qu.:26.50 & f :11 & Yes:7 & 1st Qu.:1.000 \\
|
||||
R\_2foYj4iSgaBTkEO:1 & Median :28.00 & d : 1 & & Median :2.000 \\
|
||||
R\_2J9B4aLaasQ1m81:1 & Mean :29.86 & not indicated: 0 & & Mean :2.214 \\
|
||||
R\_2P1TMDNlwm0gSIk:1 & 3rd Qu.:30.00 & & & 3rd Qu.:2.000 \\
|
||||
R\_2pXfOSq8DBImG6R:1 & Max. :43.00 & & & Max. :6.000 \\
|
||||
(Other) :8 & & & & \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
|
||||
\vspace{.5cm}
|
||||
\begin{tabular}{lllllll}
|
||||
\hline
|
||||
rdm\_stmnt\_1 & rdm\_stmnt\_2 & rdm\_stmnt\_3 & rdm\_stmnt\_4 & rdm\_stmnt\_5 & career\_level\_1 & career\_level\_2 \\
|
||||
\hline
|
||||
Min. :2.000 & Min. :2 & Min. :2.000 & Min. :1.000 & Min. :1.000 & Student : 0 & Min. : 1.000 \\
|
||||
1st Qu.:3.250 & 1st Qu.:4 & 1st Qu.:2.250 & 1st Qu.:1.000 & 1st Qu.:1.000 & PhD student :11 & 1st Qu.: 1.625 \\
|
||||
Median :4.500 & Median :4 & Median :3.000 & Median :1.000 & Median :1.000 & Postdoc : 1 & Median : 2.500 \\
|
||||
Mean :4.071 & Mean :4 & Mean :2.857 & Mean :1.143 & Mean :1.143 & Senior researcher: 0 & Mean : 5.964 \\
|
||||
3rd Qu.:5.000 & 3rd Qu.:5 & 3rd Qu.:3.000 & 3rd Qu.:1.000 & 3rd Qu.:1.000 & Professor : 1 & 3rd Qu.: 4.500 \\
|
||||
Max. :5.000 & Max. :5 & Max. :5.000 & Max. :2.000 & Max. :2.000 & Other : 1 & Max. :38.000 \\
|
||||
& NA's :1 & & & & & \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{frame}
|
||||
|
||||
% print(xtable::xtable(summary(dat[, 1:5])), include.rownames = FALSE)
|
||||
% print(xtable::xtable(summary(dat[, 6:12])), include.rownames = FALSE)
|
||||
|
||||
\begin{frame}[fragile]{Folder structure for toy example}
|
||||
{One possible structure!}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 4.3cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node (ex) at (0, 0) {\faIcon{folder} \verb+example+};
|
||||
\node (n1) at (3, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (n2) at (3, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (n3) at (3, -1.4) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (ex.center) -- (n1.west);
|
||||
\path (ex.center) -- (n2.west);
|
||||
\path (ex.center) -- (n3.west);
|
||||
|
||||
\node (o1) at (7, 0.7) {\faIcon[regular]{file} \verb+01_preprocessing.R+};
|
||||
\node (o2) at (7, -0.7) {\faIcon{folder} \verb+codebook+};
|
||||
\node (o3) at (7, -1.4) {\faIcon{folder} \verb+rawdata+};
|
||||
\node (o4) at (7, -2.1) {\faIcon{folder} \verb+results+};
|
||||
\path (n1.center) -- (o1.west);
|
||||
\path (n2.center) -- (o2.west);
|
||||
\path (n2.center) -- (o3.west);
|
||||
\path (n2.center) -- (o4.west);
|
||||
|
||||
\node (p1) at (11, -0.7) {\faIcon[regular]{file} \verb+codebook_01.R+};
|
||||
\node (p2) at (11, -1.4) {\faIcon[regular]{file} \verb+codebook_01.xlsx+};
|
||||
\node (p3) at (11, -2.1) {\dots};
|
||||
|
||||
\path (o2.center) -- (p1.west);
|
||||
\path (o2.center) -- (p2.west);
|
||||
\path (o2.center) -- (p3.west);
|
||||
\end{tikzpicture}
|
||||
|
||||
\end{frame}
|
||||
|
||||
\section{Metadata}
|
||||
|
||||
\begin{frame}{Metadata answers questions}
|
||||
\begin{itemize}
|
||||
\item {\bf Who} created the data?
|
||||
\item {\bf Why} was the data created?
|
||||
\item {\bf When} was the data created?
|
||||
\item {\bf Where} is the data?
|
||||
\item {\bf How} was the data created?
|
||||
\item {\bf What} is the content of the data?
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\hfill{\tiny \citet{Wilbrandt2023}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata}
|
||||
\begin{block}{Metadata}
|
||||
\dots is data about data.\\
|
||||
\dots can be \emph{descriptive}, \emph{structural}, or \emph{administrative}.
|
||||
\end{block}
|
||||
\vfill
|
||||
\begin{columns}
|
||||
\begin{column}[t]{.5\textwidth}
|
||||
Contains information on origin and background of data like
|
||||
\begin{itemize}
|
||||
\item Who, when, why, how, \dots
|
||||
\item Used resources
|
||||
\item Used abbreviations, units, names
|
||||
\item Licenses
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\begin{column}[t]{.5\textwidth}
|
||||
Data can be anything like
|
||||
\begin{itemize}
|
||||
\item Book content
|
||||
\item Pictures or audio files
|
||||
\item Website content or a blog post
|
||||
\item Journal paper
|
||||
\item Research data
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata examples}
|
||||
{Photo}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .31]{../figures/metadata_photo}
|
||||
\end{center}
|
||||
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata examples}
|
||||
{Book}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .36]{../figures/metadata_book}
|
||||
\end{center}
|
||||
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata examples}
|
||||
{Webpage}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .27]{../figures/metadata_webpage}
|
||||
\end{center}
|
||||
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata examples}
|
||||
{WORD document}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .23]{../figures/metadata_word_document}
|
||||
\end{center}
|
||||
\hfill{\tiny \url{https://dataedo.com/kb/data-glossary/what-is-metadata}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Metadata for research data}
|
||||
\begin{tikzpicture}
|
||||
\node[font=\Large] (n1) at (0,0) {\bf \color{iwmorange} Study};
|
||||
|
||||
\node[font=\large] (i1) at (0,-1) {$\bullet$ Persons};
|
||||
\node[font=\large] (i2) at (.36,-1.5) {$\bullet$ Background};
|
||||
\node[font=\large] (i3) at (.03,-2) {$\bullet$ Funding};
|
||||
\node[font=\large] (i4) at (-.38,-2.5) {$\bullet$ \dots};
|
||||
\node[draw=iwmorange, thick, fit={(n1) (i1) (i2) (i3) (i4)}, inner sep=10pt] (box) {};
|
||||
|
||||
\node[font=\Large] (n2) at (5,0) {\bf \color{iwmorange} Data set};
|
||||
|
||||
\node[font=\large] (j1) at (4.3,-1) {$\bullet$ Files};
|
||||
\node[font=\large] (j2) at (4.57,-1.5) {$\bullet$ Sources};
|
||||
\node[font=\large] (j3) at (4.65,-2) {$\bullet$ Methods};
|
||||
\node[font=\large] (j4) at (4.18,-2.5) {$\bullet$ \dots};
|
||||
\node[draw=iwmorange, thick, fit={(n2) (j1) (j2) (j3) (j4)}, inner sep=10pt] (box) {};
|
||||
|
||||
\node[font=\Large] (n3) at (10,0) {\bf \color{iwmorange} Variables};
|
||||
|
||||
\node[font=\large] (k1) at (9.7,-1) {$\bullet$ Data type};
|
||||
\node[font=\large] (k2) at (9.69,-1.5) {$\bullet$ Scale unit};
|
||||
\node[font=\large] (k3) at (9.85,-2) {$\bullet$ Value range};
|
||||
\node[font=\large] (k4) at (9.12,-2.5) {$\bullet$ \dots};
|
||||
\node[draw=iwmorange, thick, fit={(n3) (k1) (k2) (k3) (k4)}, inner sep=10pt] (box) {};
|
||||
|
||||
\draw[-latex, thick] (n1) -- (n2);
|
||||
\draw[-latex, thick] (n2) -- (n3);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\hfill\tiny \url{https://datamanagement.hms.harvard.edu/collect/readme-files}
|
||||
\end{frame}
|
||||
|
||||
\section{README files}
|
||||
|
||||
\begin{frame}{README files}
|
||||
\begin{itemize}
|
||||
\item Can be used to give information about all levels in a research
|
||||
project: study/project, data set, variables; either in one README or in
|
||||
several ones
|
||||
\item Should provide a clear and concise description of all relevant details
|
||||
about data collection, processing, and analysis
|
||||
\item README files are created for different purposes:
|
||||
\begin{itemize}
|
||||
\item to document changes to files or file names within a folder
|
||||
\item to explain file naming conventions, practices, etc.\ ``in
|
||||
general'' for future reference
|
||||
\item to specifically accompany files/data being deposited in a
|
||||
repository
|
||||
\end{itemize}
|
||||
\item Creating a README file at the beginning of your research process,
|
||||
and updating it consistently throughout your research, will help you
|
||||
to compile a final README file when your data is ready for deposit
|
||||
\item Find a template here:
|
||||
\url{https://cornell.app.box.com/v/ReadmeTemplate}
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\hfill\tiny \url{https://datamanagement.hms.harvard.edu/collect/readme-files}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Study/project}{README on top level}
|
||||
\begin{itemize}
|
||||
\item Project name and purpose
|
||||
\item Funding information (process number!)
|
||||
\item Ethics approved? LEK number!
|
||||
\item Person(s) responsible for conducting the study
|
||||
\item One or several studies? Information about them
|
||||
\item Time/Duration of project
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Data set}{README accompanying data set(s)}
|
||||
\begin{itemize}
|
||||
\item One or more data sets?
|
||||
\item Time of data collection
|
||||
\item Person(s) responsible for data collection
|
||||
\item File organisation
|
||||
\item Naming conventions
|
||||
\item Preprocessing methods
|
||||
\item Anything that is special about the data set(s)
|
||||
\item Number of subjects
|
||||
\item Variables
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Variables}{README accompanying a specific data set}
|
||||
\begin{itemize}
|
||||
\item You can use a README (or text file called \texttt{codebook.txt} or
|
||||
similar) to document your variables
|
||||
\item Especially, if you only have a few variables, this is an easy and fast
|
||||
way to document them
|
||||
\item If you are working with extensive surveys or questionnaires, it might
|
||||
be a good time investment to create a more elaborate codebook
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\section{Codebooks}
|
||||
|
||||
\begin{frame}{What information about variables should a codebook include?}
|
||||
% slido
|
||||
\centering
|
||||
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 3}
|
||||
|
||||
\url{https://app.sli.do/event/3S1Bn3Tjknuk5J5WiqAYzG}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{A codebook should include}
|
||||
\begin{tabular}{lp{11cm}}
|
||||
\hline
|
||||
Variable name & Usually some abbreviation like \texttt{pna01} \\
|
||||
Variable label & Brief description to identify variable \\
|
||||
Question text & If applicable, exact wording from survey question \\
|
||||
Values & Values variable can take (e.\,g., 1 to 5) \\
|
||||
Value labels & If applicable, textual descriptions of the values \\
|
||||
Statistics & For example, range, mean, standard deviation for
|
||||
numeric variables; frequencies and percentages for categorical variables \\
|
||||
Missing data & If applicable, values and labels of missing data \\
|
||||
Notes & Additional notes, remarks, or comments; for measures or
|
||||
questions from copyrighted instruments, the notes field can be used to
|
||||
cite the source \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\vfill
|
||||
|
||||
\hfill\tiny \url{https://www.icpsr.umich.edu/web/ICPSR/cms/1983}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Codebooks}
|
||||
\begin{itemize}
|
||||
\item There are many different ways to create a codebook
|
||||
\item It can be a README, some other plain text file, a table (stored as CSV
|
||||
or XLSX), a WORD document, or PDF
|
||||
\item For a short questionnaire, it can be sufficient to export it as a PDF
|
||||
\item Let's walk through a couple of options\dots
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Option 1 -- Toy example with 11 questions}
|
||||
{Simple PDF}
|
||||
\begin{columns}
|
||||
\begin{column}{.5\textwidth}
|
||||
\begin{center}
|
||||
\vspace{-.4cm}
|
||||
Export from Qualtrics\\
|
||||
\includegraphics[scale = .3]{../figures/codebook_1.png}
|
||||
\end{center}
|
||||
\end{column}
|
||||
\begin{column}{.6\textwidth}
|
||||
\begin{itemize}
|
||||
\item For a simple questionnaire like this, the exported WORD document
|
||||
from Qualtrics exported to PDF might be sufficient as a codebook
|
||||
\item For longer questionnaires, the WORD document can still be a good
|
||||
starting point to create a more elaborate codebook
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Option 2 -- Toy example with 11 questions}
|
||||
{Plain text file}
|
||||
\begin{center}
|
||||
\vspace{-.3cm}
|
||||
\footnotesize
|
||||
\begin{lstlisting}[language = bash, identifierstyle=\color{iwmgray}]
|
||||
sex. Please indicate your sex.
|
||||
-------------------------------------------------------------------------------
|
||||
-1. m
|
||||
-2. f
|
||||
-3. d
|
||||
-4. not indicated
|
||||
|
||||
age. How old are you? Please enter your age in years.
|
||||
-------------------------------------------------------------------------------
|
||||
numerical input
|
||||
|
||||
data_sharing_1. Have you ever published data in a repository?
|
||||
-------------------------------------------------------------------------------
|
||||
-1. No
|
||||
-2. Yes
|
||||
\end{lstlisting}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Option 3 -- Toy example with 11 questions}
|
||||
{Creating a simple codebook in R ``by hand''}
|
||||
\footnotesize
|
||||
\begin{lstlisting}
|
||||
load("results/data_rdm-ms-ss2024_cleaned.RData")
|
||||
codebook <- data.frame(var_name = names(dat),
|
||||
var_text = c("Response Id",
|
||||
"How old are you? Please enter your age in years.",
"Please indicate your sex.",
|
||||
...
|
||||
"Sharing data is bad scientific practice",
|
||||
"What is your current career level?",
|
||||
"How long have you been working in science (in years)?"))
|
||||
|
||||
codebook$type <- sapply(dat, class)
|
||||
codebook$n <- sapply(dat, length)
|
||||
codebook$mean <- sapply(dat,
|
||||
function(x) ifelse(is.numeric(x), mean(x, na.rm = TRUE), NA))
|
||||
codebook$sd <- sapply(dat,
function(x) ifelse(is.numeric(x), sd(x, na.rm = TRUE), NA))
|
||||
|
||||
openxlsx::write.xlsx(codebook, file = "codebook/codebook_01.xlsx")
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Option 3 -- Toy example with 11 questions}
|
||||
{Creating a simple codebook in R ``by hand''}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .6]{../figures/codebook_2.png}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Option 4 -- Toy example with 11 questions}
|
||||
{Using the codebook package in R}
|
||||
\begin{itemize}
|
||||
\item When you export a Qualtrics questionnaire as an SPSS file and import it
|
||||
into R using the haven package, you can use RMarkdown to create an
|
||||
elaborate HTML codebook
|
||||
\item It works best for classical questionnaire items
|
||||
\item In our example, the survey is not formatted well enough for the
|
||||
generated codebook to be completely correct
|
||||
\end{itemize}
|
||||
\footnotesize
|
||||
\begin{lstlisting}
|
||||
#' ---
|
||||
#' title: Codebook for Data Set "RDM MS SS 2024"
|
||||
#' author: Nora Wickelmaier
|
||||
#' ---
|
||||
|
||||
#+ echo = FALSE
|
||||
dat <- haven::read_spss("../rawdata/RDM_MS_SS2024_download_2024-06-04.sav")
|
||||
codebook::codebook(dat)
|
||||
\end{lstlisting}
|
||||
\end{frame}
|
||||
|
||||
\appendix
|
||||
%%\begin{frame}[allowframebreaks]{References}
|
||||
\begin{frame}{References}
|
||||
%\renewcommand{\bibfont}{\small}
|
||||
\printbibliography
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\end{document}
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
# Toy data set for the methods seminar on data management SS2024
|
||||
|
||||
## Responsible person
|
||||
|
||||
Nora Wickelmaier
|
||||
|
||||
Referentin Forschungsmethoden und Forschungsdatenmanagement
|
||||
|
||||
Leibniz-Institut für Wissensmedien (IWM)
|
||||
|
||||
n.wickelmaier@iwm-tuebingen.de
|
||||
|
||||
## Folder structure and naming conventions
|
||||
|
||||
```
|
||||
/example/
|
||||
|
|
||||
|- /code/
|
||||
|- /data/
|
||||
|- /codebook/
|
||||
|- /rawdata/
|
||||
|- /results/
|
||||
```
|
||||
|
||||
The `code` folder contains analysis scripts written in R. The scripts are
|
||||
numbered, indicating the order they should be executed in.
|
||||
|
||||
The `data` folder contains all folders associated with data and its
|
||||
documentation.
|
||||
|
||||
The `codebook` folder contains different codebook options and R scripts that
|
||||
create these codebooks. If the codebook is created by an R script, the script
|
||||
and the codebook are named identically, e.g., `codebook_01.R` and
|
||||
`codebook_01.xlsx`.
|
||||
|
||||
The `rawdata` folder contains the downloads from Qualtrics. In Qualtrics, the
|
||||
variables have been selected and ordered and then downloaded, without the
|
||||
additional columns Qualtrics adds by default. The naming convention for the
|
||||
downloaded files is
|
||||
```
|
||||
RDM_MS_SS2024_download_<YYYY-MM-DD>.<fileending>
|
||||
```
|
||||
No other files than the downloads from Qualtrics should go into this folder!
|
||||
|
||||
The `results` folder contains processed data. The scripts in `/code/` process
|
||||
the data from `/rawdata/` and save the files containing the processed data to
|
||||
`/results/`. Data can be exported as CSV files or RData files. If different file
|
||||
formats contain the same data, they should be named identically, e.g.,
|
||||
`data_rdm-ms-ss2024_cleaned.csv` and `data_rdm-ms-ss2024_cleaned.RData`.
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
# 01_preprocessing.R
|
||||
#
|
||||
# Cleaning up data for toy data set Methods Seminar SS2024
|
||||
#
|
||||
# Input: RDM_MS_SS2024_download_2024-06-07.csv
|
||||
# Output: results/data_rdm-ms-ss2024_cleaned.csv
|
||||
# results/data_rdm-ms-ss2024_cleaned.RData
|
||||
#
|
||||
# created: 2024-06-03
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
|
||||
|
||||
dat <- read.table("data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv",
|
||||
sep = ",", skip = 3, stringsAsFactors = TRUE, na.string = "")
|
||||
|
||||
names(dat) <-
|
||||
readLines("data/rawdata/RDM_MS_SS2024_download_2024-06-07.csv", 1) |>
|
||||
strsplit(split = ",") |>
|
||||
unlist()
|
||||
|
||||
# Clean up variables
|
||||
dat$ResponseId <- factor(dat$ResponseId)
|
||||
|
||||
dat$sex <- factor(dat$sex,
|
||||
levels = c("m", "f", "d", "not indicated"))
|
||||
|
||||
dat$data_sharing_1 <- factor(dat$data_sharing_1,
|
||||
levels = c("No", "Yes"))
|
||||
|
||||
dat$career_level_1 <- factor(dat$career_level_1,
|
||||
levels = c("Student", "PhD student", "Postdoc",
|
||||
"Senior researcher", "Professor",
|
||||
"Other"))
|
||||
|
||||
dat$rdm_stmnt_1 <- factor(dat$rdm_stmnt_1,
|
||||
levels = c("Strongly disagree", "Disagree",
|
||||
"Neither agree nor disagree", "Agree",
|
||||
"Strongly agree"))
|
||||
|
||||
dat$rdm_stmnt_2 <- factor(dat$rdm_stmnt_2,
|
||||
levels = c("Strongly disagree", "Disagree",
|
||||
"Neither agree nor disagree", "Agree",
|
||||
"Strongly agree"))
|
||||
|
||||
dat$rdm_stmnt_3 <- factor(dat$rdm_stmnt_3,
|
||||
levels = c("Strongly disagree", "Disagree",
|
||||
"Neither agree nor disagree", "Agree",
|
||||
"Strongly agree"))
|
||||
|
||||
dat$rdm_stmnt_4 <- factor(dat$rdm_stmnt_4,
|
||||
levels = c("Strongly disagree", "Disagree",
|
||||
"Neither agree nor disagree", "Agree",
|
||||
"Strongly agree"))
|
||||
|
||||
dat$rdm_stmnt_5 <- factor(dat$rdm_stmnt_5,
|
||||
levels = c("Strongly disagree", "Disagree",
|
||||
"Neither agree nor disagree", "Agree",
|
||||
"Strongly agree"))
|
||||
|
||||
## Fix data_sharing_2
|
||||
dat$data_sharing_2[dat$data_sharing_2 == "1 out of 4"] <- 1
|
||||
dat$data_sharing_2 <- as.numeric(as.character(dat$data_sharing_2))  # go via character: as.numeric() on a factor returns the level codes, not the entered numbers
|
||||
|
||||
# Create numeric statement variables
|
||||
|
||||
dat$rdm_stmnt_1 <- as.numeric(dat$rdm_stmnt_1)
|
||||
dat$rdm_stmnt_2 <- as.numeric(dat$rdm_stmnt_2)
|
||||
dat$rdm_stmnt_3 <- as.numeric(dat$rdm_stmnt_3)
|
||||
dat$rdm_stmnt_4 <- as.numeric(dat$rdm_stmnt_4)  # each item is converted from its own column
|
||||
dat$rdm_stmnt_5 <- as.numeric(dat$rdm_stmnt_5)
|
||||
|
||||
# Save cleaned data set
|
||||
write.table(dat, file = "data/results/data_rdm-ms-ss2024_cleaned.csv", sep = ";",
|
||||
row.names = FALSE, quote = FALSE)
|
||||
|
||||
save(dat, file = "data/results/data_rdm-ms-ss2024_cleaned.RData")
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
# codebook_generation_01.R
|
||||
#
|
||||
# Code generation example
|
||||
#
|
||||
# Input: results/data_rdm-ms-ss2024_cleaned.RData
|
||||
# Output: data/codebook/codebook_01.csv, data/codebook/codebook_01.xlsx
|
||||
#
|
||||
# created: 2024-06-04
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
|
||||
|
||||
load("data/results/data_rdm-ms-ss2024_cleaned.RData")
|
||||
|
||||
codebook <- data.frame(var_name = names(dat),
|
||||
var_text = c("Response Id",  # texts must follow the column order of dat: ResponseId, age, sex, ...
|
||||
"How old are you? Please enter your age in years.", "Please indicate your sex.",
|
||||
"Have you ever published data in a repository?",
|
||||
"How many of your data sets have you published so far?",
|
||||
"All my analyses are preregistered",
|
||||
"Sharing my data is very important to me",
|
||||
"I invest more time in research data management than my colleagues",
|
||||
"I think research data management is overrated",
|
||||
"Sharing data is bad scientific practice",
|
||||
"What is your current career level?",
|
||||
"How long have you been working in science (in years)?")
|
||||
|
||||
)
|
||||
|
||||
codebook$type <- sapply(dat, class)
|
||||
codebook$n <- sapply(dat, length)
|
||||
codebook$mean <- sapply(dat, function(x) ifelse(is.numeric(x), mean(x, na.rm = TRUE), NA))
|
||||
codebook$sd <- sapply(dat, function(x) ifelse(is.numeric(x), sd(x, na.rm = TRUE), NA))  # na.rm as for the mean; otherwise one missing answer makes the sd NA
|
||||
|
||||
write.table(codebook,
|
||||
file = "data/codebook/codebook_01.csv",
|
||||
na = "",
|
||||
sep = ";",
|
||||
row.names = FALSE,  # without this, data rows carry an extra leading field that the header lacks
quote = FALSE)
|
||||
|
||||
openxlsx::write.xlsx(codebook, file = "data/codebook/codebook_01.xlsx")
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
var_name;var_text;type;n;mean;sd
|
||||
1;ResponseId;Response Id;factor;13;;
|
||||
2;age;Please indicate your sex.;integer;13;29.6923076923077;5.99144689515278
|
||||
3;sex;How old are you? Please enter your age in years.;factor;13;;
|
||||
4;data_sharing_1;Have you ever published data in a repository?;factor;13;;
|
||||
5;data_sharing_2;How many of your data sets have you published so far?;numeric;13;2.30769230769231;1.65250392761083
|
||||
6;rdm_stmnt_1;All my analyses are preregistered;numeric;13;4.15384615384615;1.14354374979373
|
||||
7;rdm_stmnt_2;Sharing my data is very important to me;numeric;13;4;
|
||||
8;rdm_stmnt_3;I invest more time in research data management than my colleagues;numeric;13;2.84615384615385;0.800640769025436
|
||||
9;rdm_stmnt_4;I think research data management is overrated;numeric;13;1.15384615384615;0.375533808099405
|
||||
10;rdm_stmnt_5;Sharing data is bad scientific practice;numeric;13;1.15384615384615;0.375533808099405
|
||||
11;career_level_1;What is your current career level?;factor;13;;
|
||||
12;career_level_2;How long have you been working in science (in years)?;numeric;13;6.26923076923077;10.1788493632126
|
||||
|
@@ -0,0 +1,68 @@
|
||||
# codebook_generation_01.R
|
||||
#
|
||||
# Code generation example
|
||||
#
|
||||
# Input: data/rawdata/RDM_MS_SS2024_download_2024-06-04.sav
|
||||
# Output: data/codebook/codebook_02.csv, data/codebook/codebook_02.xlsx
|
||||
#
|
||||
# created: 2024-06-04
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/teaching/iwm/data_management/03_data_organisation/example/")
|
||||
|
||||
dat <- as.data.frame(haven::read_spss("data/rawdata/RDM_MS_SS2024_download_2024-06-04.sav"))
|
||||
|
||||
## Fix data_sharing_2
|
||||
dat$data_sharing_2[dat$data_sharing_2 == "1 out of 4"] <- 1
|
||||
|
||||
# Look at attributes
|
||||
attributes(dat$sex)
|
||||
|
||||
# Create codebook with survey questions
|
||||
codebook <- data.frame(variable = names(dat),
|
||||
label = sapply(dat, function(x) attr(x, "label")))
|
||||
|
||||
# Clean up data frame
|
||||
dat <- as.data.frame(lapply(dat, sjlabelled::unlabel))
|
||||
|
||||
sapply(dat, class) # Look at classes
|
||||
|
||||
dat$age <- as.numeric(dat$age)
|
||||
dat$career_level_2 <- as.numeric(dat$career_level_2)
|
||||
dat$data_sharing_2 <- as.numeric(dat$data_sharing_2)
|
||||
|
||||
dat$sex <- factor(dat$sex,
|
||||
levels = 1:4,
|
||||
labels = names(attr(dat$sex, "labels")))
|
||||
dat$data_sharing_1 <- factor(dat$data_sharing_1,
|
||||
levels = 1:2,
|
||||
labels = names(attr(dat$data_sharing_1, "labels")))
|
||||
dat$career_level_1 <- factor(dat$career_level_1,
|
||||
levels = 1:6,
|
||||
labels = names(attr(dat$career_level_1, "labels")))
|
||||
|
||||
# Add descriptive statistics to codebook
|
||||
|
||||
codebook$n <- sapply(dat, length)
|
||||
codebook$type <- sapply(dat, class)
|
||||
codebook$mean <- sapply(dat, function(x) ifelse(is.numeric(x), mean(x, na.rm = TRUE), NA))
|
||||
codebook$sd <- sapply(dat, function(x) ifelse(is.numeric(x), sd(x, na.rm = TRUE), NA))  # na.rm as for the mean; otherwise one missing answer makes the sd NA
|
||||
|
||||
# props <- function(x) {
|
||||
# if (is.factor(x)) {
|
||||
# proportions(summary(x))
|
||||
# } else {
|
||||
# NA
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# codebook$prop <- lapply(dat, props)
|
||||
|
||||
write.table(codebook,
|
||||
file = "data/codebook/codebook_02.csv",
|
||||
na = "",
|
||||
sep = ";",
|
||||
row.names = FALSE,  # row names only repeat the `variable` column and misalign rows against the header
quote = FALSE)
|
||||
|
||||
openxlsx::write.xlsx(codebook, file = "data/codebook/codebook_02.xlsx")
|
||||
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
variable;label;n;type;mean;sd
|
||||
ResponseId;ResponseId;Response ID;13;character;;
|
||||
age;age;How old are you? Please enter your age in years.;13;numeric;29.6923076923077;5.99144689515278
|
||||
sex;sex;Please indicate your sex.;13;factor;;
|
||||
data_sharing_1;data_sharing_1;Have you ever published data in a repository?;13;factor;;
|
||||
data_sharing_2;data_sharing_2;How many of your data sets have you published so far?;13;numeric;1.38461538461538;1.85015591858549
|
||||
rdm_stmnt_1;rdm_stmnt_1;Please indicate how much you agree with the following statements - All my analyses are preregistered;13;numeric;4.15384615384615;1.14354374979373
|
||||
rdm_stmnt_2;rdm_stmnt_2;Please indicate how much you agree with the following statements - Sharing my data is very important to me;13;numeric;4;
|
||||
rdm_stmnt_3;rdm_stmnt_3;Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues;13;numeric;2.84615384615385;0.800640769025436
|
||||
rdm_stmnt_4;rdm_stmnt_4;Please indicate how much you agree with the following statements - I think research data management is overrated;13;numeric;1.84615384615385;0.987096233585649
|
||||
rdm_stmnt_5;rdm_stmnt_5;Please indicate how much you agree with the following statements - Sharing data is bad scientific practice;13;numeric;1.15384615384615;0.375533808099405
|
||||
career_level_1;career_level_1;What is your current career level?;13;factor;;
|
||||
career_level_2;career_level_2;How long have you been working in science (in years)?;13;numeric;6.26923076923077;10.1788493632126
|
||||
|
@@ -0,0 +1,10 @@
|
||||
#' ---
|
||||
#' title: Codebook for Data Set "RDM MS SS 2024"
|
||||
#' author: Nora Wickelmaier
|
||||
#' ---
|
||||
|
||||
#+ echo = FALSE
|
||||
dat <- haven::read_spss("../rawdata/RDM_MS_SS2024_download_2024-06-04.sav")
|
||||
|
||||
codebook::codebook(dat)
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
###############################################################################
|
||||
This file contains an overview of the variables from a toy data set collected
|
||||
at the methods seminar SS 2024. The raw data contained in
|
||||
"RDM_MS_SS2024_download_2024-06-03_v1.csv" contain additional variables
|
||||
created by Qualtrics. The variables have been preprocessed and are stored in
|
||||
"data_rdm-ms-ss2024_cleaned.csv".
|
||||
###############################################################################
|
||||
|
||||
|
||||
ResponseId. <Qualtrics ID of subject>
|
||||
-------------------------------------------------------------------------------
|
||||
random sequence of numbers, letters, and underscore
|
||||
|
||||
|
||||
sex. Please indicate your sex.
|
||||
-------------------------------------------------------------------------------
|
||||
-1. m
|
||||
-2. f
|
||||
-3. d
|
||||
-4. not indicated
|
||||
|
||||
|
||||
age. How old are you? Please enter your age in years.
|
||||
-------------------------------------------------------------------------------
|
||||
numerical input
|
||||
|
||||
|
||||
data_sharing_1. Have you ever published data in a repository?
|
||||
-------------------------------------------------------------------------------
|
||||
-1. No
|
||||
-2. Yes
|
||||
|
||||
|
||||
data_sharing_2. How many of your data sets have you published so far?
|
||||
-------------------------------------------------------------------------------
|
||||
numerical input
|
||||
|
||||
|
||||
rdm_stmnt. Please indicate how much you agree with the following statements:
|
||||
|
||||
rdm_stmnt_1. All my analyses are preregistered
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_2. Sharing my data is very important to me
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_3. I invest more time in research data management than my colleagues
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_4. I think research data management is overrated
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
rdm_stmnt_5. Sharing data is bad scientific practice
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Strongly disagree
|
||||
-2. Disagree
|
||||
-3. Neither agree nor disagree
|
||||
-4. Agree
|
||||
-5. Strongly agree
|
||||
|
||||
|
||||
career_level_1. What is your current career level?
|
||||
-------------------------------------------------------------------------------
|
||||
-1. Student
|
||||
-2. PhD student
|
||||
-3. Postdoc
|
||||
-4. Senior researcher
|
||||
-5. Professor
|
||||
-6. Other
|
||||
|
||||
|
||||
career_level_2. How long have you been working in science (in years)?
|
||||
-------------------------------------------------------------------------------
|
||||
numerical input
|
||||
@@ -0,0 +1,16 @@
|
||||
ResponseId,age,sex,data_sharing_1,data_sharing_2,rdm_stmnt_1,rdm_stmnt_2,rdm_stmnt_3,rdm_stmnt_4,rdm_stmnt_5,career_level_1,career_level_2
|
||||
Response ID,How old are you? Please enter your age in years.,Please indicate your sex.,Have you ever published data in a repository?,How many of your data sets have you published so far?,Please indicate how much you agree with the following statements - All my analyses are preregistered,Please indicate how much you agree with the following statements - Sharing my data is very important to me,Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues,Please indicate how much you agree with the following statements - I think research data management is overrated,Please indicate how much you agree with the following statements - Sharing data is bad scientific practice,What is your current career level?,How long have you been working in science (in years)?
|
||||
"{""ImportId"":""_recordId""}","{""ImportId"":""QID3_TEXT""}","{""ImportId"":""QID1""}","{""ImportId"":""QID4""}","{""ImportId"":""QID7_TEXT""}","{""ImportId"":""QID2_1""}","{""ImportId"":""QID2_2""}","{""ImportId"":""QID2_3""}","{""ImportId"":""QID2_4""}","{""ImportId"":""QID2_5""}","{""ImportId"":""QID8""}","{""ImportId"":""QID9_TEXT""}"
|
||||
R_8q7OpSkcuPT7SbI,42,f,No,1,Neither agree nor disagree,Agree,Strongly agree,Strongly disagree,Strongly disagree,Other,14
|
||||
R_8Io4pbk0A1a37VL,28,f,Yes,1,Strongly agree,,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,1
|
||||
R_2J9B4aLaasQ1m81,28,f,Yes,1 out of 4,Strongly agree,Strongly agree,Disagree,Disagree,Strongly disagree,PhD student,3
|
||||
R_80kqWr3W48SgiUZ,43,f,Yes,6,Agree,Agree,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,3
|
||||
R_8QpI8T0rjTjaPPr,30,f,Yes,4,Strongly agree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,5
|
||||
R_8QoVv6THz1Qjtuz,28,f,Yes,1,Disagree,Disagree,Disagree,Agree,Strongly disagree,Professor,38
|
||||
R_2F9fXxf3NedHqZl,25,d,No,0,Agree,Strongly agree,Disagree,Neither agree nor disagree,Disagree,PhD student,2
|
||||
R_2foYj4iSgaBTkEO,24,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
|
||||
R_83T6Oak5vI6GNJ7,30,f,Yes,1,Strongly agree,Agree,Neither agree nor disagree,Neither agree nor disagree,Strongly disagree,Postdoc,7
|
||||
R_2Vz26rWsOLYwqnD,25,m,Yes,3,Agree,Agree,Neither agree nor disagree,Disagree,Disagree,PhD student,2
|
||||
R_8HcBgUUm1BXFfhv,29,m,No,0,Strongly agree,Disagree,Disagree,Strongly disagree,Strongly disagree,PhD student,3
|
||||
R_2P1TMDNlwm0gSIk,26,f,No,0,Disagree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1.5
|
||||
R_225ffqhb7qRaIGO,28,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
|
||||
|
@@ -0,0 +1,66 @@
|
||||
* Encoding: UTF-8.
|
||||
TITLE "RDM_MS_SS2024".
|
||||
SUBTITLE "".
|
||||
VARIABLE LABELS
|
||||
ResponseId "Response ID"
|
||||
age "How old are you? Please enter your age in years."
|
||||
sex "Please indicate your sex."
|
||||
data_sharing_1 "Have you ever published data in a repository?"
|
||||
data_sharing_2 "How many of your data sets have you published so far?"
|
||||
rdm_stmnt_1 "Please indicate how much you agree with the following statements - All my analyses are preregistered"
|
||||
rdm_stmnt_2 "Please indicate how much you agree with the following statements - Sharing my data is very important to me"
|
||||
rdm_stmnt_3 "Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues"
|
||||
rdm_stmnt_4 "Please indicate how much you agree with the following statements - I think research data management is overrated"
|
||||
rdm_stmnt_5 "Please indicate how much you agree with the following statements - Sharing data is bad scientific practice"
|
||||
career_level_1 "What is your current career level?"
|
||||
career_level_2 "How long have you been working in science (in years)?"
|
||||
.
|
||||
VALUE LABELS
|
||||
/sex
|
||||
1 "m"
|
||||
2 "f"
|
||||
3 "d"
|
||||
4 "not indicated"
|
||||
/data_sharing_1
|
||||
1 "No"
|
||||
2 "Yes"
|
||||
/rdm_stmnt_1
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_2
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_3
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_4
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_5
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/career_level_1
|
||||
1 "Student"
|
||||
2 "PhD student"
|
||||
3 "Postdoc"
|
||||
4 "Senior researcher"
|
||||
5 "Professor"
|
||||
6 "Other"
|
||||
.
|
||||
CACHE.
|
||||
EXECUTE.
|
||||
@@ -0,0 +1,17 @@
|
||||
ResponseId,age,sex,data_sharing_1,data_sharing_2,rdm_stmnt_1,rdm_stmnt_2,rdm_stmnt_3,rdm_stmnt_4,rdm_stmnt_5,career_level_1,career_level_2
|
||||
Response ID,How old are you? Please enter your age in years.,Please indicate your sex.,Have you ever published data in a repository?,How many of your data sets have you published so far?,Please indicate how much you agree with the following statements - All my analyses are preregistered,Please indicate how much you agree with the following statements - Sharing my data is very important to me,Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues,Please indicate how much you agree with the following statements - I think research data management is overrated,Please indicate how much you agree with the following statements - Sharing data is bad scientific practice,What is your current career level?,How long have you been working in science (in years)?
|
||||
"{""ImportId"":""_recordId""}","{""ImportId"":""QID3_TEXT""}","{""ImportId"":""QID1""}","{""ImportId"":""QID4""}","{""ImportId"":""QID7_TEXT""}","{""ImportId"":""QID2_1""}","{""ImportId"":""QID2_2""}","{""ImportId"":""QID2_3""}","{""ImportId"":""QID2_4""}","{""ImportId"":""QID2_5""}","{""ImportId"":""QID8""}","{""ImportId"":""QID9_TEXT""}"
|
||||
R_8q7OpSkcuPT7SbI,42,f,No,1,Neither agree nor disagree,Agree,Strongly agree,Strongly disagree,Strongly disagree,Other,14
|
||||
R_8Io4pbk0A1a37VL,28,f,Yes,1,Strongly agree,,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,1
|
||||
R_2J9B4aLaasQ1m81,28,f,Yes,1 out of 4,Strongly agree,Strongly agree,Disagree,Disagree,Strongly disagree,PhD student,3
|
||||
R_80kqWr3W48SgiUZ,43,f,Yes,6,Agree,Agree,Neither agree nor disagree,Disagree,Strongly disagree,PhD student,3
|
||||
R_8QpI8T0rjTjaPPr,30,f,Yes,4,Strongly agree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,5
|
||||
R_8QoVv6THz1Qjtuz,28,f,Yes,1,Disagree,Disagree,Disagree,Agree,Strongly disagree,Professor,38
|
||||
R_2F9fXxf3NedHqZl,25,d,No,0,Agree,Strongly agree,Disagree,Neither agree nor disagree,Disagree,PhD student,2
|
||||
R_2foYj4iSgaBTkEO,24,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
|
||||
R_83T6Oak5vI6GNJ7,30,f,Yes,1,Strongly agree,Agree,Neither agree nor disagree,Neither agree nor disagree,Strongly disagree,Postdoc,7
|
||||
R_2Vz26rWsOLYwqnD,25,m,Yes,3,Agree,Agree,Neither agree nor disagree,Disagree,Disagree,PhD student,2
|
||||
R_8HcBgUUm1BXFfhv,29,m,No,0,Strongly agree,Disagree,Disagree,Strongly disagree,Strongly disagree,PhD student,3
|
||||
R_2P1TMDNlwm0gSIk,26,f,No,0,Disagree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1.5
|
||||
R_225ffqhb7qRaIGO,28,f,No,0,Strongly agree,Strongly agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,1
|
||||
R_2pXfOSq8DBImG6R,32,f,No,0,Neither agree nor disagree,Agree,Neither agree nor disagree,Strongly disagree,Strongly disagree,PhD student,2
|
||||
|
@@ -0,0 +1,66 @@
|
||||
* Encoding: UTF-8.
|
||||
TITLE "RDM_MS_SS2024".
|
||||
SUBTITLE "".
|
||||
VARIABLE LABELS
|
||||
ResponseId "Response ID"
|
||||
age "How old are you? Please enter your age in years."
|
||||
sex "Please indicate your sex."
|
||||
data_sharing_1 "Have you ever published data in a repository?"
|
||||
data_sharing_2 "How many of your data sets have you published so far?"
|
||||
rdm_stmnt_1 "Please indicate how much you agree with the following statements - All my analyses are preregistered"
|
||||
rdm_stmnt_2 "Please indicate how much you agree with the following statements - Sharing my data is very important to me"
|
||||
rdm_stmnt_3 "Please indicate how much you agree with the following statements - I invest more time in research data management than my colleagues"
|
||||
rdm_stmnt_4 "Please indicate how much you agree with the following statements - I think research data management is overrated"
|
||||
rdm_stmnt_5 "Please indicate how much you agree with the following statements - Sharing data is bad scientific practice"
|
||||
career_level_1 "What is your current career level?"
|
||||
career_level_2 "How long have you been working in science (in years)?"
|
||||
.
|
||||
VALUE LABELS
|
||||
/sex
|
||||
1 "m"
|
||||
2 "f"
|
||||
3 "d"
|
||||
4 "not indicated"
|
||||
/data_sharing_1
|
||||
1 "No"
|
||||
2 "Yes"
|
||||
/rdm_stmnt_1
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_2
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_3
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_4
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/rdm_stmnt_5
|
||||
1 "Strongly disagree"
|
||||
2 "Disagree"
|
||||
3 "Neither agree nor disagree"
|
||||
4 "Agree"
|
||||
5 "Strongly agree"
|
||||
/career_level_1
|
||||
1 "Student"
|
||||
2 "PhD student"
|
||||
3 "Postdoc"
|
||||
4 "Senior researcher"
|
||||
5 "Professor"
|
||||
6 "Other"
|
||||
.
|
||||
CACHE.
|
||||
EXECUTE.
|
||||
@@ -0,0 +1,15 @@
|
||||
ResponseId;age;sex;data_sharing_1;data_sharing_2;rdm_stmnt_1;rdm_stmnt_2;rdm_stmnt_3;rdm_stmnt_4;rdm_stmnt_5;career_level_1;career_level_2
|
||||
R_8q7OpSkcuPT7SbI;42;f;No;2;3;4;5;1;1;Other;14
|
||||
R_8Io4pbk0A1a37VL;28;f;Yes;2;5;NA;3;1;1;PhD student;1
|
||||
R_2J9B4aLaasQ1m81;28;f;Yes;2;5;5;2;1;1;PhD student;3
|
||||
R_80kqWr3W48SgiUZ;43;f;Yes;6;4;4;3;1;1;PhD student;3
|
||||
R_8QpI8T0rjTjaPPr;30;f;Yes;5;5;4;3;1;1;PhD student;5
|
||||
R_8QoVv6THz1Qjtuz;28;f;Yes;2;2;2;2;1;1;Professor;38
|
||||
R_2F9fXxf3NedHqZl;25;d;No;1;4;5;2;2;2;PhD student;2
|
||||
R_2foYj4iSgaBTkEO;24;f;No;1;5;5;3;1;1;PhD student;1
|
||||
R_83T6Oak5vI6GNJ7;30;f;Yes;2;5;4;3;1;1;Postdoc;7
|
||||
R_2Vz26rWsOLYwqnD;25;m;Yes;4;4;4;3;2;2;PhD student;2
|
||||
R_8HcBgUUm1BXFfhv;29;m;No;1;5;2;2;1;1;PhD student;3
|
||||
R_2P1TMDNlwm0gSIk;26;f;No;1;2;4;3;1;1;PhD student;1.5
|
||||
R_225ffqhb7qRaIGO;28;f;No;1;5;5;3;1;1;PhD student;1
|
||||
R_2pXfOSq8DBImG6R;32;f;No;1;3;4;3;1;1;PhD student;2
|
||||
|
@@ -0,0 +1,515 @@
|
||||
\documentclass[aspectratio=169]{beamer}
|
||||
|
||||
\usepackage{listings}
|
||||
%\usepackage[utf8]{inputenc}
|
||||
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
|
||||
\addbibresource{../literature/lit.bib}
|
||||
|
||||
\usepackage{fancyvrb}
|
||||
\usepackage{fontawesome5} % get icons
|
||||
\usepackage{multirow}
|
||||
\usepackage{color, colortbl}
|
||||
|
||||
\usepackage{tikz}
|
||||
\usetikzlibrary{fit}
|
||||
\usepackage[edges]{forest}
|
||||
|
||||
\lstset{language=R,%
|
||||
backgroundcolor=\color{iwmgray!15!white},
|
||||
basicstyle=\ttfamily\color{iwmgray},
|
||||
frame=none,
|
||||
commentstyle=\slshape\color{iwmgreen},
|
||||
keywordstyle=\bfseries\color{iwmgray},
|
||||
identifierstyle=\color{iwmpurple},
|
||||
stringstyle=\color{iwmblue},
|
||||
numbers=none,%left,numberstyle=\tiny,
|
||||
basewidth={.5em, .4em},
|
||||
showstringspaces=false,
|
||||
emphstyle=\color{red!50!white}}
|
||||
|
||||
\makeatletter \def\newblock{\beamer@newblock} \makeatother
|
||||
|
||||
\beamertemplatenavigationsymbolsempty
|
||||
\setbeamertemplate{itemize items}[circle]
|
||||
\setbeamertemplate{section in toc}[circle]
|
||||
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
|
||||
\setbeamercolor{block body}{bg=iwmorange!50!white}
|
||||
\setbeamercolor{block title}{fg=white, bg=iwmorange}
|
||||
% Definitions for biblatex
|
||||
\setbeamercolor{bibliography entry note}{fg=iwmgray}
|
||||
\setbeamercolor{bibliography entry author}{fg=iwmgray}
|
||||
\setbeamertemplate{bibliography item}{}
|
||||
|
||||
\definecolor{iwmorange}{RGB}{255,105,0}
|
||||
\definecolor{iwmgray}{RGB}{67,79,79}
|
||||
\definecolor{iwmblue}{RGB}{60,180,220}
|
||||
\definecolor{iwmgreen}{RGB}{145,200,110}
|
||||
\definecolor{iwmpurple}{RGB}{120,0,75}
|
||||
|
||||
\setbeamercolor{title}{fg=iwmorange}
|
||||
\setbeamercolor{frametitle}{fg=iwmorange}
|
||||
\setbeamercolor{structure}{fg=iwmorange}
|
||||
\setbeamercolor{normal text}{fg=iwmgray}
|
||||
\setbeamercolor{author}{fg=iwmgray}
|
||||
\setbeamercolor{date}{fg=iwmgray}
|
||||
|
||||
\newcommand{\vect}[1]{\mathbf{#1}}
|
||||
\newcommand{\mat}[1]{\mathbf{#1}}
|
||||
\newcommand{\gvect}[1]{\boldsymbol{#1}}
|
||||
\newcommand{\gmat}[1]{\boldsymbol{#1}}
|
||||
|
||||
\AtBeginSection[]{
|
||||
\frame{
|
||||
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
|
||||
|
||||
\setbeamertemplate{headline}{
|
||||
\begin{beamercolorbox}{section in head}
|
||||
\vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
|
||||
\end{beamercolorbox}
|
||||
}
|
||||
|
||||
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
|
||||
|
||||
\title{Data sharing}
|
||||
\author{Nora Wickelmaier}
|
||||
\date{June 24, 2024}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\begin{frame}{}
|
||||
\thispagestyle{empty}
|
||||
\titlepage
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{What are the benefits of sharing your data?}
|
||||
% slido
|
||||
\centering
|
||||
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 4}
|
||||
|
||||
\url{https://app.sli.do/event/m5FEcBYkqtVAsjkdTsKsmd}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[<+->]{Benefits of sharing data}
|
||||
Sharing data
|
||||
\begin{itemize}
|
||||
\item[\dots] ensures that data are not ultimately lost (save data for posterity)
|
||||
\item[\dots] is consistent with scientific norms of openness and rigor
|
||||
\item[\dots] increases citation scores of papers
|
||||
\item[\dots] encourages more research because it enables secondary analyses
|
||||
\item[\dots] facilitates subsequent reanalyses (correct errors, emphasize
|
||||
robustness of original results)
|
||||
\item[\dots] is demanded by most third party funding agencies
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\hfill\tiny \citet{Wicherts2012}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Agenda}
|
||||
\centering
|
||||
\begin{tabular}{ll}
|
||||
\hline
|
||||
Date & Topic \\
|
||||
\hline
|
||||
2024-05-13 & Introduction to data management \\
|
||||
2024-05-27 & Workflow \\
|
||||
2024-06-10 & Data organisation\\
|
||||
\only<1>{2024-06-24}\only<2>{\bf 2024-06-24} & \only<1>{Data sharing}\only<2>{\bf Data sharing} \\
|
||||
2024-07-08 & Clean coding \\
|
||||
2024-07-22 & Version control \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{frame}
|
||||
|
||||
% uploading under a license (CC-BY....)
|
||||
% loading data on an archive, repository etc...
|
||||
% Doing the archive
|
||||
% Important things before the open-access data
|
||||
% Where to store data for long-term accessibility (conventions?)
|
||||
% Tools, where I should upload my final data
|
||||
% Upload data before or after publishing a paper? Time management
|
||||
|
||||
\section{Data organisation}
|
||||
|
||||
\begin{frame}[<+->]{What we covered so far}
|
||||
\begin{itemize}
|
||||
\item What habits do we need for effective research data management?
|
||||
\item What is a workflow and why do we need one?
|
||||
\item What needs to be considered when naming files of a research project?
|
||||
\item How to organize folders for a research project?
|
||||
\item What metadata should be added to my research project?
|
||||
\item What are good ways to document a data set?
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Examples for documenting data sets}
|
||||
\begin{enumerate}
|
||||
\item A recent paper with published data by \citet{Ngo2023} investigating
|
||||
what cues are considered by Twitter users to identify social bots
|
||||
\item A multi-cohort, longitudinal study by the Hector Research Institute of
|
||||
Education Sciences and Psychology at the University of Tübingen:
|
||||
Transformation of the secondary school system and academic careers
|
||||
\citep[TOSCA,][]{Koeller2004}
|
||||
\item Editorial on why to publish your data with an accompanying data set
|
||||
by \citet{Wicherts2012}
|
||||
\end{enumerate}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{\citet{Ngo2023}}
|
||||
They provide
|
||||
\begin{itemize}
|
||||
\item A data set with 221 observations and 633 variables
|
||||
\item A PDF with all measures and the scenario used for collecting the data
|
||||
\end{itemize}
|
||||
\vspace{.3cm}
|
||||
\begin{block}{Exercise}
|
||||
\begin{itemize}
|
||||
\item Go to \url{https://osf.io/6y3nk/} and download the files
|
||||
\texttt{data.csv} and
|
||||
\texttt{Experimental-Study-Measures and scenario.pdf}
|
||||
\item Read the data into R using \texttt{read.csv()}
|
||||
\item Find out which variables in the data correspond to measure
|
||||
``(9)~Demographics''
|
||||
\end{itemize}
|
||||
\end{block}
|
||||
\vspace{.3cm}
|
||||
\pause
|
||||
(BTW: Sharing the data in this form is better than \emph{not} sharing them,
|
||||
in my opinion)
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{What additional information do we need to use these data?}
|
||||
% slido
|
||||
\centering
|
||||
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 4}
|
||||
|
||||
\url{https://app.sli.do/event/m5FEcBYkqtVAsjkdTsKsmd}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{TOSCA}
|
||||
\begin{itemize}
|
||||
\item Multi-cohort study that includes longitudinal data for several cohorts
|
||||
\item Broad spectrum of achievement test data and psycho-social variables
|
||||
\item Large number of publications on different topics using these data
|
||||
\item This is not the original data set, but a prepared version for teaching
|
||||
statistics (hence, proportions in the data and the codebook are not
|
||||
identical)
|
||||
\end{itemize}
|
||||
\begin{block}{Exercise}
|
||||
\begin{itemize}
|
||||
\item Read the data set \texttt{TOSCAtoTeach\_W123.sav} into R using
|
||||
\texttt{foreign::read.spss()} or \texttt{haven::read\_spss()}
|
||||
\item Create contingency tables for the variables \texttt{sform} and
|
||||
\texttt{szweig1} and compare the results to the codebook
|
||||
\texttt{Skalenhandbuch\_TOSCAtoTeachW123.pdf}
|
||||
\end{itemize}
|
||||
\end{block}
|
||||
\hfill{\tiny \url{https://uni-tuebingen.de/en/faculties/faculty-of-economics-and-social-sciences/subjects/department-of-social-sciences/education-sciences-and-psychology/research/current-studies/tosca}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{\citet{Wicherts2012}}
|
||||
They provide
|
||||
\begin{itemize}
|
||||
\item A data set with 537 observations and 79 variables
|
||||
(\texttt{1-s2.0-S0160289612000050-mmc2.xls})
|
||||
\item A codebook with variable names and some descriptive statistics for
|
||||
the scales (\texttt{1-s2.0-S0160289612000050-mmc1.doc})
|
||||
\item ``Publish (your data) or (let the data) perish! Why not publish your
|
||||
data too?''
|
||||
\item Data come from a freshman-testing program called ``Testweek''
|
||||
\item (Try \texttt{readxl::read\_excel()} to read the data into R)
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{What is the single one thing that would make sharing these data
|
||||
infinitely better?}
|
||||
% slido
|
||||
\centering
|
||||
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 4}
|
||||
|
||||
\url{https://app.sli.do/event/m5FEcBYkqtVAsjkdTsKsmd}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[<+->]{Non-anonymous data}
|
||||
\begin{itemize}
|
||||
\item Before putting data into any cloud, you should always take a moment to
|
||||
reflect if your data are anonymous
|
||||
\item No (third-party) cloud storage, even if it is not publicly accessible
|
||||
\item If your data contains personal data, it should always be stored
|
||||
locally, ideally on an encrypted device
|
||||
\item You should have a plan --- before ever collecting the data --- how,
|
||||
when, and by whom the data will be anonymized
|
||||
\item All data should eventually be anonymized! (Yes, even audio and video
|
||||
data)
|
||||
\item IWM servers can be considered local
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\section[Collaborative use]{Sharing data for collaborative use}
|
||||
|
||||
\begin{frame}[<+->]{Working together with the same data}
|
||||
\begin{itemize}
|
||||
\item Part of data organisation is to think about who needs access to
|
||||
your data
|
||||
\item Often these are colleagues from the same lab and there is
|
||||
infrastructure to share files and scripts easily
|
||||
\item The IWM offers several solutions for sharing your data (internally and
|
||||
externally)
|
||||
\item When the end goal is to make the data public, it might be a good idea
|
||||
to work together at a place where the data can go public at a certain
|
||||
point in time
|
||||
\item We will look at two possibilities: OSF and Github
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{IWM solutions}
|
||||
IWM servers
|
||||
\begin{itemize}
|
||||
\item Nextcloud: \url{https://nextcloud.iwm-tuebingen.de/}
|
||||
\item Gitea: \url{https://gitea.iwm-tuebingen.de/}
|
||||
\item Shared drive: \texttt{Y:/}
|
||||
\end{itemize}
|
||||
\vspace{.4cm}
|
||||
Microsoft servers
|
||||
\begin{itemize}
|
||||
\item OneDrive
|
||||
\item Teams
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\pause
|
||||
(Maybe check out the three tips of the week on this topic:
|
||||
{\tiny
|
||||
\url{https://iwmonline.sharepoint.com/sites/intranet/SitePages/direktorat/en/Interne-Kommunikation.aspx\#tip-of-the-week-tutorial-series}})
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Open Science Framework}
|
||||
{\url{https://osf.io/}}
|
||||
\begin{columns}
|
||||
\begin{column}{.4\textwidth}
|
||||
\begin{itemize}
|
||||
\item ``OSF is a free and open source project management tool that supports
|
||||
researchers throughout their entire project lifecycle.''
|
||||
\item Founded in 2012 and constantly developed: \url{https://www.cos.io/timeline}
|
||||
\item Meant to integrate all research steps
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\begin{column}{.7\textwidth}
|
||||
\includegraphics[scale = .2]{../figures/osf_workflow.png}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Let's try it out}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 5.1cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node (ex) at (0, 0) {\faIcon{folder} \verb+toyexample+};
|
||||
\node (n1) at (5, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (n2) at (5, -1.4) {\faIcon{folder} \verb+data+};
|
||||
\node (n3) at (5, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (ex.center) -- (n1.west);
|
||||
\path (ex.center) -- (n2.west);
|
||||
\path (ex.center) -- (n3.west);
|
||||
|
||||
\node (o1a) at (10, 0) {\faIcon[regular]{file} \verb+01_preprocessing.R+};
|
||||
\node (o1b) at (10, -0.7) {\faIcon[regular]{file} \verb+02_descriptives.R+};
|
||||
\node (o2) at (10, -1.4) {\faIcon{folder} \verb+processed+};
|
||||
\node (o3) at (10, -2.1) {\faIcon{folder} \verb+rawdata+};
|
||||
\node (o4) at (10, -2.8) {\faIcon[regular]{file} \verb+codebook.pdf+};
|
||||
\path (n1.center) -- (o1a.west);
|
||||
\path (n1.center) -- (o1b.west);
|
||||
\path (n2.center) -- (o2.west);
|
||||
\path (n2.center) -- (o3.west);
|
||||
\path (n2.center) -- (o4.west);
|
||||
\end{tikzpicture}
|
||||
Steps
|
||||
\begin{enumerate}
|
||||
\item You need an OSF account -- just sign up with an e-mail address or use ORCID
|
||||
\item Sign in
|
||||
\item Create a project
|
||||
\item Upload (or link) your files
|
||||
\item Invite contributors
|
||||
\end{enumerate}
|
||||
\end{frame}
|
||||
|
||||
% TODO:
|
||||
|
||||
% Show different cases on OSF:
|
||||
% 1. OSF with handmade codebook, all in one folder
|
||||
% 2. OSF with different components (show that they can all have different
|
||||
% licenses)
|
||||
% 3. OSF with Github integrated
|
||||
|
||||
% Show selection of servers (GDPR)
|
||||
|
||||
\begin{frame}{Licenses}
|
||||
\begin{columns}
|
||||
\begin{column}{.3\textwidth}
|
||||
\includegraphics[scale = .4]{../figures/licenses_osf.png}
|
||||
\end{column}
|
||||
\begin{column}{.7\textwidth}
|
||||
\begin{itemize}
|
||||
\item OSF offers you several options for licenses
|
||||
\item For data the Creative Commons (CC) licenses are usually a good option
|
||||
\item For software, other options might be better suited
|
||||
\item For code (e.\,g., analysis scripts) CC licenses are also a good
|
||||
choice
|
||||
\end{itemize}
|
||||
\vspace{1cm}
|
||||
|
||||
\hfill{\footnotesize \url{https://creativecommons.org/}}\\
|
||||
\hfill{\footnotesize \url{https://help.osf.io/article/288-license-your-project}}\\
|
||||
\hfill{\footnotesize \url{https://choosealicense.com/}}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Github}
|
||||
{\url{https://github.com/}}
|
||||
\begin{columns}
|
||||
\begin{column}{.8\textwidth}
|
||||
\begin{itemize}
|
||||
\item Developer platform that allows developers to create, store, manage and
|
||||
share code
|
||||
\item Based on Git software providing version control
|
||||
\begin{itemize}
|
||||
\item[+] access control
|
||||
\item[+] bug tracking
|
||||
\item[+] software feature requests
|
||||
\item[+] task management
|
||||
\item[+] continuous integration
|
||||
\item[+] wikis
|
||||
\end{itemize}
|
||||
\item Commonly used to host open source software development projects
|
||||
\item Bought by Microsoft in 2018
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\begin{column}{.3\textwidth}
|
||||
\includegraphics[scale = .2]{../figures/github.png}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Github workflow}
|
||||
\begin{center}
|
||||
\includegraphics[scale = .3]{../figures/workflow_git-github.png}
|
||||
\end{center}
|
||||
\hfill{\tiny \url{https://carpentries-incubator.github.io/open-science-with-r/09-collaborating}}
|
||||
\end{frame}
|
||||
|
||||
% TODO:
|
||||
|
||||
% READMEs:
|
||||
% https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-readmes
|
||||
|
||||
\section[Repositories]{Sharing data in repositories}
|
||||
|
||||
\begin{frame}[allowframebreaks]{Data publication}
|
||||
Which data should I share?
|
||||
\begin{itemize}
|
||||
\item In general, all data that are used in publications
|
||||
\item Data for your dissertation
|
||||
\item Data that you collected but know that you will never come around to
|
||||
analyzing
|
||||
\end{itemize}
|
||||
\vspace{.3cm}
|
||||
What are the reasons to share data?
|
||||
\begin{itemize}
|
||||
\item Transparency
|
||||
\item Data safety
|
||||
\item Cumulative research process
|
||||
\item Visibility
|
||||
\end{itemize}
|
||||
\framebreak
|
||||
For whom are you sharing data?
|
||||
\begin{itemize}
|
||||
\item Yourself
|
||||
\item Reviewers
|
||||
\item People who read your papers
|
||||
\item Other scientists
|
||||
\item Colleagues and collaborators
|
||||
\end{itemize}
|
||||
\vspace{.3cm}
|
||||
How should you share your data?
|
||||
\begin{itemize}
|
||||
\item On a public platform (or website), i.\,e., no account needed if
|
||||
possible
|
||||
\item Together with a codebook or at least an informative README
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Data repositories (suggested in our Research Data Policy)}
|
||||
National
|
||||
\begin{itemize}
|
||||
\item \url{https://www.psycharchives.org/}
|
||||
\item \url{https://www.forschungsdaten-bildung.de/}
|
||||
\item \url{https://datorium.gesis.org/}
|
||||
\item \url{https://www.iqb.hu-berlin.de/fdz}
|
||||
\end{itemize}
|
||||
\vspace{.4cm}
|
||||
International
|
||||
\begin{itemize}
|
||||
\item \url{https://datadryad.org/}
|
||||
\item \url{https://osf.io/}
|
||||
\item \url{https://zenodo.org/}
|
||||
\end{itemize}
|
||||
\vfill
|
||||
|
||||
\hfill{\footnotesize \url{https://datamanagement.hms.harvard.edu/share-publish/data-repositories}}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[<+->]{Zenodo}{https://zenodo.org/}
|
||||
\begin{itemize}
|
||||
\item General-purpose open repository launched in 2015
|
||||
\item Financed by the EU (European OpenAIRE program)
|
||||
\item Operated by CERN
|
||||
\item All disciplines
|
||||
\item Suitable for
|
||||
\begin{itemize}
|
||||
\item Data sets
|
||||
\item Papers / Preprints
|
||||
\item Research software
|
||||
\item Reports
|
||||
\item Any other digital research objects
|
||||
\end{itemize}
|
||||
\item Uploads of up to 50 GB possible
|
||||
\item Easily citable since all objects get a DOI
|
||||
\item Open source code is available on Github
|
||||
\item IWM example: \url{https://doi.org/10.5281/zenodo.2532411}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[<+->]{PsychArchives}{https://psycharchives.org/}
|
||||
\begin{itemize}
|
||||
\item Disciplinary repository for psychological science (and neighboring
|
||||
disciplines)
|
||||
\item Developed and operated by ZPID (Leibniz-Institut für Psychologie)
|
||||
\item Accommodating 20 different digital research object (DRO) types
|
||||
\begin{itemize}
|
||||
\item Articles
|
||||
\item Preprints
|
||||
\item Research data
|
||||
\item Code
|
||||
\item Supplements
|
||||
\item Preregistrations
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\item Searchable by ``IWM'': \url{https://psycharchives.org/en/browse/?q=iwm}
|
||||
\item Easily citable since all objects get a DOI
|
||||
\item Different objects can be linked together (e.\,g., data and code)
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\appendix
|
||||
%%\begin{frame}[allowframebreaks]{References}
|
||||
\begin{frame}{References}
|
||||
%\renewcommand{\bibfont}{\small}
|
||||
\printbibliography
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\end{document}
|
||||
|
||||
@@ -0,0 +1,503 @@
|
||||
\documentclass[aspectratio=169]{beamer}
|
||||
|
||||
\usepackage{listings}
|
||||
%\usepackage[utf8]{inputenc}
|
||||
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
|
||||
\addbibresource{../literature/lit.bib}
|
||||
|
||||
\usepackage{fancyvrb}
|
||||
\usepackage{fontawesome5} % get icons
|
||||
\usepackage{multirow}
|
||||
\usepackage{color, colortbl}
|
||||
|
||||
\usepackage{tikz}
|
||||
\usetikzlibrary{fit}
|
||||
\usepackage[edges]{forest}
|
||||
|
||||
\lstset{language = R,%
|
||||
basicstyle = \ttfamily\color{iwmgray},
|
||||
frame = single,
|
||||
rulecolor = \color{iwmgray},
|
||||
commentstyle = \slshape\color{iwmgreen},
|
||||
keywordstyle = \bfseries\color{iwmgray},
|
||||
identifierstyle = \color{iwmpurple},
|
||||
stringstyle = \color{iwmblue},
|
||||
numbers = none,%left,numberstyle = \tiny,
|
||||
basewidth = {.5em, .4em},
|
||||
showstringspaces = false,
|
||||
emphstyle = \color{red!50!white}}
|
||||
|
||||
\makeatletter \def\newblock{\beamer@newblock} \makeatother
|
||||
|
||||
\beamertemplatenavigationsymbolsempty
|
||||
\setbeamertemplate{itemize items}[circle]
|
||||
\setbeamertemplate{section in toc}[circle]
|
||||
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
|
||||
\setbeamercolor{block body}{bg=iwmorange!50!white}
|
||||
\setbeamercolor{block title}{fg=white, bg=iwmorange}
|
||||
% Definitions for biblatex
|
||||
\setbeamercolor{bibliography entry note}{fg=iwmgray}
|
||||
\setbeamercolor{bibliography entry author}{fg=iwmgray}
|
||||
\setbeamertemplate{bibliography item}{}
|
||||
|
||||
\definecolor{iwmorange}{RGB}{255,105,0}
|
||||
\definecolor{iwmgray}{RGB}{67,79,79}
|
||||
\definecolor{iwmblue}{RGB}{60,180,220}
|
||||
\definecolor{iwmgreen}{RGB}{145,200,110}
|
||||
\definecolor{iwmpurple}{RGB}{120,0,75}
|
||||
|
||||
\setbeamercolor{title}{fg=iwmorange}
|
||||
\setbeamercolor{frametitle}{fg=iwmorange}
|
||||
\setbeamercolor{structure}{fg=iwmorange}
|
||||
\setbeamercolor{normal text}{fg=iwmgray}
|
||||
\setbeamercolor{author}{fg=iwmgray}
|
||||
\setbeamercolor{date}{fg=iwmgray}
|
||||
|
||||
\newcommand{\vect}[1]{\mathbf{#1}}
|
||||
\newcommand{\mat}[1]{\mathbf{#1}}
|
||||
\newcommand{\gvect}[1]{\boldsymbol{#1}}
|
||||
\newcommand{\gmat}[1]{\boldsymbol{#1}}
|
||||
|
||||
\AtBeginSection[]{
|
||||
\frame{
|
||||
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
|
||||
|
||||
\setbeamertemplate{headline}{
|
||||
\begin{beamercolorbox}{section in head}
|
||||
\vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
|
||||
\end{beamercolorbox}
|
||||
}
|
||||
|
||||
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
|
||||
|
||||
\title{Clean coding}
|
||||
\author{Nora Wickelmaier}
|
||||
\date{July 8, 2024}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\begin{frame}{}
|
||||
\thispagestyle{empty}
|
||||
\titlepage
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{What is needed to make code reproducible?}
|
||||
% slido
|
||||
\centering
|
||||
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 5}
|
||||
|
||||
\url{https://app.sli.do/event/uEz8fJWkLBNm1sthQovXNH}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Programming resources}
|
||||
\footnotesize
|
||||
\begin{tabular}{ll}
|
||||
Learning statistics with R & {\url{https://learningstatisticswithr.com/book/}} \\
|
||||
&\\
|
||||
R for Data Science & {\url{https://r4ds.hadley.nz/}} \\
|
||||
&\\
|
||||
Advanced R & {\url{https://adv-r.hadley.nz/}} \\
|
||||
&\\
|
||||
Happy Git and GitHub for the useR & {\url{https://happygitwithr.com/}} \\
|
||||
&\\
|
||||
R Programming for Research & {\url{https://geanders.github.io/RProgrammingForResearch/}} \\
|
||||
&\\
|
||||
Building reproducible analytical pipelines with R & {\url{https://raps-with-r.dev/}} \\
|
||||
&\\
|
||||
Data Skills for Reproducible Science & {\url{https://psyteachr.github.io/msc-data-skills/}} \\
|
||||
\end{tabular}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Agenda}
|
||||
\centering
|
||||
\begin{tabular}{ll}
|
||||
\hline
|
||||
Date & Topic \\
|
||||
\hline
|
||||
2024-05-13 & Introduction to data management \\
|
||||
2024-05-27 & Workflow \\
|
||||
2024-06-10 & Data organisation\\
|
||||
2024-06-24 & Data sharing \\
|
||||
\only<1>{2024-07-08}\only<2>{\bf 2024-07-08} &
|
||||
\only<1>{Clean coding}\only<2>{\bf Clean coding} \\
|
||||
2024-07-22 & Version control \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{frame}
|
||||
|
||||
% Understandable coding
|
||||
% Cleaning up R code for readability
|
||||
% Documentation of a final R script
|
||||
% Reproducible code
|
||||
|
||||
\section{Style guidelines}
|
||||
|
||||
\begin{frame}[<+->]{Style guidelines in R}
|
||||
\begin{itemize}
|
||||
\item R has no mandatory or commonly accepted style guide
|
||||
\item However, Hadley Wickham and Google developed style guides which are
|
||||
now widely accepted
|
||||
\begin{itemize}
|
||||
\item \url{https://google.github.io/styleguide/Rguide.html}
|
||||
\item \url{https://style.tidyverse.org/}
|
||||
\end{itemize}
|
||||
\item It is always a good idea to follow a style guide and not ``create''
|
||||
your own rules (if you deviate, be consistent!)
|
||||
\item A style guide helps with
|
||||
\begin{itemize}
|
||||
\item Keeping code clean, which makes it easier to read and interpret
|
||||
\item Making it easier to catch and fix mistakes
|
||||
\item Making it easier for others to follow and adapt your code
|
||||
\item Preventing possible problems, e.\,g., avoiding dots in function
|
||||
names
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\nocite{Wickham_styleguide, Anderson2023}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile, allowframebreaks]{File names}
|
||||
\begin{itemize}
|
||||
\item File names should be meaningful and end in .R
|
||||
\item Avoid using special characters in file names
|
||||
\item Stick with numbers, letters, \verb+-+, and \verb+_+
|
||||
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
|
||||
# Good
|
||||
fit_models.R
|
||||
utility_functions.R
|
||||
|
||||
# Bad
|
||||
fit models.R
|
||||
foo.r
|
||||
stuff.r
|
||||
\end{lstlisting}
|
||||
\framebreak
|
||||
|
||||
\item If files should be run in a particular order, prefix them with numbers
|
||||
\item If it seems likely you’ll have more than 10 files, left pad with zero
|
||||
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
|
||||
00_download.R
|
||||
01_explore.R
|
||||
...
|
||||
09_model.R
|
||||
10_visualize.R
|
||||
\end{lstlisting}
|
||||
\item If you later realize that you missed some steps, it’s tempting to use
|
||||
02a, 02b, etc.
|
||||
\item However, it is generally better to bite the bullet and rename all
|
||||
files
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile, allowframebreaks]{Object names}
|
||||
\begin{itemize}
|
||||
\item Variable and function names should use only lowercase letters,
|
||||
numbers, and \verb+_+
|
||||
\item Use underscores (\verb+_+) (so-called snake case) to separate words
|
||||
within a name
|
||||
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
|
||||
# Good
|
||||
day_one
|
||||
day_1
|
||||
|
||||
# Bad
|
||||
DayOne
|
||||
dayone
|
||||
\end{lstlisting}
|
||||
\framebreak
|
||||
|
||||
\item Generally, variable names should be nouns and function names should be
|
||||
verbs
|
||||
\item Strive for names that are concise and meaningful
|
||||
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
|
||||
# Good
|
||||
day_one
|
||||
|
||||
# Bad
|
||||
first_day_of_the_month
|
||||
djm1
|
||||
\end{lstlisting}
|
||||
\framebreak
|
||||
|
||||
\item Avoid re-using names of common functions and variables
|
||||
\begin{lstlisting}
|
||||
# Bad
|
||||
T <- FALSE
|
||||
c <- 10
|
||||
mean <- function(x) sum(x)
|
||||
\end{lstlisting}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile, allowframebreaks]{Spacing}
|
||||
\begin{itemize}
|
||||
\item Always put a space after a comma, never before
|
||||
\begin{lstlisting}
|
||||
# Good
|
||||
x[, 1]
|
||||
|
||||
# Bad
|
||||
x[,1]
|
||||
x[ ,1]
|
||||
x[ , 1]
|
||||
\end{lstlisting}
|
||||
\framebreak
|
||||
|
||||
\item Do not put spaces inside or outside parentheses for regular function
|
||||
calls
|
||||
\begin{lstlisting}
|
||||
# Good
|
||||
mean(x, na.rm = TRUE)
|
||||
|
||||
# Bad
|
||||
mean (x, na.rm = TRUE)
|
||||
mean( x, na.rm = TRUE )
|
||||
\end{lstlisting}
|
||||
\framebreak
|
||||
|
||||
\item Place a space before and after \texttt{()} when used with \texttt{if},
|
||||
\texttt{for}, or \texttt{while}
|
||||
\begin{lstlisting}
|
||||
# Good
|
||||
if (debug) {
|
||||
show(x)
|
||||
}
|
||||
|
||||
# Bad
|
||||
if(debug){
|
||||
show(x)
|
||||
}
|
||||
\end{lstlisting}
|
||||
\framebreak
|
||||
|
||||
\item Place a space after \texttt{()} used for function arguments
|
||||
\begin{lstlisting}
|
||||
# Good
|
||||
function(x) {}
|
||||
|
||||
# Bad
|
||||
function (x) {}
|
||||
function(x){}
|
||||
\end{lstlisting}
|
||||
\framebreak
|
||||
|
||||
\item Most infix operators (\verb+==+, \verb|+|, \verb+-+, \verb+<-+, etc.)
|
||||
should always be surrounded by spaces
|
||||
\begin{lstlisting}
|
||||
# Good
|
||||
height <- (feet * 12) + inches
|
||||
mean(x, na.rm = TRUE)
|
||||
|
||||
# Bad
|
||||
height<-feet*12+inches
|
||||
mean(x, na.rm=TRUE)
|
||||
\end{lstlisting}
|
||||
\framebreak
|
||||
|
||||
\item There are a few exceptions, which should never be surrounded by
|
||||
spaces: \verb+::+, \verb+:::+, \verb+$+, \verb+@+, \verb+[+, \verb+[[+,
|
||||
\verb+?+, \verb+^+, and \verb+:+
|
||||
{\small
|
||||
\begin{lstlisting}
|
||||
# Good
|
||||
sqrt(x^2 + y^2)
|
||||
df$z
|
||||
x <- 1:10
|
||||
package?stats
|
||||
?mean
|
||||
|
||||
# Bad
|
||||
sqrt(x ^ 2 + y ^ 2)
|
||||
df $ z
|
||||
x <- 1 : 10
|
||||
package ? stats
|
||||
? mean
|
||||
\end{lstlisting}
|
||||
}
|
||||
\item Adding extra spaces is ok if it improves alignment of \verb+=+ or
|
||||
\verb+<-+
|
||||
\begin{lstlisting}
|
||||
# Good
|
||||
list(
|
||||
total = a + b + c,
|
||||
mean = (a + b + c) / n
|
||||
)
|
||||
|
||||
# Also fine
|
||||
list(
|
||||
total = a + b + c,
|
||||
mean = (a + b + c) / n
|
||||
)
|
||||
\end{lstlisting}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% CITE:
|
||||
% https://style.tidyverse.org/index.html
|
||||
% R Programming for Research: https://geanders.github.io/RProgrammingForResearch/
|
||||
% Building reproducible analytical pipelines with R: https://raps-with-r.dev/
|
||||
|
||||
\section{Script organisation}
|
||||
|
||||
\begin{frame}[fragile]{Script header}
|
||||
\begin{itemize}
|
||||
\item It can be very helpful to have some general information right at the
|
||||
top when opening a script
|
||||
\begin{lstlisting}
|
||||
# 01_preprocessing.R
|
||||
#
|
||||
# Cleaning up toy data set (Methods Seminar SS2024)
|
||||
#
|
||||
# Input: rawdata/RDM_MS_SS2024_download_2024-06-07.csv
|
||||
# Output: processed/data_rdm-ms-ss2024_cleaned.csv
|
||||
# processed/data_rdm-ms-ss2024_cleaned.RData
|
||||
#
|
||||
# Created: 2024-06-03, NW
|
||||
\end{lstlisting}
|
||||
\item These metadata help you remember faster what you did
|
||||
\item Might not be necessary when using consistent version control (but does
|
||||
not hurt either)
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Line length}
|
||||
{}
|
||||
\begin{center}
|
||||
{\Large\bf Keep lines to 80 characters or less!}
|
||||
\end{center}
|
||||
\begin{lstlisting}
|
||||
# Good
|
||||
my_df <- data.frame(n = 1:3,
|
||||
letter = c("a", "b", "c"),
|
||||
cap_letter = c("A", "B", "C"))
|
||||
|
||||
# Bad
|
||||
my_df <- data.frame(n = 1:3, letter = c("a", "b", "c"), cap_letter = c("A", "B", "C"))
|
||||
\end{lstlisting}
|
||||
\begin{itemize}
|
||||
\item Ensures that your code is formatted in a way that you can see all of
|
||||
the code without scrolling horizontally
|
||||
\item To set your script pane to be limited to 80 characters, go to\\
|
||||
\verb+RStudio -> Preferences -> Code -> Display+\\
|
||||
and set ``Margin Column'' to 80
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile, allowframebreaks]{File organisation}
|
||||
\begin{itemize}
|
||||
\item Try to write scripts that are concerned with one (major) task
|
||||
\item If you can find a name that captures the content, it is usually a
|
||||
good way to start
|
||||
\item Some (random) examples
|
||||
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
|
||||
download-data.R
|
||||
data-cleaning.R
|
||||
cluster_analysis_exp1.R
|
||||
visualization_logistic-model.R
|
||||
anova_h1.R
|
||||
\end{lstlisting}
|
||||
\framebreak
|
||||
|
||||
\item Export data sets for new scripts (do not make yourself run all scripts
|
||||
up to script 5 each time, just because you need the data in a certain
|
||||
format)
|
||||
\begin{lstlisting}
|
||||
# Interoperable
|
||||
write.table(dat,
|
||||
file = "data_exp1_cleaned.csv",
|
||||
sep = ";",
|
||||
quote = FALSE,
|
||||
row.names = FALSE)
|
||||
|
||||
# Preserve order of factor levels, date formats, etc.
|
||||
save(dat, file = "data_exp1_cleaned.RData")
|
||||
\end{lstlisting}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile, allowframebreaks]{Internal structure}
|
||||
\begin{itemize}
|
||||
\item Use commented lines with \texttt{-} or \texttt{=} to break your file
|
||||
up into chunks
|
||||
\item Load additional packages at the beginning of the script
|
||||
\begin{lstlisting}
|
||||
library(lme4)
|
||||
library(sjPlot)
|
||||
|
||||
# Load data ---------------------------
|
||||
|
||||
# Plot data ---------------------------
|
||||
\end{lstlisting}
|
||||
\framebreak
|
||||
|
||||
\item If you load several packages, be aware that the order of loading
|
||||
matters!
|
||||
\item If you use only one or two functions from a package, get the function
|
||||
with \verb+::+ instead of loading the whole package
|
||||
\begin{lstlisting}
|
||||
library(lme4)
|
||||
...
|
||||
|
||||
# Fit mixed-effects model to test Hypothesis 1
|
||||
lme1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
|
||||
summary(lme1)
|
||||
sjPlot::tab_model(lme1)
|
||||
\end{lstlisting}
|
||||
\framebreak
|
||||
|
||||
\item Group related pieces of code together
|
||||
\item Separate blocks of code by empty spaces
|
||||
\begin{lstlisting}
|
||||
# Load data
|
||||
library(faraway)
|
||||
data(nepali)
|
||||
|
||||
# Relabel sex variable
|
||||
nepali$sex <- factor(nepali$sex,
|
||||
levels = c(1, 2),
|
||||
labels = c("Male", "Female"))
|
||||
\end{lstlisting}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
||||
|
||||
\begin{frame}{How can I test if my code is reproducible?}
|
||||
% slido
|
||||
\centering
|
||||
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 5}
|
||||
|
||||
\url{https://app.sli.do/event/uEz8fJWkLBNm1sthQovXNH}
|
||||
\end{frame}
|
||||
|
||||
\section{Code reviews}
|
||||
|
||||
\begin{frame}[<+->]{Use your peers}
|
||||
\begin{itemize}
|
||||
\item Do not overthink it!
|
||||
\item Just give your data and code to a colleague and ask them to reproduce
|
||||
what you did (this sounds easy, but it is actually not!)
|
||||
\item This will give you tons of insights about your workflow
|
||||
\begin{itemize}
|
||||
\item Can this person (in general) understand what you did?
|
||||
\item Is this person able to easily put your data on their machine and
|
||||
run the code right away?
|
||||
\item Anything this person would have done differently?
|
||||
\item Discuss why and which things you do differently
|
||||
\end{itemize}
|
||||
\item Reading other people's code is the best way to learn about how things
|
||||
can be done differently than you do them
|
||||
\item You can review code by printing it out and adding comments by hand\\
|
||||
(I highly recommend this!)
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\appendix
|
||||
%\begin{frame}[allowframebreaks]{References}
|
||||
\begin{frame}{References}
|
||||
%\renewcommand{\bibfont}{\small}
|
||||
\printbibliography
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\end{document}
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
# Last session: Wrap-up
|
||||
|
||||
How to apply all we learned this semester to archiving our research data at the
|
||||
IWM:
|
||||
|
||||
|
||||
* Show how to fill in the input mask
|
||||
|
||||
* Details on how to prepare data and folders
|
||||
|
||||
Show slide set "How to know what to archive"
|
||||
|
||||
* How to upload research data
|
||||
|
||||
* How to create a new version of research data
|
||||
|
||||
* How to update meta data
|
||||
|
||||
* Explain the "Status" mechanism
|
||||
|
||||
* Show that only one person per data set has access to the meta data entry
|
||||
|
||||
|
||||
@@ -0,0 +1,408 @@
|
||||
\documentclass[aspectratio=169]{beamer}
|
||||
|
||||
\usepackage{tikz}
|
||||
\usetikzlibrary{fit}
|
||||
|
||||
\usepackage{listings}
|
||||
\usepackage[utf8,latin1]{inputenc}
|
||||
\usepackage{multirow}
|
||||
\usepackage{color, colortbl}
|
||||
\usepackage{fontawesome5} % get icons
|
||||
|
||||
\makeatletter \def\newblock{\beamer@newblock} \makeatother
|
||||
|
||||
\beamertemplatenavigationsymbolsempty
|
||||
\setbeamertemplate{itemize items}[circle]
|
||||
\setbeamertemplate{section in toc}[circle]
|
||||
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgrau}}
|
||||
\setbeamercolor{block body}{bg=iwmorange!50!white}
|
||||
\setbeamercolor{block title}{fg=white, bg=iwmorange}
|
||||
|
||||
\definecolor{iwmorange}{RGB}{255,105,0}
|
||||
\definecolor{iwmgray}{RGB}{67,79,79}
|
||||
\definecolor{iwmblue}{RGB}{60,180,220}
|
||||
\definecolor{iwmpurple}{RGB}{120,0,75}
|
||||
|
||||
\definecolor{iwmorange}{RGB}{255,105,0}
|
||||
\definecolor{iwmgrau}{RGB}{67,79,79}
|
||||
\setbeamercolor{title}{fg=iwmorange}
|
||||
\setbeamercolor{frametitle}{fg=iwmorange}
|
||||
\setbeamercolor{structure}{fg=iwmorange}
|
||||
\setbeamercolor{normal text}{fg=iwmgrau}
|
||||
\setbeamercolor{author}{fg=iwmgrau}
|
||||
\setbeamercolor{date}{fg=iwmgrau}
|
||||
|
||||
\lstset{language=bash,%
|
||||
literate={Ü}{{\"U}}1
|
||||
{ü}{{\"u}}1,
|
||||
backgroundcolor=\color{iwmgrau!15!white},
|
||||
basicstyle=\ttfamily\color{iwmgrau},
|
||||
frame=none,
|
||||
basicstyle=\ttfamily\color{iwmgrau},
|
||||
commentstyle=\slshape\color{iwmgrau},
|
||||
keywordstyle=\bfseries\color{iwmgrau},
|
||||
identifierstyle=\color{iwmgrau},
|
||||
stringstyle=\color{iwmgrau},
|
||||
numbers=none,%left,numberstyle=\tiny,
|
||||
basewidth={.5em, .4em},
|
||||
showstringspaces=false,
|
||||
emphstyle=\color{red!50!white}}
|
||||
|
||||
\pgfmathdeclarefunction{gauss}{2}{%
|
||||
\pgfmathparse{1/(#2*sqrt(2*pi))*exp(-((x-#1)^2)/(2*#2^2))}%
|
||||
}
|
||||
|
||||
\AtBeginSection[]{
|
||||
\frame{
|
||||
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
|
||||
|
||||
% \setbeamertemplate{headline}{
|
||||
% \begin{beamercolorbox}{section in head}
|
||||
% \vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
|
||||
% \end{beamercolorbox}
|
||||
% }
|
||||
|
||||
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
|
||||
|
||||
|
||||
\title{How to know what to archive}
|
||||
\author{Nora Wickelmaier}
|
||||
\date{\today}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\begin{frame}{}
|
||||
\thispagestyle{empty}
|
||||
\titlepage
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{The basics}
|
||||
Data
|
||||
\begin{itemize}
|
||||
\item Keep your raw data in a separate folder
|
||||
\item If your raw data \emph{do not contain any personal data} submit them
|
||||
with your analysis-ready data files
|
||||
\item Submit the analysis script that got your raw data to
|
||||
``analysis-ready'' (or at least a description of the process)\pause\hfill
|
||||
{\bf\color{iwmpurple} Check out our Research Data Policy!}
|
||||
\end{itemize}
|
||||
\pause
|
||||
Material
|
||||
\begin{itemize}
|
||||
\item Submit all materials that have been used to create these data
|
||||
\begin{itemize}
|
||||
\item Questionnaires
|
||||
\item Program code for experiments
|
||||
\item Stimuli presented, e.\,g., pictures or similar
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\pause
|
||||
Documentation
|
||||
\begin{itemize}
|
||||
\item Add a description of your data and experiment
|
||||
\item In its simplest form this can just be a README file, ideally as text
|
||||
or PDF file
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Previous folder structure on \texttt{Y:/}}
|
||||
\begin{columns}
|
||||
\begin{column}{.4\textwidth}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 3.6cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+study+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+data+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+ethik+};
|
||||
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+material+};
|
||||
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+pub+};
|
||||
\node (n5) at (4, -2.8) {\faIcon{folder} \verb+scans+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (n5.west);
|
||||
\end{tikzpicture}
|
||||
\end{column}
|
||||
\begin{column}{.6\textwidth}
|
||||
\begin{itemize}[<+->]
|
||||
\item Very basic (and good) folder structure for a research project
|
||||
\item Submitting the approval document of our \emph{local} ethics
|
||||
committee is now optional, since the connection will be possible via
|
||||
your meta data\\
|
||||
% $\to$ If the project was evaluated by an external ethics committee it
|
||||
% might still be a good idea to submit the approval document
|
||||
\item Data and material are the most important folders to help other
|
||||
people make sense of your data
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Some additional suggestions}
|
||||
\vspace{.2cm}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 3.6cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+study+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+material+};
|
||||
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+preregistration+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (file.west);
|
||||
\end{tikzpicture}
|
||||
\pause
|
||||
\vfill
|
||||
Separate your code from your data folder and add your preregistration for the
|
||||
project
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Some additional suggestions}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 3.6cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+study+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+material+};
|
||||
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+preregistration+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node[text width = 5cm] (o1) at (10, 0) {\faIcon[regular]{file} \verb+01_preprocessing.R+};
|
||||
\node[text width = 5cm] (o2) at (10, -0.7) {\faIcon[regular]{file} \verb+02_descriptives.R+};
|
||||
\node[text width = 5cm] (o3) at (10, -1.4) {\faIcon[regular]{file} \verb+03_modeling.R+};
|
||||
\node[text width = 5cm] (o4) at (10, -2.1) {\faIcon[regular]{file} \verb+04_plots.R+};
|
||||
\path (n1.east) -- (o1.west);
|
||||
\path (n1.east) -- (o2.west);
|
||||
\path (n1.east) -- (o3.west);
|
||||
\path (n1.east) -- (o4.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
Separate your code from your data folder and add your preregistration for the
|
||||
project
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Some additional suggestions}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 3.6cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+study+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+material+};
|
||||
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+preregistration+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node[text width = 6cm] (o1) at (10.5, -0.7) {\faIcon{folder} \verb+01_raw-data+};
|
||||
\node[text width = 6cm] (o2) at (10.5, -1.4) {\faIcon{folder} \verb+02_processed-data+};
|
||||
\node[text width = 6cm] (o3) at (10.5, -2.1) {\faIcon[regular]{file}
|
||||
\verb+codebook_processed-data.pdf+};
|
||||
\path (n2.east) -- (o1.west);
|
||||
\path (n2.east) -- (o2.west);
|
||||
\path (n2.east) -- (o3.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
Separate your code from your data folder and add your preregistration for the
|
||||
project
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Some additional suggestions}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 3.6cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+study+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+material+};
|
||||
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+preregistration+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node[text width = 5cm] (o1) at (10, -1.4) {\faIcon{folder} \verb+stimuli+};
|
||||
\node[text width = 5cm] (o2) at (10, -2.1) {\faIcon[regular]{file} \verb+survey01.pdf+};
|
||||
\node[text width = 5cm] (o3) at (10, -2.8) {\faIcon[regular]{file} \verb+survey02.pdf+};
|
||||
\path (n3.east) -- (o1.west);
|
||||
\path (n3.east) -- (o2.west);
|
||||
\path (n3.east) -- (o3.west);
|
||||
\end{tikzpicture}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Some additional suggestions}
|
||||
\vspace{.4cm}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 3.6cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+study+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+material+};
|
||||
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+preregistration+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (file.west);
|
||||
% second level
|
||||
\node[text width = 6cm] (o1) at (10.5, -2.1) {\faIcon[regular]{file} \verb+2024-06-11_prereg_study.pdf+};
|
||||
\path (n4.east) -- (o1.west);
|
||||
\end{tikzpicture}
|
||||
\pause
|
||||
\vfill
|
||||
A preregistration usually contains all important information like what
|
||||
variables have been collected, what were the hypotheses, etc.
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Some additional suggestions}
|
||||
\begin{columns}
|
||||
\begin{column}{.45\textwidth}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 3.6cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+study+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+material+};
|
||||
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+preregistration+};
|
||||
\node (file) at (4, -2.8) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (file.west);
|
||||
\end{tikzpicture}
|
||||
\end{column}
|
||||
\begin{column}{.55\textwidth}
|
||||
\vspace{-3cm}
|
||||
\begin{itemize}
|
||||
\item Add any additional folders that you need
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}[fragile]{Some additional suggestions}
|
||||
\vspace{1cm}
|
||||
\begin{columns}
|
||||
\begin{column}{.45\textwidth}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 3.6cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+study+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \bf background};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+};
|
||||
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+};
|
||||
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+material+};
|
||||
\node (n5) at (4, -2.8) {\faIcon{folder} \verb+preregistration+};
|
||||
\node (file) at (4, -3.5) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (n5.west);
|
||||
\path (top.east) -- (file.west);
|
||||
\end{tikzpicture}
|
||||
\end{column}
|
||||
\begin{column}{.55\textwidth}
|
||||
\vspace{-2.5cm}
|
||||
\begin{itemize}
|
||||
\item Add any additional folders that you need
|
||||
\item Maybe a folder \verb+background+ with a PhD expos{\'e} or a grant
|
||||
application (or both)
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{Some additional suggestions}
|
||||
\vspace{.5cm}
|
||||
\begin{columns}
|
||||
\begin{column}{.45\textwidth}
|
||||
\begin{tikzpicture}[
|
||||
every node/.style = {text width = 3.6cm, align = left},
|
||||
every path/.style = {thick, draw}
|
||||
]
|
||||
\node[text width = 2cm] (top) at (0, 0) {\faIcon{folder} \verb+study+};
|
||||
% first level
|
||||
\node (n1) at (4, 0) {\faIcon{folder} \verb+code+};
|
||||
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+data+};
|
||||
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+material+};
|
||||
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+preregistration+};
|
||||
\node (n5) at (4, -2.8) {\faIcon{folder} \bf software};
|
||||
\node (file) at (4, -3.5) {\faIcon[regular]{file} \verb+README.md+};
|
||||
\path (top.east) -- (n1.west);
|
||||
\path (top.east) -- (n2.west);
|
||||
\path (top.east) -- (n3.west);
|
||||
\path (top.east) -- (n4.west);
|
||||
\path (top.east) -- (n5.west);
|
||||
\path (top.east) -- (file.west);
|
||||
\end{tikzpicture}
|
||||
\end{column}
|
||||
\begin{column}{.55\textwidth}
|
||||
\begin{itemize}
|
||||
\item Add any additional folders that you need
|
||||
\item Maybe a folder \verb+background+ with a PhD expos{\'e} or a grant
|
||||
application (or both)
|
||||
\item Or a folder \verb+software+ with Python or Matlab code that you
|
||||
wrote for your experiment
|
||||
\item \dots
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}{}
|
||||
{}
|
||||
\begin{center}
|
||||
{\Huge\color{iwmblue} Don't overthink it and use common sense.\\
|
||||
\vspace{1cm}\pause
|
||||
Then add everything that you think a colleague needs to understand what you
|
||||
did.}
|
||||
\end{center}
|
||||
|
||||
\end{frame}
|
||||
|
||||
\end{document}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
| 2024-06-10 | Data organisation |
|
||||
| 2024-06-24 | Data sharing |
|
||||
| 2024-07-08 | Clean coding |
|
||||
| 2024-07-22 | Version control |
|
||||
| 2024-07-22 | Archiving data at the IWM |
|
||||
|
||||
# Literature
|
||||
|
||||
@@ -20,6 +20,10 @@ Frazier, M. R., O'Hara, C. C., Jiang, N., & Halpern, B. S. (2017). Our path
|
||||
to better science in less time using open data science tools. _Nature
|
||||
Ecology & Evolution, 1_(6), 1-7. https://doi.org/10.1038/s41559-017-0160
|
||||
|
||||
Wicherts, J. M., & Bakker, M. (2012). Publish (your data) or (let the data)
|
||||
perish! Why not publish your data too? _Intelligence, 40_(2), 73–76.
|
||||
https://doi.org/10.1016/j.intell.2012.01.004
|
||||
|
||||
Wilbrandt, J. (2023). Research Data Management Intro Series: Coffee Lectures &
|
||||
Espresso Shots. https://doi.org/10.5281/zenodo.7573695
|
||||
|
||||
|
||||
|
After Width: | Height: | Size: 5.5 KiB |
|
After Width: | Height: | Size: 5.6 KiB |
|
After Width: | Height: | Size: 5.7 KiB |
|
After Width: | Height: | Size: 47 KiB |
|
After Width: | Height: | Size: 35 KiB |
|
After Width: | Height: | Size: 63 KiB |
|
After Width: | Height: | Size: 24 KiB |
|
After Width: | Height: | Size: 11 KiB |
|
After Width: | Height: | Size: 43 KiB |
|
After Width: | Height: | Size: 146 KiB |
|
After Width: | Height: | Size: 529 KiB |
@@ -1,3 +1,11 @@
|
||||
@book{Anderson2023,
|
||||
title = {R programming for research},
|
||||
author = {Brooke Anderson and Rachel Severson and Nicholas Good},
|
||||
year = {2023},
|
||||
publisher = {Colorado State University, ERHS 535},
|
||||
url = {https://geanders.github.io/RProgrammingForResearch/}
|
||||
}
|
||||
|
||||
@article{Kathawalla2021,
|
||||
title = {Easing into open science: {A} guide for graduate students and their advisors},
|
||||
author = {Kathawalla, Ummul-Kiram and Silverstein, Priya and Syed, Moin},
|
||||
@@ -8,6 +16,14 @@
|
||||
doi = {10.1525/collabra.18684}
|
||||
}
|
||||
|
||||
@book{Koeller2004,
|
||||
title = {Wege zur {H}ochschulreife in {B}aden-{W}{\"u}rttemberg: {TOSCA} -- {E}ine {U}ntersuchung an allgemein bildenden und beruflichen {G}ymnasien},
|
||||
author = {K{\"o}ller, Olaf and Watermann, Ralf and Trautwein, Ulrich and L{\"u}dtke, Oliver},
|
||||
year = {2004},
|
||||
publisher = {Springer},
|
||||
doi = {10.1007/978-3-322-80906-3}
|
||||
}
|
||||
|
||||
@article{Lowndes2017,
|
||||
title = {Our path to better science in less time using open data science tools},
|
||||
author = {Lowndes, Julia S Stewart and Best, Benjamin D and Scarborough, Courtney and Afflerbach, Jamie C and Frazier, Melanie R and O'Hara, Casey C and Jiang, Ning and Halpern, Benjamin S},
|
||||
@@ -30,6 +46,35 @@
|
||||
doi = {10.1177/2515245917747656}
|
||||
}
|
||||
|
||||
@article{Ngo2023,
|
||||
title = {Spot the bot: Investigating user's detection cues for social bots and their willingness to verify Twitter profiles},
|
||||
journal = {Computers in Human Behavior},
|
||||
volume = {146},
|
||||
pages = {107819},
|
||||
year = {2023},
|
||||
issn = {0747-5632},
|
||||
doi = {10.1016/j.chb.2023.107819},
|
||||
url = {https://www.sciencedirect.com/science/article/pii/S074756322300170X},
|
||||
author = {Thao Ngo and Magdalena Wischnewski and Rebecca Bernemann and Martin Jansen and Nicole Kr{\"a}mer}
|
||||
}
|
||||
|
||||
@article{Wicherts2012,
|
||||
title = {Publish (your data) or (let the data) perish! {W}hy not publish your data too?},
|
||||
author = {Wicherts, Jelte M and Bakker, Marjan},
|
||||
journal = {Intelligence},
|
||||
volume = {40},
|
||||
number = {2},
|
||||
pages = {73--76},
|
||||
year = {2012},
|
||||
doi = {10.1016/j.intell.2012.01.004}
|
||||
}
|
||||
|
||||
@misc{Wickham_styleguide,
|
||||
author = {Hadley Wickham},
|
||||
title = {The tidyverse style guide},
|
||||
url = {https://style.tidyverse.org/}
|
||||
}
|
||||
|
||||
@misc{Wilbrandt2023,
|
||||
author = {Wilbrandt, Jeanne},
|
||||
title = {{Research Data Management Intro Series: Coffee Lectures \& Espresso Shots}},
|
||||
|
||||