1046 lines
36 KiB
TeX
1046 lines
36 KiB
TeX
\documentclass[aspectratio=169]{beamer}
|
|
|
|
\usepackage{listings}
|
|
\usepackage[utf8,latin1]{inputenc}
|
|
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
|
|
\addbibresource{../literature/lit.bib}
|
|
|
|
\usepackage{fancyvrb}
|
|
\usepackage{fontawesome5} % get icons
|
|
\usepackage{multirow}
|
|
\usepackage{color, colortbl}
|
|
|
|
\usepackage{tikz}
|
|
\usetikzlibrary{fit}
|
|
\usepackage[edges]{forest}
|
|
|
|
% Global style for all listings: bash as the language, typewriter text in
% iwmgray on a light gray background, no frame and no line numbers.
% Comments are slanted, keywords bold; emphasized tokens are light red.
\lstset{language=bash,%
|
|
backgroundcolor=\color{iwmgray!15!white},
|
|
basicstyle=\ttfamily\color{iwmgray},
|
|
frame=none,
|
|
commentstyle=\slshape\color{iwmgray},
|
|
keywordstyle=\bfseries\color{iwmgray},
|
|
identifierstyle=\color{iwmgray},
|
|
stringstyle=\color{iwmgray},
|
|
numbers=none,%left,numberstyle=\tiny,
|
|
basewidth={.5em, .4em},
|
|
showstringspaces=false,
|
|
emphstyle=\color{red!50!white}}
|
|
|
|
% Redefine \newblock to expand to beamer's internal \beamer@newblock
% (\makeatletter/\makeatother are needed because of the @ in the name).
% NOTE(review): presumably a compatibility fix for the biblatex bibliography
% inside beamer -- confirm.
\makeatletter \def\newblock{\beamer@newblock} \makeatother
|
|
|
|
\beamertemplatenavigationsymbolsempty
|
|
\setbeamertemplate{itemize items}[circle]
|
|
\setbeamertemplate{section in toc}[circle]
|
|
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
|
|
\setbeamercolor{block body}{bg=iwmorange!50!white}
|
|
\setbeamercolor{block title}{fg=white, bg=iwmorange}
|
|
% Definitions for biblatex
|
|
\setbeamercolor{bibliography entry note}{fg=iwmgray}
|
|
\setbeamercolor{bibliography entry author}{fg=iwmgray}
|
|
\setbeamertemplate{bibliography item}{}
|
|
|
|
\definecolor{iwmorange}{RGB}{255,105,0}
|
|
\definecolor{iwmgray}{RGB}{67,79,79}
|
|
\definecolor{iwmblue}{RGB}{60,180,220}
|
|
|
|
\setbeamercolor{title}{fg=iwmorange}
|
|
\setbeamercolor{frametitle}{fg=iwmorange}
|
|
\setbeamercolor{structure}{fg=iwmorange}
|
|
\setbeamercolor{normal text}{fg=iwmgray}
|
|
\setbeamercolor{author}{fg=iwmgray}
|
|
\setbeamercolor{date}{fg=iwmgray}
|
|
|
|
% Shorthands for bold math symbols: \vect and \mat wrap their argument in
% \mathbf, \gvect and \gmat wrap it in \boldsymbol.
% NOTE(review): presumably the g-variants are meant for Greek letters -- confirm.
\newcommand{\vect}[1]{\mathbf{#1}}
|
|
\newcommand{\mat}[1]{\mathbf{#1}}
|
|
\newcommand{\gvect}[1]{\boldsymbol{#1}}
|
|
\newcommand{\gmat}[1]{\boldsymbol{#1}}
|
|
|
|
% At the beginning of every section, insert a frame holding the table of
% contents. sectionstyle=show/hide shows the current section and hides the
% others; subsectionstyle=show/show/hide shows the subsections of the current
% section and hides those of other sections.
\AtBeginSection[]{
|
|
\frame{
|
|
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
|
|
|
|
% Headline: a horizontal section navigation bar of width \paperwidth with
% empty left and right inserts, set in the "section in head" beamer color,
% with 5pt of vertical space above and 2pt below.
\setbeamertemplate{headline}{
|
|
\begin{beamercolorbox}{section in head}
|
|
\vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
|
|
\end{beamercolorbox}
|
|
}
|
|
|
|
% Footline: frame number pushed to the right edge by \hfill and followed by a
% thick math space (\;), with a -2pt skip above and a 2pt skip below.
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
|
|
|
|
\title{Workflows for effective research data management}
|
|
\author{Nora Wickelmaier}
|
|
\date{May 27, 2024}
|
|
|
|
\begin{document}
|
|
|
|
\begin{frame}{}
|
|
\thispagestyle{empty}
|
|
\titlepage
|
|
\end{frame}
|
|
|
|
\begin{frame}{Not kiddin'}
|
|
I received this e-mail right after our last session\dots
|
|
\begin{center}
|
|
\includegraphics[scale = .5]{../figures/email_data_request_2024_01}
|
|
\end{center}
|
|
I finished my dissertation over a decade ago\dots
|
|
\end{frame}
|
|
|
|
\begin{frame}{Not kiddin'}
|
|
Again, definitely not what I would have liked to answer\dots
|
|
\begin{center}
|
|
\includegraphics[scale = .55]{../figures/email_data_request_2024_02}
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
\begin{frame}[<+->]{Some general rules}
|
|
\begin{itemize}
|
|
\item This e-mail will never hit you in a week where you have any free time
|
|
\item This e-mail will never be about data that is already published or at
|
|
least preprocessed and documented in a clean way
|
|
\item It will usually be sent by someone that you really want to answer to
|
|
(who would not want to answer to one of the four people who have actually
|
|
read parts of your dissertation?)
|
|
\item This e-mail will trigger a tremendous amount of guilt\dots
|
|
\item Would a better workflow have prevented this?
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}[allowframebreaks]{Results slido surveys: Habits}
|
|
\footnotesize
|
|
\begin{tabular}{ll}
|
|
\hline
|
|
What habits would help with good data management? & Habit \\
|
|
\hline
|
|
pseudonymizing/anonymizing data & data organisation \\
|
|
avoid redundancy & data organisation \\
|
|
have one place where you store the data & data organisation \\
|
|
uploading under a license (CC-BY....) & data sharing \\
|
|
loading data on an archive, repository etc... & data sharing \\
|
|
Doing the archive & data sharing \\
|
|
report changes to dataset & documentation \\
|
|
codebook & documentation \\
|
|
readme & documentation \\
|
|
report deviations from preregistration & documentation \\
|
|
Read-Me & documentation \\
|
|
Document data collection in Details & documentation \\
|
|
Document your code & documentation \\
|
|
Documentation & documentation \\
|
|
preregistration & documentation \\
|
|
recording the steps (taken through analysis) & documentation \\
|
|
github documentation & documentation \\
|
|
%discipline & workflow \\
|
|
%Do not do it in your spare time? & workflow \\
|
|
%Brainpower & workflow \\
|
|
%Trink about file names & workflow \\
|
|
%regular cleaning & workflow \\
|
|
%Special time slot in calendar & workflow \\
|
|
%clarity & workflow \\
|
|
%consistency & workflow \\
|
|
%checklists & workflow \\
|
|
%clear workflow & workflow \\
|
|
%Structure Structure Structure & workflow \\
|
|
%Be consistent & workflow \\
|
|
%consitency & workflow \\
|
|
%Reproducible code & workflow \\
|
|
%Time Investment & workflow \\
|
|
\hline
|
|
\end{tabular}
|
|
\begin{tabular}{ll}
|
|
\hline
|
|
What habits would help with good data management? & Habit \\
|
|
\hline
|
|
%pseudonymizing/anonymizing data & data organisation \\
|
|
%avoid redundancy & data organisation \\
|
|
%have one place where you store the data & data organisation \\
|
|
%uploading under a license (CC-BY....) & data sharing \\
|
|
%loading data on an archive, repository etc... & data sharing \\
|
|
%Doing the archive & data sharing \\
|
|
%report changes to dataset & documentation \\
|
|
%codebook & documentation \\
|
|
%readme & documentation \\
|
|
%report deviations from preregistration & documentation \\
|
|
%Read-Me & documentation \\
|
|
%Document data collection in Details & documentation \\
|
|
%Document yout code & documentation \\
|
|
%Documentation & documentation \\
|
|
%preregistration & documentation \\
|
|
%recording the steps (taken through analysis) & documentation \\
|
|
%github documentation & documentation \\
|
|
discipline & workflow \\
|
|
Do not do it in your spare time? & workflow \\
|
|
Brainpower & workflow \\
|
|
Think about file names & workflow \\
|
|
regular cleaning & workflow \\
|
|
Special time slot in calendar & workflow \\
|
|
clarity & workflow \\
|
|
consistency & workflow \\
|
|
checklists & workflow \\
|
|
clear workflow & workflow \\
|
|
Structure Structure Structure & workflow \\
|
|
Be consistent & workflow \\
|
|
consistency & workflow \\
|
|
Reproducible code & workflow \\
|
|
Time Investment & workflow \\
|
|
\hline
|
|
\end{tabular}
|
|
\end{frame}
|
|
|
|
\begin{frame}[allowframebreaks]{Results slido surveys: Barriers}
|
|
\footnotesize
|
|
\vspace{1cm}
|
|
\begin{tabular}{ll}
|
|
\hline
|
|
What are possible barriers for good data management? & Barrier \\
|
|
\hline
|
|
Remember the strategy used over time & lack of consistency \\
|
|
Keeping multiple copies consistent & lack of consistency \\
|
|
don't know the best tools for it & lack of skills \\
|
|
no idea where to start & lack of skills \\
|
|
complex research design & lack of skills \\
|
|
public security & lack of skills \\
|
|
expectation of presenting results fast (time) & lack of time \\
|
|
When should I do this task? & lack of time \\
|
|
Lack of planning & lack of time \\
|
|
too much other work & lack of time \\
|
|
%procrastination & low priority \\
|
|
%its not fun & low priority \\
|
|
%never having thought of it & low priority \\
|
|
%Other Priorities & low priority \\
|
|
%boring task & low priority \\
|
|
%forget it & low priority \\
|
|
%bad time management & low priority \\
|
|
%never gets perfected & perfectionism \\
|
|
%Fear of missing something & perfectionism \\
|
|
%Defining a good concept from the beginning on & perfectionism \\
|
|
%too many people in one project & responsibility diffusion \\
|
|
\hline
|
|
\end{tabular}
|
|
|
|
\newpage
|
|
\begin{tabular}{ll}
|
|
\hline
|
|
What are possible barriers for good data management? & Barrier \\
|
|
\hline
|
|
%Remember the strategy used over time & lack of consistency \\
|
|
%Keeping multiple copies consistent & lack of consistency \\
|
|
%don't know the best tools for it & lack of skills \\
|
|
%no idea where to start & lack of skills \\
|
|
%complex research design & lack of skills \\
|
|
%public security & lack of skills \\
|
|
%expectation of presenting results fast (time) & lack of time \\
|
|
%When should I do this task? & lack of time \\
|
|
%Lack of planning & lack of time \\
|
|
%too much other work & lack of time \\
|
|
procrastination & low priority \\
|
|
it's not fun & low priority \\
|
|
never having thought of it & low priority \\
|
|
Other Priorities & low priority \\
|
|
boring task & low priority \\
|
|
forget it & low priority \\
|
|
bad time management & low priority \\
|
|
never gets perfected & perfectionism \\
|
|
Fear of missing something & perfectionism \\
|
|
Defining a good concept from the beginning on & perfectionism \\
|
|
too many people in one project & responsibility diffusion \\
|
|
\hline
|
|
\end{tabular}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Results slido surveys: Topics}
|
|
\footnotesize
|
|
\centering
|
|
\begin{tabular}{p{11cm}l}
|
|
\hline
|
|
What topics would you like to cover this semester? & Topic \\
|
|
\hline
|
|
Understandable coding & clean coding \\
|
|
Cleaning up R code for readability & clean coding \\
|
|
Documentation of a final R script & clean coding \\
|
|
How to manage different data sources in one experiment\\ (e.g.\ eye tracking, performance, questionnaire..) & data organisation \\
|
|
understanding what should always go into a readme file. & data organisation \\
|
|
How to best arrange the data & data organisation \\
|
|
important things before the open-access data & data sharing \\
|
|
Where to store data for long-term accessibility (conventions?) & data sharing \\
|
|
Tools, where I should upload my final data & data sharing \\
|
|
how to integrate gitHub in workflow & version control \\
|
|
Introduction into available tools & workflow \\
|
|
Upload data before or after publishing a paper? Time management & workflow \\
|
|
going over guidelines/best practice on how to name files, folders and data as well as folder structure. & workflow \\
|
|
understanding where redundancy is needed (raw data?) and where to avoid it. & workflow \\
|
|
Steps and when to do what & workflow \\
|
|
\hline
|
|
\end{tabular}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Topics for this semester}
|
|
\centering
|
|
\begin{tabular}{ll}
|
|
\hline
|
|
Date & Topic \\
|
|
\hline
|
|
2024-05-13 & Introduction to data management \\
|
|
\only<1>{2024-05-27}\only<2>{\bf 2024-05-27} & \only<1>{Workflow}\only<2>{\bf Workflow} \\
|
|
2024-06-10 & Data organisation \\
|
|
2024-06-24 & Data sharing \\
|
|
2024-07-08 & Clean coding \\
|
|
2024-07-22 & Version control \\
|
|
\hline
|
|
\end{tabular}
|
|
\end{frame}
|
|
|
|
\section{Workflow}
|
|
|
|
\begin{frame}{What is a workflow and why do I need one?}
|
|
% slido
|
|
\centering
|
|
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 2}
|
|
|
|
\url{https://app.sli.do/event/qgqz43GC9EYZ3RbQG5QfvU}
|
|
\end{frame}
|
|
|
|
\begin{frame}{What is a workflow?}
|
|
%\pause
|
|
\begin{quote}
|
|
A workflow consists of an orchestrated and repeatable pattern of
|
|
activity, enabled by the systematic organization of resources into
|
|
processes that transform materials, provide services, or process
|
|
information.
|
|
\end{quote}
|
|
\vspace{-.3cm}
|
|
\flushright{\footnotesize \url{https://en.wikipedia.org/wiki/Workflow}}
|
|
\pause
|
|
\begin{columns}
|
|
\begin{column}[c]{.5\textwidth}
|
|
\flushleft
|
|
Important aspects:
|
|
\begin{itemize}
|
|
\item Repeatable pattern
|
|
\item Systematic organization
|
|
\item Transformation processes
|
|
\end{itemize}
|
|
\pause
|
|
\end{column}
|
|
\begin{column}[c]{.5\textwidth}
|
|
|
|
In short:\\
|
|
\begin{itemize}
|
|
\item A workflow answers the question:\\
|
|
\color{iwmorange}{\bf What's the most efficient way to get this work done?}
|
|
\end{itemize}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
\begin{frame}[<+->]{Why do I need a workflow?}
|
|
%\pause
|
|
\begin{itemize}
|
|
\item It boosts productivity
|
|
\item It reduces mental load
|
|
\item A truly optimized workflow will:
|
|
\begin{itemize}
|
|
\item Identify and remove unnecessary steps and processes that lead to slowdowns
|
|
\item Provide a sequential (chronological) order for accomplishing tasks
|
|
\item Automate some decisions and processes (freeing up time)
|
|
\item Reduce communication burdens (fewer e-mails, meetings, etc.)
|
|
\item Encourage collaboration
|
|
\item Track progress and assess performance
|
|
\item Keep records of previous processes and make future processes repeatable
|
|
\item Eliminate decision fatigue
|
|
\end{itemize}
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}{But let's start much smaller than this}
|
|
% slido
|
|
% What can you answer with "yes"?
|
|
% * I know more than 3 RStudio shortcuts
|
|
% * I have never updated my R packages
|
|
% * I know what the ISO 8601 date format is
|
|
% * I regularly delete duplicate files
|
|
% * I usually have a clean e-mail inbox
|
|
% * I use file naming conventions
|
|
\centering
|
|
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 2}
|
|
|
|
\url{https://app.sli.do/event/qgqz43GC9EYZ3RbQG5QfvU}
|
|
\end{frame}
|
|
|
|
\begin{frame}{The bare minimum (IMHO)}
|
|
\begin{itemize}
|
|
\item Shortcuts
|
|
\begin{itemize}
|
|
\item \texttt{CTRL+C} and \texttt{CTRL+V}
|
|
\item \texttt{ALT+Tab} to switch between applications
|
|
\item In your browser: \texttt{CTRL+L}, \texttt{CTRL+T}, \texttt{CTRL+W},
|
|
\texttt{CTRL+Tab}, \texttt{CTRL+Page Up/Down}
|
|
\item Using \texttt{Alt} to open up ``File''
|
|
\item Sending code chunks with \texttt{CTRL+Enter} to the console in RStudio
|
|
\end{itemize}
|
|
\item Making file endings visible
|
|
\item Associating TXT-files with a proper text editor
|
|
\item Making sure that CSV-files are \emph{never} opened by EXCEL accidentally
|
|
\item Setting List or Details View for files
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}{One (baby) step up}
|
|
\begin{itemize}
|
|
\item Shortcuts for efficient text editing
|
|
\begin{itemize}
|
|
\item Jumping to next word
|
|
\item Jumping to next instance of a word
|
|
\item Finding and replacing a certain word
|
|
\item Deleting/copying complete line
|
|
\item Commenting in/out of complete code chunks
|
|
\item \dots
|
|
\end{itemize}
|
|
\item Update R packages once a week
|
|
\item Update R and RStudio at least twice a year
|
|
\item If you use R outside of RStudio, use SDI
|
|
%\item Consistent date format (preferably ISO 8601)
|
|
%\item Deleting duplicates
|
|
%\item Cleaning out e-mails
|
|
%\item Self-sorting files
|
|
\end{itemize}
|
|
\vfill
|
|
\end{frame}
|
|
|
|
\begin{frame}{Project workflow}
|
|
\begin{itemize}
|
|
\item Project workflow refers to how you organize projects and move
|
|
through the various stages of the research cycle
|
|
\item \citet{Kathawalla2021} say that a project workflow includes:
|
|
\begin{itemize}
|
|
\item File folder structure
|
|
\item Document naming conventions
|
|
\item Version control
|
|
\item Cloud storage
|
|
\item Choice of who has access to a project and when (Collaborators? Public?)
|
|
\end{itemize}
|
|
\item Developing a clear project workflow is much easier for PhD
|
|
students than for later-career scholars who have many more projects to
|
|
organize
|
|
\end{itemize}
|
|
\vfill
|
|
\end{frame}
|
|
|
|
\section{Naming conventions}
|
|
|
|
\begin{frame}[fragile]{Examples}
|
|
\begin{columns}
|
|
\begin{column}[c]{.6\textwidth}
|
|
\begin{itemize}
|
|
\item Files with no naming convention:
|
|
\begin{lstlisting}
|
|
Test data 2016.xlsx
|
|
Meeting notes Jan 17.doc
|
|
Notes Eric.txt
|
|
Final FINAL last version.docx
|
|
\end{lstlisting}
|
|
\end{itemize}
|
|
\end{column}
|
|
\begin{column}[c]{.3\textwidth}
|
|
\includegraphics[scale = .3]{../figures/xkcd_naming_conventions}
|
|
\end{column}
|
|
\end{columns}
|
|
\begin{itemize}
|
|
\item Files with naming convention:
|
|
\begin{lstlisting}
|
|
20160104_ProjectA_Ex1Test1_SmithE_v1.xlsx
|
|
20160104_ProjectA_MeetingNotes_SmithE_v2.docx
|
|
Experiment1_PANAS_20231011-140811_Image04.tif
|
|
\end{lstlisting}
|
|
\end{itemize}
|
|
{\tiny
|
|
\url{https://xkcd.com/1459/}\hfill
|
|
\url{https://datamanagement.hms.harvard.edu/collect/file-naming-conventions}
|
|
}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{3\,am in the morning before a deadline\dots}
|
|
\begin{columns}
|
|
\begin{column}[c]{.5\textwidth}
|
|
These?\\[1ex]
|
|
\hrule\vspace{.2cm}
|
|
\begin{Verbatim}[commandchars=\\\{\}]
|
|
01_marshal-data.md
|
|
01_marshal-data.R
|
|
02_pre-dea-filtering.md
|
|
02_pre-dea-filtering.R
|
|
03_dea-with-limma-voom.md
|
|
03_dea-with-limma-voom.R
|
|
90_limma-model-term-name-fiasco.md
|
|
90_limma-model-term-name-fiasco.R
|
|
helper01_load-counts.R
|
|
helper02_load-exp-des.R
|
|
helper03_load-focus-statinf.R
|
|
helper04_extract-and-tidy.R
|
|
\end{Verbatim}
|
|
\end{column}
|
|
\begin{column}[c]{.5\textwidth}
|
|
Or these?\\[1ex]
|
|
\hrule\vspace{.2cm}
|
|
\begin{Verbatim}[commandchars=\\\{\}]
|
|
01.md
|
|
01.R
|
|
02.md
|
|
02.R
|
|
03.md
|
|
03.R
|
|
90.md
|
|
90.R
|
|
helper01.R
|
|
helper02.R
|
|
helper03.R
|
|
helper04.R
|
|
\end{Verbatim}
|
|
\end{column}
|
|
\end{columns}
|
|
{\hfill\tiny \citet{Wilbrandt2023}}
|
|
\end{frame}
|
|
|
|
\begin{frame}{The basics}
|
|
\begin{itemize}
|
|
\item File names should contain only letters, numbers, underscores, and dashes
|
|
\pause
|
|
\item A dash or underscore should be used instead of a space
|
|
\pause
|
|
\item No special characters (\& ' " ; : * ! \# \$, etc.)
|
|
\pause
|
|
\item Maybe decide on a convention like
|
|
\begin{itemize}
|
|
\item camel{\bf\color{iwmorange}C}ase
|
|
\item snake{\bf\color{iwmorange}\_}case
|
|
\item {\bf\color{iwmorange}P}ascal{\bf\color{iwmorange}C}ase
|
|
\end{itemize}
|
|
\end{itemize}
|
|
\pause
|
|
\begin{block}{Three principles for file names}
|
|
\begin{enumerate}
|
|
\item Machine readable
|
|
\item Human readable
|
|
\item Plays well with default ordering
|
|
\end{enumerate}
|
|
\end{block}
|
|
\vfill
|
|
\end{frame}
|
|
|
|
\begin{frame}{Example from website project}
|
|
\centering
|
|
\only<1>{\includegraphics[width = .7\textwidth]{../figures/ex_filenaming_website_01}}
|
|
\only<2>{\includegraphics[width = .7\textwidth]{../figures/ex_filenaming_website_02}}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Steps to consider}
|
|
\begin{enumerate}
|
|
\item Think about your files
|
|
\item Identify metadata
|
|
\item Abbreviate or encode metadata
|
|
\item Deliberately separate metadata elements
|
|
\item How will you search for your files?
|
|
\item Write down your naming conventions
|
|
\item Use versioning (include numbering, dates)
|
|
\end{enumerate}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Think about your files, identify and encode metadata}
|
|
\begin{columns}
|
|
\begin{column}[c]{.6\textwidth}
|
|
\begin{itemize}
|
|
\item What kind of files will I have in my project?
|
|
\begin{itemize}
|
|
\item Data files
|
|
\item Analysis files
|
|
\item Files including stimuli (maybe pictures or similar)
|
|
\item Documentation files
|
|
\item WORD documents like a paper etc.
|
|
\item \dots
|
|
\end{itemize}
|
|
\end{itemize}
|
|
\end{column}\pause
|
|
\begin{column}[c]{.4\textwidth}
|
|
Maybe pick prefixes:\\
|
|
\verb+DATA_[...].csv+
|
|
\verb+ANALYSIS_[...].R+
|
|
\verb+PAPER_[...].docx+
|
|
\end{column}
|
|
\end{columns}\pause
|
|
\begin{columns}
|
|
\begin{column}[c]{.6\textwidth}
|
|
\begin{itemize}
|
|
\item What kind of metadata will I have?
|
|
\begin{itemize}
|
|
\item Subject identifier
|
|
\item Session identifier
|
|
\item Different conditions
|
|
\item \dots
|
|
\end{itemize}
|
|
\end{itemize}
|
|
\end{column}\pause
|
|
\begin{column}[c]{.4\textwidth}
|
|
Encode metadata:\\
|
|
\verb+DATA_vp01_load_ses01.csv+
|
|
\verb+ANALYSIS_01_model-selection.R+
|
|
\verb+ANALYSIS_02_plots.R+
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{How do you want your files to be ordered?}
|
|
\begin{enumerate}
|
|
\item Sort by type\\
|
|
\verb+ANALYSIS_01_model-selection.R+\\
|
|
\verb+ANALYSIS_02_plots.R+\\
|
|
\verb+DATA_vp01_load_ses01.csv+
|
|
\item Sort by date\\
|
|
\verb+2022-09-29_exp1_vpall.txt+\\
|
|
\verb+2022-09-30_analysis.txt+
|
|
\item Sort in my order\\
|
|
\verb+01_data-cleaning_study1.R+\\
|
|
\verb+02_analysis_study1.Rmd+
|
|
\end{enumerate}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Zero left padding}
|
|
\begin{columns}
|
|
\begin{column}[c]{.5\textwidth}
|
|
Without left padding\\[1ex]
|
|
\hrule\vspace{.2cm}
|
|
\begin{Verbatim}[commandchars=\\\{\}]
|
|
2016_11_14-11_13_52.log
|
|
2016_11_14-11_23_52.log
|
|
\textcolor{iwmblue}{2016_11_14-11_3_52.log}
|
|
2016_11_14-11_33_52.log
|
|
2016_11_14-11_57_58.log
|
|
2016_11_14-12_17_58.log
|
|
2016_11_14-12_27_58.log
|
|
2016_11_14-12_37_58.log
|
|
2016_11_14-12_47_58.log
|
|
2016_11_14-12_57_58.log
|
|
\textcolor{iwmblue}{2016_11_14-12_7_58.log}
|
|
\end{Verbatim}
|
|
\end{column}
|
|
\begin{column}[c]{.5\textwidth}
|
|
With left padding\\[1ex]
|
|
\hrule\vspace{.2cm}
|
|
\begin{Verbatim}[commandchars=\\\{\}]
|
|
\textcolor{iwmblue}{2016_11_14-11_03_52.log}
|
|
2016_11_14-11_13_52.log
|
|
2016_11_14-11_23_52.log
|
|
2016_11_14-11_33_52.log
|
|
2016_11_14-11_57_58.log
|
|
\textcolor{iwmblue}{2016_11_14-12_07_58.log}
|
|
2016_11_14-12_17_58.log
|
|
2016_11_14-12_27_58.log
|
|
2016_11_14-12_37_58.log
|
|
2016_11_14-12_47_58.log
|
|
2016_11_14-12_57_58.log
|
|
\end{Verbatim}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Date format convention}
|
|
\begin{columns}
|
|
\begin{column}{.5\textwidth}
|
|
\begin{center}
|
|
\includegraphics[scale = .4]{../figures/xkcd_iso_8601_2x}
|
|
\end{center}
|
|
\end{column}
|
|
\begin{column}{.5\textwidth}
|
|
\begin{itemize}
|
|
\item Stick to conventions if possible (even if you prefer something
|
|
else personally)
|
|
\item This can be read easily by machines (working with it in R)
|
|
\item It is inclusive: Americans interpret this the same way as
|
|
Europeans
|
|
\end{itemize}
|
|
\end{column}
|
|
\end{columns}
|
|
\vfill
|
|
\flushright{\tiny{\url{https://xkcd.com/1179/}}}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Write down your naming conventions}
|
|
\small{
|
|
\begin{tabular}{@{}lll@{}}
|
|
\hline
|
|
& Example & Documentation \\
|
|
\hline
|
|
Content-specific & \verb+DATA_vp01_load_ses01.csv+ & \verb+DATA_[ID]_[cond]_[ses].csv+\\
|
|
Descriptive & \verb+ANALYSIS_01_model-selection.R+ & \verb+ANALYSIS_[#]_[descrp].R+\\
|
|
Consistent & \verb+ANALYSIS_02_plots.R+ & \verb+ANALYSIS_[#]_[descrp].R+\\
|
|
Leading date & \verb+2022-09-29_exp1_vpall.txt+ & \verb+[yyyy-mm-dd]_[exp]_[type].txt+\\
|
|
Leading zero & \verb+01_data-cleaning_study1.Rmd+ & \verb+[##]_[descrp]_[study].[R/Rmd]+\\
|
|
\hline
|
|
\end{tabular}
|
|
}
|
|
\begin{itemize}
|
|
\item Documenting is key and becomes second nature after a while
|
|
\item Create a README file and write down everything that could be
|
|
useful to remember
|
|
\item Update this README file regularly
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Version control}
|
|
\begin{center}
|
|
\includegraphics[scale = .38]{../figures/phd101212s}
|
|
\end{center}
|
|
\vfill
|
|
{\hfill \tiny \url{https://phdcomics.com/comics/archive.php?comicid=1531}}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Version control}
|
|
\begin{itemize}
|
|
\item Version control is a systematic approach to record changes made
|
|
in a file, or set of files, over time
|
|
\item File versioning can be as simple as using file naming conventions
|
|
like suffixes \verb+*_v1+, \verb+*_v2+, \verb+*_vn+
|
|
\end{itemize}
|
|
\vspace{.3cm}
|
|
\begin{enumerate}
|
|
\item Create files -- these may contain text, code or both
|
|
\item Work on these files, by changing, deleting or adding new content
|
|
\item Create a snapshot of the file status (also known as version) at this time
|
|
\item Document versions (e.\,g., in a README file)
|
|
\end{enumerate}
|
|
\vfill
|
|
{\hfill \tiny
|
|
\url{https://the-turing-way.netlify.app/reproducible-research/vcs.html}}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Example master thesis}
|
|
\centering
|
|
\includegraphics[width = .6\textwidth]{../figures/ex_filenaming_ma_01}
|
|
\end{frame}
|
|
|
|
\begin{frame}{}
|
|
\centering
|
|
{\Huge
|
|
\color{iwmblue}{There is no right or wrong -- only what works best
|
|
for you!\\\vspace{.5cm}\pause
|
|
AND: You can change your file names whenever you feel like something else
|
|
might work even better!}}
|
|
\vfill\pause
|
|
Checklist for good file names: \url{https://osf.io/dpu45}
|
|
\end{frame}
|
|
|
|
\section{Folder structure}
|
|
|
|
\begin{frame}{The basics}
|
|
\begin{itemize}
|
|
\item One project, one folder
|
|
\pause
|
|
\item Consistent pattern for each project
|
|
\pause
|
|
\item Do not nest too deep!\\
|
|
$\to$ depth vs.\ width (maximum path length on Windows is 260 characters by default)
|
|
\pause
|
|
\item Add README file at top level
|
|
\pause
|
|
\item Good naming conventions also apply to folders:
|
|
\begin{itemize}
|
|
\item Folder names should contain only letters, numbers, underscores, and dashes
|
|
\item A dash or underscore should be used instead of a space
|
|
\item No special characters (\& ' " ; : * ! \# \$, etc.)
|
|
\end{itemize}
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Folder structure for a master thesis project}
|
|
\begin{tikzpicture}[
|
|
every node/.style = {text width = 4cm, align = left},
|
|
every path/.style = {thick, draw}
|
|
]
|
|
\node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+};
|
|
% first level
|
|
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
|
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+};
|
|
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+};
|
|
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+};
|
|
\node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+};
|
|
\node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+};
|
|
\node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+};
|
|
\node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+};
|
|
\node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+};
|
|
\path (top.east) -- (n1.west);
|
|
\path (top.east) -- (n2.west);
|
|
\path (top.east) -- (n3.west);
|
|
\path (top.east) -- (n4.west);
|
|
\path (top.east) -- (n5.west);
|
|
\path (top.east) -- (n6.west);
|
|
\path (top.east) -- (n7.west);
|
|
\path (top.east) -- (n8.west);
|
|
\path (top.east) -- (file.west);
|
|
\end{tikzpicture}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Folder structure for a master thesis project}
|
|
\begin{tikzpicture}[
|
|
every node/.style = {text width = 4cm, align = left},
|
|
every path/.style = {thick, draw}
|
|
]
|
|
\node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+};
|
|
% first level
|
|
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
|
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+};
|
|
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+};
|
|
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+};
|
|
\node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+};
|
|
\node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+};
|
|
\node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+};
|
|
\node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+};
|
|
\node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+};
|
|
\path (top.east) -- (n1.west);
|
|
\path (top.east) -- (n2.west);
|
|
\path (top.east) -- (n3.west);
|
|
\path (top.east) -- (n4.west);
|
|
\path (top.east) -- (n5.west);
|
|
\path (top.east) -- (n6.west);
|
|
\path (top.east) -- (n7.west);
|
|
\path (top.east) -- (n8.west);
|
|
\path (top.east) -- (file.west);
|
|
% second level
|
|
\node[text width = 7cm] (o1) at (10, 0) {\faIcon[regular]{file-pdf} \verb+master-thesis_forms_2022.pdf+};
|
|
\node[text width = 7cm] (o2) at (10, -0.7) {\faIcon[regular]{file} \verb+infoveranstaltung.md+};
|
|
\path (n1.center) -- (o1.west);
|
|
\path (n1.center) -- (o2.west);
|
|
\end{tikzpicture}
|
|
\end{frame}
|
|
|
|
|
|
\begin{frame}[fragile]{Folder structure for a master thesis project}
|
|
\begin{tikzpicture}[
|
|
every node/.style = {text width = 4cm, align = left},
|
|
every path/.style = {thick, draw}
|
|
]
|
|
\node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+};
|
|
% first level
|
|
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
|
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+};
|
|
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+};
|
|
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+};
|
|
\node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+};
|
|
\node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+};
|
|
\node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+};
|
|
\node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+};
|
|
\node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+};
|
|
\path (top.east) -- (n1.west);
|
|
\path (top.east) -- (n2.west);
|
|
\path (top.east) -- (n3.west);
|
|
\path (top.east) -- (n4.west);
|
|
\path (top.east) -- (n5.west);
|
|
\path (top.east) -- (n6.west);
|
|
\path (top.east) -- (n7.west);
|
|
\path (top.east) -- (n8.west);
|
|
\path (top.east) -- (file.west);
|
|
% second level
|
|
\node[text width = 7cm] (c1) at (10, -0.7) {\faIcon[regular]{file-code}
|
|
\verb+01_preprocessing.R+};
|
|
\node[text width = 7cm] (c2) at (10, -1.4) {\faIcon[regular]{file-code}
|
|
\verb+02_modeling.R+};
|
|
\node[text width = 7cm] (c3) at (10, -2.1) {\faIcon[regular]{file-code}
|
|
\verb+03_plots.Rmd+};
|
|
\path (n2.center) -- (c1.west);
|
|
\path (n2.center) -- (c2.west);
|
|
\path (n2.center) -- (c3.west);
|
|
\end{tikzpicture}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Folder structure for a master thesis project}
|
|
\begin{tikzpicture}[
|
|
every node/.style = {text width = 4cm, align = left},
|
|
every path/.style = {thick, draw}
|
|
]
|
|
\node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+};
|
|
% first level
|
|
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
|
|
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+};
|
|
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+};
|
|
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+};
|
|
\node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+};
|
|
\node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+};
|
|
\node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+};
|
|
\node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+};
|
|
\node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+};
|
|
\path (top.east) -- (n1.west);
|
|
\path (top.east) -- (n2.west);
|
|
\path (top.east) -- (n3.west);
|
|
\path (top.east) -- (n4.west);
|
|
\path (top.east) -- (n5.west);
|
|
\path (top.east) -- (n6.west);
|
|
\path (top.east) -- (n7.west);
|
|
\path (top.east) -- (n8.west);
|
|
\path (top.east) -- (file.west);
|
|
% second level
|
|
\node[text width = 7cm] (m1) at (10, -1.4) {\faIcon{folder} \verb+raw_data+};
|
|
\node[text width = 7cm] (m2) at (10, -2.1) {\faIcon[regular]{file}
|
|
\verb+DATA_vpall_exp1.csv+};
|
|
\path (n3.center) -- (m1.west);
|
|
\path (n3.center) -- (m2.west);
|
|
\end{tikzpicture}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Folder structure for a master thesis project}
  % Directory tree of the thesis folder `MA`, with the contents of `talks`
  % shown as a second and third level. The node labels use \verb, hence
  % [fragile].
  \begin{tikzpicture}[
    every node/.style = {text width = 4cm, align = left},
    every path/.style = {thick, draw} % each \path below is drawn as a thick line
  ]
    % root folder
    \node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+};

    % first level: entries are stacked in steps of 0.7 below each other
    \node (n1)   at (4,  0)   {\faIcon{folder} \verb+admin+};
    \node (n2)   at (4, -0.7) {\faIcon{folder} \verb+code+};
    \node (n3)   at (4, -1.4) {\faIcon{folder} \verb+data+};
    \node (n4)   at (4, -2.1) {\faIcon{folder} \verb+expose+};
    \node (n5)   at (4, -2.8) {\faIcon{folder} \verb+figures+};
    \node (n6)   at (4, -3.5) {\faIcon{folder} \verb+literature+};
    \node (n7)   at (4, -4.2) {\faIcon{folder} \verb+talks+};
    \node (n8)   at (4, -4.9) {\faIcon{folder} \verb+thesis+};
    \node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+};

    % connectors from the root to every first-level entry
    \foreach \i in {1,...,8} {
      \path (top.east) -- (n\i.west);
    }
    \path (top.east) -- (file.west);

    % second level: dated subfolders of `talks`
    \node[text width = 6cm] (m3) at (8, -4.2) {\faIcon{folder} \verb+2023-05-05+};
    \node[text width = 6cm] (m4) at (8, -4.9) {\faIcon{folder} \verb+2023-10-12+};

    % third level: both files are connected to the first dated folder (m3)
    \node[text width = 6cm] (t1) at (13, -4.2) {\faIcon[regular]{file-powerpoint}
      \verb+colloq_230505.pptx+};
    \node[text width = 6cm] (t2) at (13, -4.9) {\faIcon[regular]{file-word} \verb+notes.docx+};

    % connectors talks -> dated folders
    \foreach \child in {m3, m4} {
      \path (n7.center) -- (\child.west);
    }
    % connectors first dated folder -> its files
    \foreach \child in {t1, t2} {
      \path (m3.center) -- (\child.west);
    }
  \end{tikzpicture}
\end{frame}
|
|
|
|
\begin{frame}{TONIC: Structured Template}
  % Motivates the TONIC project template; links to it are placed at the
  % bottom of the slide via \vfill.
  \begin{itemize}
    \item Different research projects might have different structures
    \item However, there are certain similarities for most of them
    \item You can find structured templates on the internet
    \item One pretty generic one is TONIC
  \end{itemize}
  \vfill
  % one link per line
  \url{https://github.com/tonic-team/Tonic-Research-Project-Template}\\
  \url{https://gin-tonic.netlify.app/}
\end{frame}
|
|
|
|
\begin{frame}[fragile]{TONIC: Structured Template}
  % Top-level layout of the TONIC template: seven numbered folders plus a
  % license and a README file. The node labels use \verb, hence [fragile].
  \begin{tikzpicture}[
    every node/.style = {text width = 7cm, align = left},
    every path/.style = {thick, draw} % each \path below is drawn as a thick line
  ]
    % root folder
    \node (top) at (0, 0) {\faIcon{folder}
      \verb+project_name+};

    % first level: entries are stacked in steps of 0.7 below each other
    \node (n1) at (7,  0)   {\faIcon{folder} \verb+01_project_management+};
    \node (n2) at (7, -0.7) {\faIcon{folder} \verb+02_material_and_methods+};
    \node (n3) at (7, -1.4) {\faIcon{folder} \verb+03_data+};
    \node (n4) at (7, -2.1) {\faIcon{folder} \verb+04_data_analysis+};
    \node (n5) at (7, -2.8) {\faIcon{folder} \verb+05_figures+};
    \node (n6) at (7, -3.5) {\faIcon{folder} \verb+06_dissemination+};
    \node (n7) at (7, -4.2) {\faIcon{folder} \verb+07_misc+};
    \node (f1) at (7, -4.9) {\faIcon[regular]{file} \verb+LICENSE-CC-BY+};
    \node (f2) at (7, -5.6) {\faIcon[regular]{file} \verb+README.md+};

    % Connectors start at the centre of the 7cm-wide root node rather than
    % at its east anchor.
    \foreach \i in {1,...,7} {
      \path (top.center) -- (n\i.west);
    }
    \foreach \i in {1,2} {
      \path (top.center) -- (f\i.west);
    }
  \end{tikzpicture}
\end{frame}
|
|
|
|
\begin{frame}[fragile]{TONIC: Structured Template}
{Subfolders}
  % NOTE(review): `{Subfolders}` directly follows the frame title and looks
  % like beamer's {title}{subtitle} form -- confirm it is picked up as the
  % subtitle across the line break in a fragile frame.
  %
  % Two stacked trees separated by a rule: the subfolders of
  % `01_project_management` and of `02_material_and_methods`. Node names are
  % reused in both pictures; they are local to each tikzpicture.

  % --- 01_project_management ---
  \begin{tikzpicture}[
    every node/.style = {text width = 5.5cm, align = left},
    every path/.style = {thick, draw} % each \path below is drawn as a thick line
  ]
    \node (top) at (0, 0) {\faIcon{folder}
      \verb+01_project_management+};

    % first level
    \node (n1) at (7,  0)   {\faIcon{folder} \verb+01_administration_files+};
    \node (n2) at (7, -0.7) {\faIcon{folder} \verb+02_accepted_grants+};
    \node (n3) at (7, -1.4) {\faIcon{folder} \verb+03_meeting_minutes+};
    \node (n4) at (7, -2.1) {\faIcon{folder} \verb+04_related_literature+};
    \node (n5) at (7, -2.8) {\faIcon{folder} \verb+05_data_management_plans+};
    \node (n6) at (7, -3.5) {\faIcon{folder} \verb+06_notebook+};

    % connectors from the parent folder to every entry
    \foreach \i in {1,...,6} {
      \path (top.east) -- (n\i.west);
    }
  \end{tikzpicture}
  \hrule
  % --- 02_material_and_methods ---
  \begin{tikzpicture}[
    every node/.style = {text width = 5.5cm, align = left},
    every path/.style = {thick, draw}
  ]
    \node (top) at (0, 0) {\faIcon{folder}
      \verb+02_material_and_methods+};

    % first level
    \node (n1) at (7,  0)   {\faIcon{folder} \verb+01_protocols+};
    \node (n2) at (7, -0.7) {\faIcon{folder} \verb+02_code+};
    \node (n3) at (7, -1.4) {\faIcon{folder} \verb+03_hardware+};
    \node (f2) at (7, -2.1) {\faIcon[regular]{file} \verb+README_MM.md+};

    % connectors from the parent folder to every entry
    \foreach \i in {1,...,3} {
      \path (top.east) -- (n\i.west);
    }
    \path (top.east) -- (f2.west);
  \end{tikzpicture}
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Additional tips}
  % Practical tips with two example "dump" folders, each typeset as a
  % single-node TikZ picture in orange. The node labels use \verb, hence
  % [fragile].
  \begin{itemize}
    \item Dump incoming files not fitting your conventions in a prespecified
      folder, e.\,g.,
      \begin{tikzpicture}[
        every node/.style = {text width = 7cm, align = left, color = iwmorange},
        every path/.style = {thick, draw}
      ]
        \node (top) at (0, 0) {\faIcon{folder} \verb+z_from-nora+};
      \end{tikzpicture}
    \item Then adapt files from there and document changes/provenance
    \item Dump older files cluttering your working directory
      \begin{tikzpicture}[
        every node/.style = {text width = 7cm, align = left, color = iwmorange},
        every path/.style = {thick, draw}
      ]
        \node (top) at (0, 0) {\faIcon{folder} \verb+zzz+};
      \end{tikzpicture}
    \item Delete files when the project is finished
  \end{itemize}
  \vfill
  % the closing statement is revealed on a second overlay
  \pause
  \begin{center}
    {\Huge\color{iwmblue}{There is no right or wrong -- only what works best
      for you!}}
  \end{center}
\end{frame}
|
|
|
|
% Everything from here on belongs to the appendix.
\appendix

% Bibliography slide. The commented-out variant lets beamer split the
% reference list over several slides; the active variant is a single slide.
%\begin{frame}[allowframebreaks]{References}
\begin{frame}{References}
  \printbibliography
  \vfill
\end{frame}

\end{document}
|
|
|