% data_management/02_workflow/02_workflow.tex
%
% 1046 lines
% 36 KiB
% TeX

\documentclass[aspectratio=169]{beamer}
\usepackage{listings}
% Declare a single input encoding: with [utf8,latin1] the option given last
% (latin1) was the one in effect. NOTE(review): assumes this file is saved as
% ASCII/UTF-8 -- confirm.
\usepackage[utf8]{inputenc}
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
\addbibresource{../literature/lit.bib}
\usepackage{fancyvrb}
\usepackage{fontawesome5} % get icons
\usepackage{multirow}
\usepackage{color, colortbl}
\usepackage{tikz}
\usetikzlibrary{fit}
\usepackage[edges]{forest}
% Global defaults for all lstlisting environments: bash highlighting, with
% every token class drawn in iwmgray on a light gray background, no frame and
% no line numbers. (basicstyle was previously given twice with the same value.)
\lstset{language=bash,%
backgroundcolor=\color{iwmgray!15!white},
basicstyle=\ttfamily\color{iwmgray},
frame=none,
commentstyle=\slshape\color{iwmgray},
keywordstyle=\bfseries\color{iwmgray},
identifierstyle=\color{iwmgray},
stringstyle=\color{iwmgray},
numbers=none,%left,numberstyle=\tiny,
basewidth={.5em, .4em},
showstringspaces=false,
emphstyle=\color{red!50!white}}
\makeatletter \def\newblock{\beamer@newblock} \makeatother
\beamertemplatenavigationsymbolsempty
\setbeamertemplate{itemize items}[circle]
\setbeamertemplate{section in toc}[circle]
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
\setbeamercolor{block body}{bg=iwmorange!50!white}
\setbeamercolor{block title}{fg=white, bg=iwmorange}
% Definitions for biblatex
\setbeamercolor{bibliography entry note}{fg=iwmgray}
\setbeamercolor{bibliography entry author}{fg=iwmgray}
\setbeamertemplate{bibliography item}{}
\definecolor{iwmorange}{RGB}{255,105,0}
\definecolor{iwmgray}{RGB}{67,79,79}
\definecolor{iwmblue}{RGB}{60,180,220}
\setbeamercolor{title}{fg=iwmorange}
\setbeamercolor{frametitle}{fg=iwmorange}
\setbeamercolor{structure}{fg=iwmorange}
\setbeamercolor{normal text}{fg=iwmgray}
\setbeamercolor{author}{fg=iwmgray}
\setbeamercolor{date}{fg=iwmgray}
% Math notation shortcuts. \vect and \mat set their argument with \mathbf;
% \gvect and \gmat use \boldsymbol instead (presumably intended for Greek
% symbols -- TODO confirm).
\newcommand{\vect}[1]{\mathbf{#1}}
\newcommand{\mat}[1]{\mathbf{#1}}
\newcommand{\gvect}[1]{\boldsymbol{#1}}
\newcommand{\gmat}[1]{\boldsymbol{#1}}
% Insert an outline frame at the beginning of every section.
% NOTE(review): per the beamer manual, sectionstyle=show/hide shows the current
% section and hides the others; subsectionstyle=show/show/hide shows only the
% subsections of the current section -- confirm against the rendered slides.
\AtBeginSection[]{
\frame{
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
% Headline: horizontal section navigation bar, as wide as the paper, with
% empty left and right inserts and a little vertical padding.
\setbeamertemplate{headline}{
\begin{beamercolorbox}{section in head}
\vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
\end{beamercolorbox}
}
% Footline: only the frame number, pushed to the right by \hfill.
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
\title{Workflows for effective research data management}
\author{Nora Wickelmaier}
\date{May 27, 2024}
\begin{document}
\begin{frame}{}
\thispagestyle{empty}
\titlepage
\end{frame}
\begin{frame}{Not kiddin'}
I received this e-mail right after our last session\dots
\begin{center}
\includegraphics[scale = .5]{../figures/email_data_request_2024_01}
\end{center}
I finished my dissertation over a decade ago\dots
\end{frame}
\begin{frame}{Not kiddin'}
Again, definitely not what I would have liked to answer\dots
\begin{center}
\includegraphics[scale = .55]{../figures/email_data_request_2024_02}
\end{center}
\end{frame}
\begin{frame}[<+->]{Some general rules}
\begin{itemize}
\item This e-mail will never hit you in a week where you have any free time
\item This e-mail will never be about data that is already published or at
least preprocessed and documented in a clean way
\item It will usually be sent by someone that you really want to answer to
(who would not want to answer to one of the four people who have actually
read parts of your dissertation?)
\item This e-mail will trigger a tremendous amount of guilt\dots
\item Would a better workflow have prevented this?
\end{itemize}
\end{frame}
\begin{frame}[allowframebreaks]{Results slido surveys: Habits}
\footnotesize
\begin{tabular}{ll}
\hline
What habits would help with good data management? & Habit \\
\hline
pseudonymizing/anonymizing data & data organisation \\
avoid redundancy & data organisation \\
have one place where you store the data & data organisation \\
uploading under a license (CC-BY....) & data sharing \\
loading data on an archive, repository etc... & data sharing \\
Doing the archive & data sharing \\
report changes to dataset & documentation \\
codebook & documentation \\
readme & documentation \\
report deviations from preregistration & documentation \\
Read-Me & documentation \\
Document data collection in Details & documentation \\
Document yout code & documentation \\
Documentation & documentation \\
preregistration & documentation \\
recording the steps (taken through analysis) & documentation \\
github documentation & documentation \\
%discipline & workflow \\
%Do not do it in your spare time? & workflow \\
%Brainpower & workflow \\
%Trink about file names & workflow \\
%regular cleaning & workflow \\
%Special time slot in calendar & workflow \\
%clarity & workflow \\
%consistency & workflow \\
%checklists & workflow \\
%clear workflow & workflow \\
%Structure Structure Structure & workflow \\
%Be consistent & workflow \\
%consitency & workflow \\
%Reproducible code & workflow \\
%Time Investment & workflow \\
\hline
\end{tabular}
\begin{tabular}{ll}
\hline
What habits would help with good data management? & Habit \\
\hline
%pseudonymizing/anonymizing data & data organisation \\
%avoid redundancy & data organisation \\
%have one place where you store the data & data organisation \\
%uploading under a license (CC-BY....) & data sharing \\
%loading data on an archive, repository etc... & data sharing \\
%Doing the archive & data sharing \\
%report changes to dataset & documentation \\
%codebook & documentation \\
%readme & documentation \\
%report deviations from preregistration & documentation \\
%Read-Me & documentation \\
%Document data collection in Details & documentation \\
%Document yout code & documentation \\
%Documentation & documentation \\
%preregistration & documentation \\
%recording the steps (taken through analysis) & documentation \\
%github documentation & documentation \\
discipline & workflow \\
Do not do it in your spare time? & workflow \\
Brainpower & workflow \\
Trink about file names & workflow \\
regular cleaning & workflow \\
Special time slot in calendar & workflow \\
clarity & workflow \\
consistency & workflow \\
checklists & workflow \\
clear workflow & workflow \\
Structure Structure Structure & workflow \\
Be consistent & workflow \\
consitency & workflow \\
Reproducible code & workflow \\
Time Investment & workflow \\
\hline
\end{tabular}
\end{frame}
\begin{frame}[allowframebreaks]{Results slido surveys: Barriers}
\footnotesize
\vspace{1cm}
\begin{tabular}{ll}
\hline
What are possible barriers for good data management? & Barrier \\
\hline
Remember the strategy used over time & lack of consistency \\
Keeping multiple copies consistent & lack of consistency \\
don't know the best tools for it & lack of skills \\
no idea where to start & lack of skills \\
complex research design & lack of skills \\
public security & lack of skills \\
expectation of presenting results fast (time) & lack of time \\
When should I do this task? & lack of time \\
Lack of planning & lack of time \\
too much other work & lack of time \\
%procrastination & low priority \\
%its not fun & low priority \\
%never having thought of it & low priority \\
%Other Priorities & low priority \\
%boring task & low priority \\
%forget it & low priority \\
%bad time management & low priority \\
%never gets perfected & perfectionism \\
%Fear of missing something & perfectionism \\
%Defining a good concept from the beginning on & perfectionism \\
%too many people in one project & responsibility diffusion \\
\hline
\end{tabular}
\newpage
\begin{tabular}{ll}
\hline
What are possible barriers for good data management? & Barrier \\
\hline
%Remember the strategy used over time & lack of consistency \\
%Keeping multiple copies consistent & lack of consistency \\
%don't know the best tools for it & lack of skills \\
%no idea where to start & lack of skills \\
%complex research design & lack of skills \\
%public security & lack of skills \\
%expectation of presenting results fast (time) & lack of time \\
%When should I do this task? & lack of time \\
%Lack of planning & lack of time \\
%too much other work & lack of time \\
procrastination & low priority \\
its not fun & low priority \\
never having thought of it & low priority \\
Other Priorities & low priority \\
boring task & low priority \\
forget it & low priority \\
bad time management & low priority \\
never gets perfected & perfectionism \\
Fear of missing something & perfectionism \\
Defining a good concept from the beginning on & perfectionism \\
too many people in one project & responsibility diffusion \\
\hline
\end{tabular}
\end{frame}
\begin{frame}{Results slido surveys: Topics}
\footnotesize
\centering
\begin{tabular}{p{11cm}l}
\hline
What topics would you like to cover this semester? & Topic \\
\hline
Understandable coding & clean coding \\
Cleaning up R code for readability & clean coding \\
Documentation of a final R script & clean coding \\
How to manage different data sources in one experiment\\ (e.g.\ eye tracking, performance, questionnaire..) & data organisation \\
understanding what should always go into a readme file. & data organisation \\
How to best arrange the data & data organisation \\
important things before the open-access data & data sharing \\
Where to store data for long-term accessibility (conventions?) & data sharing \\
Tools, where I should upload my final data & data sharing \\
how to integrate gitHub in workflow & version control \\
Introduction into available tools & workflow \\
Upload data before or after publishing a paper? Time mangement & workflow \\
going over guidelines/best practice on how to name files, folders and data as well as folder structure. & workflow \\
understanding where redundancy is needed (raw data?) and where to avoid it. & workflow \\
Steps and when to do what & workflow \\
\hline
\end{tabular}
\end{frame}
% Semester schedule. The frame has two slides: on the second one, today's
% session (2024-05-27, Workflow) is set in bold.
\begin{frame}{Topics for this semester}
\centering
\begin{tabular}{ll}
\hline
Date & Topic \\
\hline
2024-05-13 & Introduction to data management \\
% \textbf<2>{...} is beamer's overlay-aware \textbf: normal weight on slide 1,
% bold on slide 2. It replaces the obsolete \bf switch inside paired \only's.
\textbf<2>{2024-05-27} & \textbf<2>{Workflow} \\
2024-06-10 & Data organisation \\
2024-06-24 & Data sharing \\
2024-07-08 & Clean coding \\
2024-07-22 & Version control \\
\hline
\end{tabular}
\end{frame}
\section{Workflow}
\begin{frame}{What is a workflow and why do I need one?}
% slido
\centering
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 2}
\url{https://app.sli.do/event/qgqz43GC9EYZ3RbQG5QfvU}
\end{frame}
% Definition slide: Wikipedia quote first, then (after \pause) the key aspects
% in the left column and the one-line summary in the right column.
\begin{frame}{What is a workflow?}
%\pause
\begin{quote}
A workflow consists of an orchestrated and repeatable pattern of
activity, enabled by the systematic organization of resources into
processes that transform materials, provide services, or process
information.
\end{quote}
\vspace{-.3cm}
\flushright{\footnotesize \url{https://en.wikipedia.org/wiki/Workflow}}
\pause
\begin{columns}
\begin{column}[c]{.5\textwidth}
\flushleft
Important aspects:
\begin{itemize}
\item Repeatable pattern
\item Systematic organization
\item Transformation processes
\end{itemize}
\pause
\end{column}
\begin{column}[c]{.5\textwidth}
In short:\\
\begin{itemize}
\item A workflow answers the question:\\
% \textcolor scopes the colour to its argument; \color is a declaration and
% would otherwise stay active until the end of the enclosing group.
\textcolor{iwmorange}{\textbf{What's the most efficient way to get this work done?}}
\end{itemize}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[<+->]{Why do I need a workflow?}
%\pause
\begin{itemize}
\item It boosts productivity
\item It reduces mental load
\item A truly optimized workflow will:
\begin{itemize}
\item Identify and remove unnecessary steps and processes that lead to slowdowns
\item Provide a sequential (chronological) order for accomplishing tasks
\item Automate some decisions and processes (freeing up time)
\item Reduce communication burdens (fewer e-mails, meetings, etc.)
\item Encourage collaboration
\item Track progress and assess performance
\item Keep records of previous processes and make future processes repeatable
\item Eliminate decision fatigue
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{But let's start much smaller than this}
% slido
% What can you answer with "yes"?
% * I know more than 3 RStudio shortcuts
% * I have never updated my R packages
% * I know what the ISO 8601 date format is
% * I regularly delete duplicate files
% * I usually have a clean e-mail inbox
% * I use file naming conventions
\centering
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 2}
\url{https://app.sli.do/event/qgqz43GC9EYZ3RbQG5QfvU}
\end{frame}
\begin{frame}{The bare minimum (IMHO)}
\begin{itemize}
\item Shortcuts
\begin{itemize}
\item \texttt{CTRL+C} and \texttt{CTRL+V}
\item \texttt{ALT+Tab} to switch between applications
\item In your browser: \texttt{CTRL+L}, \texttt{CTRL+T}, \texttt{CTRL+W},
\texttt{CTRL+Tab}, \texttt{CTRL+Page Up/Down}
\item Using \texttt{Alt} to open up ``File''
\item Sending code chunks with \texttt{CTRL+Enter} to the console in RStudio
\end{itemize}
\item Making file endings visible
\item Associating TXT-files with a proper text editor
\item Making sure that CSV-files are \emph{never} opened by EXCEL accidentally
\item Setting List or Details View for files
\end{itemize}
\end{frame}
\begin{frame}{One (baby) step up}
\begin{itemize}
\item Shortcuts for efficient text editing
\begin{itemize}
\item Jumping to next word
\item Jumping to next instance of a word
\item Finding and replacing a certain word
\item Deleting/copying complete line
\item Commenting in/out of complete code chunks
\item \dots
\end{itemize}
\item Update R packages once a week
\item Update R and RStudio at least twice a year
\item If you use R outside of RStudio, use SDI
%\item Consistent date format (preferably ISO 8601)
%\item Deleting duplicates
%\item Cleaning out e-mails
%\item Self-sorting files
\end{itemize}
\vfill
\end{frame}
\begin{frame}{Project workflow}
\begin{itemize}
\item Project workflow refers to how you organize projects and move
through the various stages of the research cycle
\item \citet{Kathawalla2021} say that a project workflow includes:
\begin{itemize}
\item File folder structure
\item Document naming conventions
\item Version control
\item Cloud storage
\item Choice of who has access to a project and when (Collaborators? Public?)
\end{itemize}
\item Developing a clear project workflow is much easier for PhD
students than later career scholars who have many more projects to
organize
\end{itemize}
\vfill
\end{frame}
\section{Naming conventions}
\begin{frame}[fragile]{Examples}
\begin{columns}
\begin{column}[c]{.6\textwidth}
\begin{itemize}
\item Files with no naming convention:
\begin{lstlisting}
Test data 2016.xlsx
Meeting notes Jan 17.doc
Notes Eric.txt
Final FINAL last version.docx
\end{lstlisting}
\end{itemize}
\end{column}
\begin{column}[c]{.3\textwidth}
\includegraphics[scale = .3]{../figures/xkcd_naming_conventions}
\end{column}
\end{columns}
\begin{itemize}
\item Files with naming convention:
\begin{lstlisting}
20160104_ProjectA_Ex1Test1_SmithE_v1.xlsx
20160104_ProjectA_MeetingNotes_SmithE_v2.docx
Experiment1_PANAS_20231011-140811_Image04.tif
\end{lstlisting}
\end{itemize}
{\tiny
\url{https://xkcd.com/1459/}\hfill
\url{https://datamanagement.hms.harvard.edu/collect/file-naming-conventions}
}
\end{frame}
\begin{frame}[fragile]{3\,am in the morning before a deadline\dots}
\begin{columns}
\begin{column}[c]{.5\textwidth}
These?\\[1ex]
\hrule\vspace{.2cm}
\begin{Verbatim}[commandchars=\\\{\}]
01_marshal-data.md
01_marshal-data.R
02_pre-dea-filtering.md
02_pre-dea-filtering.R
03_dea-with-limma-voom.md
03_dea-with-limma-voom.R
90_limma-model-term-name-fiasco.md
90_limma-model-term-name-fiasco.R
helper01_load-counts.R
helper02_load-exp-des.R
helper03_load-focus-statinf.R
helper04_extract-and-tidy.R
\end{Verbatim}
\end{column}
\begin{column}[c]{.5\textwidth}
Or these?\\[1ex]
\hrule\vspace{.2cm}
\begin{Verbatim}[commandchars=\\\{\}]
01.md
01.R
02.md
02.R
03.md
03.R
90.md
90.R
helper01.R
helper02.R
helper03.R
helper04.R
\end{Verbatim}
\end{column}
\end{columns}
{\hfill\tiny \citet{Wilbrandt2023}}
\end{frame}
% Basic file-naming rules, revealed step by step, followed by a block with
% the three principles for file names.
\begin{frame}{The basics}
\begin{itemize}
\item File names should contain only letters, numbers, underscores, and dashes
\pause
\item A dash or underscore should be used instead of a space
\pause
\item No special characters (\& ' " ; : * ! \# \$, etc.)
\pause
\item Maybe decide on a convention like
\begin{itemize}
% The character that defines each convention is highlighted in bold orange;
% \textbf/\textcolor replace the obsolete {\bf\color{...}...} switches.
\item camel\textbf{\textcolor{iwmorange}{C}}ase
\item snake\textbf{\textcolor{iwmorange}{\_}}case
\item \textbf{\textcolor{iwmorange}{P}}ascal\textbf{\textcolor{iwmorange}{C}}ase
\end{itemize}
\end{itemize}
\pause
\begin{block}{Three principles for file names}
\begin{enumerate}
\item Machine readable
\item Human readable
\item Plays well with default ordering
\end{enumerate}
\end{block}
\vfill
\end{frame}
\begin{frame}{Example from website project}
\centering
\only<1>{\includegraphics[width = .7\textwidth]{../figures/ex_filenaming_website_01}}
\only<2>{\includegraphics[width = .7\textwidth]{../figures/ex_filenaming_website_02}}
\end{frame}
\begin{frame}{Steps to consider}
\begin{enumerate}
\item Think about your files
\item Identify metadata
\item Abbreviate or encode metadata
\item Deliberately separate metadata elements
\item How will you search for your files?
\item Write down your naming conventions
\item Use versioning (include numbering, dates)
\end{enumerate}
\end{frame}
\begin{frame}[fragile]{Think about your files, identify and encode metadata}
\begin{columns}
\begin{column}[c]{.6\textwidth}
\begin{itemize}
\item What kind of files will I have in my project?
\begin{itemize}
\item Data files
\item Analysis files
\item Files including stimuli (maybe pictures or similar)
\item Documentation files
\item WORD documents like a paper etc.
\item \dots
\end{itemize}
\end{itemize}
\end{column}\pause
\begin{column}[c]{.4\textwidth}
Maybe pick prefixes:\\
\verb+DATA_[...].csv+
\verb+ANALYSIS_[...].R+
\verb+PAPER_[...].docx+
\end{column}
\end{columns}\pause
\begin{columns}
\begin{column}[c]{.6\textwidth}
\begin{itemize}
\item What kind of metadata will I have?
\begin{itemize}
\item Subject identifier
\item Session identifier
\item Different conditions
\item \dots
\end{itemize}
\end{itemize}
\end{column}\pause
\begin{column}[c]{.4\textwidth}
Encode metadata:\\
\verb+DATA_vp01_load_ses01.csv+
\verb+ANALYSIS_01_model-selection.R+
\verb+ANALYSIS_02_plots.R+
\end{column}
\end{columns}
\end{frame}
\begin{frame}[fragile]{How do you want your files to be ordered?}
\begin{enumerate}
\item Sort by type\\
\verb+ANALYSIS_01_model-selection.R+\\
\verb+ANALYSIS_02_plots.R+\\
\verb+DATA_vp01_load_ses01.csv+
\item Sort by date\\
\verb+2022-09-29_exp1_vpall.txt+\\
\verb+2022-09-30_analysis.txt+
\item Sort in my order\\
\verb+01_data-cleaning_study1.R+\\
\verb+02_analysis_study1.Rmd+
\end{enumerate}
\end{frame}
\begin{frame}[fragile]{Zero left padding}
\begin{columns}
\begin{column}[c]{.5\textwidth}
Without left padding\\[1ex]
\hrule\vspace{.2cm}
\begin{Verbatim}[commandchars=\\\{\}]
2016_11_14-11_13_52.log
2016_11_14-11_23_52.log
\textcolor{iwmblue}{2016_11_14-11_3_52.log}
2016_11_14-11_33_52.log
2016_11_14-11_57_58.log
2016_11_14-12_17_58.log
2016_11_14-12_27_58.log
2016_11_14-12_37_58.log
2016_11_14-12_47_58.log
2016_11_14-12_57_58.log
\textcolor{iwmblue}{2016_11_14-12_7_58.log}
\end{Verbatim}
\end{column}
\begin{column}[c]{.5\textwidth}
With left padding\\[1ex]
\hrule\vspace{.2cm}
\begin{Verbatim}[commandchars=\\\{\}]
\textcolor{iwmblue}{2016_11_14-11_03_52.log}
2016_11_14-11_13_52.log
2016_11_14-11_23_52.log
2016_11_14-11_33_52.log
2016_11_14-11_57_58.log
\textcolor{iwmblue}{2016_11_14-12_07_58.log}
2016_11_14-12_17_58.log
2016_11_14-12_27_58.log
2016_11_14-12_37_58.log
2016_11_14-12_47_58.log
2016_11_14-12_57_58.log
\end{Verbatim}
\end{column}
\end{columns}
\end{frame}
\begin{frame}{Date format convention}
\begin{columns}
\begin{column}{.5\textwidth}
\begin{center}
\includegraphics[scale = .4]{../figures/xkcd_iso_8601_2x}
\end{center}
\end{column}
\begin{column}{.5\textwidth}
\begin{itemize}
\item Stick to conventions if possible (even if you prefer something
else personally)
\item This can be read easily by machines (working with it in R)
\item It is inclusive: Americans interpret this the same way as
Europeans
\end{itemize}
\end{column}
\end{columns}
\vfill
\flushright{\tiny{\url{https://xkcd.com/1179/}}}
\end{frame}
\begin{frame}[fragile]{Write down your naming conventions}
\small{
\begin{tabular}{@{}lll@{}}
\hline
& Example & Documentation \\
\hline
Content-specific & \verb+DATA_vp01_load_ses01.csv+ & \verb+DATA_[ID]_[cond]_[ses].csv+\\
Descriptive & \verb+ANALYSIS_01_model-selection.R+ & \verb+ANALYSIS_[#]_[descrp].R+\\
Consistent & \verb+ANALYSIS_02_plots.R+ & \verb+ANALYSIS_[#]_[descrp].R+\\
Leading date & \verb+2022-09-29_exp1_vpall.txt+ & \verb+[yyyy-mm-dd]_[exp]_[type].txt+\\
Leading zero & \verb+01_data-cleaning_study1.Rmd+ & \verb+[##]_[descrp]_[study].[R/Rmd]+\\
\hline
\end{tabular}
}
\begin{itemize}
\item Documenting is key and becomes second nature after a while
\item Create a README file and write down everything that could be
useful to remember
\item Update this README file regularly
\end{itemize}
\end{frame}
\begin{frame}{Version control}
\begin{center}
\includegraphics[scale = .38]{../figures/phd101212s}
\end{center}
\vfill
{\hfill \tiny \url{https://phdcomics.com/comics/archive.php?comicid=1531}}
\end{frame}
\begin{frame}[fragile]{Version control}
\begin{itemize}
\item Version control is a systematic approach to record changes made
in a file, or set of files, over time
\item File versioning can be as simple as using file naming conventions
like suffixes \verb+*_v1+, \verb+*_v2+, \verb+*_vn+
\end{itemize}
\vspace{.3cm}
\begin{enumerate}
\item Create files -- these may contain text, code or both
\item Work on these files, by changing, deleting or adding new content
\item Create a snapshot of the file status (also known as version) at this time
\item Document versions (e.\,g., in a README file)
\end{enumerate}
\vfill
{\hfill \tiny
\url{https://the-turing-way.netlify.app/reproducible-research/vcs.html}}
\end{frame}
\begin{frame}{Example master thesis}
\centering
\includegraphics[width = .6\textwidth]{../figures/ex_filenaming_ma_01}
\end{frame}
\begin{frame}{}
\centering
{\Huge
\color{iwmblue}{There is no right or wrong -- only what works best
for you!\\\vspace{.5cm}\pause
AND: You can change your file names whenever you feel like something else
might work even better!}}
\vfill\pause
Checklist for good file names: \url{https://osf.io/dpu45}
\end{frame}
\section{Folder structure}
\begin{frame}{The basics}
\begin{itemize}
\item One project, one folder
\pause
\item Consistent pattern for each project
\pause
\item Do not nest too deep!\\
$\to$ depth vs.\ width (maximum path length on Windows is 260 characters by default)
\pause
\item Add README file at top level
\pause
\item Good naming conventions also apply to folders:
\begin{itemize}
\item Folder names should contain only letters, numbers, underscores, and dashes
\item A dash or underscore should be used instead of a space
\item No special characters (\& ' " ; : * ! \# \$, etc.)
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile]{Folder structure for a master thesis project}
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+};
% first level
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+};
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+};
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+};
\node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+};
\node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+};
\node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+};
\node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+};
\node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+};
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (n5.west);
\path (top.east) -- (n6.west);
\path (top.east) -- (n7.west);
\path (top.east) -- (n8.west);
\path (top.east) -- (file.west);
\end{tikzpicture}
\end{frame}
\begin{frame}[fragile]{Folder structure for a master thesis project}
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+};
% first level
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+};
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+};
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+};
\node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+};
\node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+};
\node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+};
\node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+};
\node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+};
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (n5.west);
\path (top.east) -- (n6.west);
\path (top.east) -- (n7.west);
\path (top.east) -- (n8.west);
\path (top.east) -- (file.west);
% second level
\node[text width = 7cm] (o1) at (10, 0) {\faIcon[regular]{file-pdf} \verb+master-thesis_forms_2022.pdf+};
\node[text width = 7cm] (o2) at (10, -0.7) {\faIcon[regular]{file} \verb+infoveranstaltung.md+};
\path (n1.center) -- (o1.west);
\path (n1.center) -- (o2.west);
\end{tikzpicture}
\end{frame}
\begin{frame}[fragile]{Folder structure for a master thesis project}
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+};
% first level
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+};
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+};
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+};
\node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+};
\node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+};
\node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+};
\node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+};
\node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+};
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (n5.west);
\path (top.east) -- (n6.west);
\path (top.east) -- (n7.west);
\path (top.east) -- (n8.west);
\path (top.east) -- (file.west);
% second level
\node[text width = 7cm] (c1) at (10, -0.7) {\faIcon[regular]{file-code}
\verb+01_preprocessing.R+};
\node[text width = 7cm] (c2) at (10, -1.4) {\faIcon[regular]{file-code}
\verb+02_modeling.R+};
\node[text width = 7cm] (c3) at (10, -2.1) {\faIcon[regular]{file-code}
\verb+03_plots.Rmd+};
\path (n2.center) -- (c1.west);
\path (n2.center) -- (c2.west);
\path (n2.center) -- (c3.west);
\end{tikzpicture}
\end{frame}
\begin{frame}[fragile]{Folder structure for a master thesis project}
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+};
% first level
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+};
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+};
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+};
\node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+};
\node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+};
\node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+};
\node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+};
\node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+};
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (n5.west);
\path (top.east) -- (n6.west);
\path (top.east) -- (n7.west);
\path (top.east) -- (n8.west);
\path (top.east) -- (file.west);
% second level
\node[text width = 7cm] (m1) at (10, -1.4) {\faIcon{folder} \verb+raw_data+};
\node[text width = 7cm] (m2) at (10, -2.1) {\faIcon[regular]{file}
\verb+DATA_vpall_exp1.csv+};
\path (n3.center) -- (m1.west);
\path (n3.center) -- (m2.west);
\end{tikzpicture}
\end{frame}
\begin{frame}[fragile]{Folder structure for a master thesis project}
\begin{tikzpicture}[
every node/.style = {text width = 4cm, align = left},
every path/.style = {thick, draw}
]
\node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+};
% first level
\node (n1) at (4, 0) {\faIcon{folder} \verb+admin+};
\node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+};
\node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+};
\node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+};
\node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+};
\node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+};
\node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+};
\node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+};
\node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+};
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (n5.west);
\path (top.east) -- (n6.west);
\path (top.east) -- (n7.west);
\path (top.east) -- (n8.west);
\path (top.east) -- (file.west);
% second level
\node[text width = 6cm] (m3) at (8, -4.2) {\faIcon{folder} \verb+2023-05-05+};
\node[text width = 6cm] (m4) at (8, -4.9) {\faIcon{folder} \verb+2023-10-12+};
\node[text width = 6cm] (t1) at (13, -4.2) {\faIcon[regular]{file-powerpoint}
\verb+colloq_230505.pptx+};
\node[text width = 6cm] (t2) at (13, -4.9) {\faIcon[regular]{file-word} \verb+notes.docx+};
\path (n7.center) -- (m3.west);
\path (n7.center) -- (m4.west);
\path (m3.center) -- (t1.west);
\path (m3.center) -- (t2.west);
\end{tikzpicture}
\end{frame}
\begin{frame}{TONIC: Structured Template}
\begin{itemize}
\item Different research projects might have different structures
\item However, there are certain similarities for most of them
\item You can find structured templates on the internet
\item One pretty generic one is TONIC
\end{itemize}
\vfill
\url{https://github.com/tonic-team/Tonic-Research-Project-Template}\\
\url{https://gin-tonic.netlify.app/}
\end{frame}
\begin{frame}[fragile]{TONIC: Structured Template}
\begin{tikzpicture}[
every node/.style = {text width = 7cm, align = left},
every path/.style = {thick, draw}
]
\node (top) at (0, 0) {\faIcon{folder}
\verb+project_name+};
% first level
\node (n1) at (7, 0) {\faIcon{folder} \verb+01_project_management+};
\node (n2) at (7, -0.7) {\faIcon{folder} \verb+02_material_and_methods+};
\node (n3) at (7, -1.4) {\faIcon{folder} \verb+03_data+};
\node (n4) at (7, -2.1) {\faIcon{folder} \verb+04_data_analysis+};
\node (n5) at (7, -2.8) {\faIcon{folder} \verb+05_figures+};
\node (n6) at (7, -3.5) {\faIcon{folder} \verb+06_dissemination+};
\node (n7) at (7, -4.2) {\faIcon{folder} \verb+07_misc+};
\node (f1) at (7, -4.9) {\faIcon[regular]{file} \verb+LICENSE-CC-BY+};
\node (f2) at (7, -5.6) {\faIcon[regular]{file} \verb+README.md+};
\path (top.center) -- (n1.west);
\path (top.center) -- (n2.west);
\path (top.center) -- (n3.west);
\path (top.center) -- (n4.west);
\path (top.center) -- (n5.west);
\path (top.center) -- (n6.west);
\path (top.center) -- (n7.west);
\path (top.center) -- (f1.west);
\path (top.center) -- (f2.west);
\end{tikzpicture}
\end{frame}
\begin{frame}[fragile]{TONIC: Structured Template}
{Subfolders}
\begin{tikzpicture}[
every node/.style = {text width = 5.5cm, align = left},
every path/.style = {thick, draw}
]
\node (top) at (0, 0) {\faIcon{folder}
\verb+01_project_management+};
% first level
\node (n1) at (7, 0) {\faIcon{folder} \verb+01_administration_files+};
\node (n2) at (7, -0.7) {\faIcon{folder} \verb+02_accepted_grants+};
\node (n3) at (7, -1.4) {\faIcon{folder} \verb+03_meeting_minutes+};
\node (n4) at (7, -2.1) {\faIcon{folder} \verb+04_related_literature+};
\node (n5) at (7, -2.8) {\faIcon{folder} \verb+05_data_management_plans+};
\node (n6) at (7, -3.5) {\faIcon{folder} \verb+06_notebook+};
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (n4.west);
\path (top.east) -- (n5.west);
\path (top.east) -- (n6.west);
\end{tikzpicture}
\hrule
\begin{tikzpicture}[
every node/.style = {text width = 5.5cm, align = left},
every path/.style = {thick, draw}
]
\node (top) at (0, 0) {\faIcon{folder}
\verb+02_material_and_methods+};
% first level
\node (n1) at (7, 0) {\faIcon{folder} \verb+01_protocols+};
\node (n2) at (7, -0.7) {\faIcon{folder} \verb+02_code+};
\node (n3) at (7, -1.4) {\faIcon{folder} \verb+03_hardware+};
\node (f2) at (7, -2.1) {\faIcon[regular]{file} \verb+README_MM.md+};
\path (top.east) -- (n1.west);
\path (top.east) -- (n2.west);
\path (top.east) -- (n3.west);
\path (top.east) -- (f2.west);
\end{tikzpicture}
\end{frame}
\begin{frame}[fragile]{Additional tips}
\begin{itemize}
\item Dump incoming files not fitting your conventions in a prespecified
folder, e.\,g.,
\begin{tikzpicture}[
every node/.style = {text width = 7cm, align = left, color = iwmorange},
every path/.style = {thick, draw}
]
\node (top) at (0, 0) {\faIcon{folder} \verb+z_from-nora+};
\end{tikzpicture}
\item Then adapt files from there and document changes/provenance
\item Dump older files cluttering your working directory
\begin{tikzpicture}[
every node/.style = {text width = 7cm, align = left, color = iwmorange},
every path/.style = {thick, draw}
]
\node (top) at (0, 0) {\faIcon{folder} \verb+zzz+};
\end{tikzpicture}
\item Delete files when the project is finished
\end{itemize}
\vfill
\pause
\begin{center}
{\Huge\color{iwmblue}{There is no right or wrong -- only what works best
for you!}}
\end{center}
\end{frame}
\appendix
%\begin{frame}[allowframebreaks]{References}
\begin{frame}{References}
\printbibliography
\vfill
\end{frame}
\end{document}