diff --git a/02_workflow/02_workflow.tex b/02_workflow/02_workflow.tex new file mode 100644 index 0000000..67af6cb --- /dev/null +++ b/02_workflow/02_workflow.tex @@ -0,0 +1,1045 @@ +\documentclass[aspectratio=169]{beamer} + +\usepackage{listings} +\usepackage[utf8,latin1]{inputenc} +\usepackage[style = apa, backend = biber, natbib = true]{biblatex} +\addbibresource{../literature/lit.bib} + +\usepackage{fancyvrb} +\usepackage{fontawesome5} % get icons +\usepackage{multirow} +\usepackage{color, colortbl} + +\usepackage{tikz} +\usetikzlibrary{fit} +\usepackage[edges]{forest} + +\lstset{language=bash,% + backgroundcolor=\color{iwmgray!15!white}, + basicstyle=\ttfamily\color{iwmgray}, + frame=none, + basicstyle=\ttfamily\color{iwmgray}, + commentstyle=\slshape\color{iwmgray}, + keywordstyle=\bfseries\color{iwmgray}, + identifierstyle=\color{iwmgray}, + stringstyle=\color{iwmgray}, + numbers=none,%left,numberstyle=\tiny, + basewidth={.5em, .4em}, + showstringspaces=false, + emphstyle=\color{red!50!white}} + +\makeatletter \def\newblock{\beamer@newblock} \makeatother + +\beamertemplatenavigationsymbolsempty +\setbeamertemplate{itemize items}[circle] +\setbeamertemplate{section in toc}[circle] +\mode{\setbeamercolor{math text displayed}{fg=iwmgray}} +\setbeamercolor{block body}{bg=iwmorange!50!white} +\setbeamercolor{block title}{fg=white, bg=iwmorange} +% Definitions for biblatex +\setbeamercolor{bibliography entry note}{fg=iwmgray} +\setbeamercolor{bibliography entry author}{fg=iwmgray} +\setbeamertemplate{bibliography item}{} + +\definecolor{iwmorange}{RGB}{255,105,0} +\definecolor{iwmgray}{RGB}{67,79,79} +\definecolor{iwmblue}{RGB}{60,180,220} + +\setbeamercolor{title}{fg=iwmorange} +\setbeamercolor{frametitle}{fg=iwmorange} +\setbeamercolor{structure}{fg=iwmorange} +\setbeamercolor{normal text}{fg=iwmgray} +\setbeamercolor{author}{fg=iwmgray} +\setbeamercolor{date}{fg=iwmgray} + +\newcommand{\vect}[1]{\mathbf{#1}} +\newcommand{\mat}[1]{\mathbf{#1}} 
+\newcommand{\gvect}[1]{\boldsymbol{#1}} +\newcommand{\gmat}[1]{\boldsymbol{#1}} + +\AtBeginSection[]{ + \frame{ + \tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}} + +\setbeamertemplate{headline}{ + \begin{beamercolorbox}{section in head} + \vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt + \end{beamercolorbox} +} + +\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt} + +\title{Workflows for effective research data management} +\author{Nora Wickelmaier} +\date{May 27, 2024} + +\begin{document} + +\begin{frame}{} +\thispagestyle{empty} +\titlepage +\end{frame} + +\begin{frame}{Not kiddin'} +I received this e-mail right after our last session\dots + \begin{center} + \includegraphics[scale = .5]{../figures/email_data_request_2024_01} + \end{center} +I finished my dissertation over a decade ago\dots +\end{frame} + +\begin{frame}{Not kiddin'} +Again, definitely not what I would have liked to answer\dots + \begin{center} + \includegraphics[scale = .55]{../figures/email_data_request_2024_02} + \end{center} +\end{frame} + +\begin{frame}[<+->]{Some general rules} + \begin{itemize} + \item This e-mail will never hit you in a week where you have any free time + \item This e-mail will never be about data that is already published or at + least preprocessed and documented in a clean way + \item It will usually be sent by someone that you really want to answer to + (who would not want to answer to one of the four persons that has actually + read parts of your dissertation?) + \item This e-mail will trigger a tremendous amount of guilt\dots + \item Would a better workflow have prevented this? + \end{itemize} +\end{frame} + +\begin{frame}[allowframebreaks]{Results slido surveys: Habits} +\footnotesize +\begin{tabular}{ll} + \hline +What habits would help with good data management? 
& Habit \\ + \hline + pseudonymizing/anonymizing data & data organisation \\ + avoid redundancy & data organisation \\ + have one place where you store the data & data organisation \\ + uploading under a license (CC-BY....) & data sharing \\ + loading data on an archive, repository etc... & data sharing \\ + Doing the archive & data sharing \\ + report changes to dataset & documentation \\ + codebook & documentation \\ + readme & documentation \\ + report deviations from preregistration & documentation \\ + Read-Me & documentation \\ + Document data collection in Details & documentation \\ + Document yout code & documentation \\ + Documentation & documentation \\ + preregistration & documentation \\ + recording the steps (taken through analysis) & documentation \\ + github documentation & documentation \\ + %discipline & workflow \\ + %Do not do it in your spare time? & workflow \\ + %Brainpower & workflow \\ + %Trink about file names & workflow \\ + %regular cleaning & workflow \\ + %Special time slot in calendar & workflow \\ + %clarity & workflow \\ + %consistency & workflow \\ + %checklists & workflow \\ + %clear workflow & workflow \\ + %Structure Structure Structure & workflow \\ + %Be consistent & workflow \\ + %consitency & workflow \\ + %Reproducible code & workflow \\ + %Time Investment & workflow \\ + \hline +\end{tabular} +\begin{tabular}{ll} + \hline + What habits would help with good data management? & Habit \\ + \hline + %pseudonymizing/anonymizing data & data organisation \\ + %avoid redundancy & data organisation \\ + %have one place where you store the data & data organisation \\ + %uploading under a license (CC-BY....) & data sharing \\ + %loading data on an archive, repository etc... 
& data sharing \\ + %Doing the archive & data sharing \\ + %report changes to dataset & documentation \\ + %codebook & documentation \\ + %readme & documentation \\ + %report deviations from preregistration & documentation \\ + %Read-Me & documentation \\ + %Document data collection in Details & documentation \\ + %Document yout code & documentation \\ + %Documentation & documentation \\ + %preregistration & documentation \\ + %recording the steps (taken through analysis) & documentation \\ + %github documentation & documentation \\ + discipline & workflow \\ + Do not do it in your spare time? & workflow \\ + Brainpower & workflow \\ + Trink about file names & workflow \\ + regular cleaning & workflow \\ + Special time slot in calendar & workflow \\ + clarity & workflow \\ + consistency & workflow \\ + checklists & workflow \\ + clear workflow & workflow \\ + Structure Structure Structure & workflow \\ + Be consistent & workflow \\ + consitency & workflow \\ + Reproducible code & workflow \\ + Time Investment & workflow \\ + \hline +\end{tabular} +\end{frame} + +\begin{frame}[allowframebreaks]{Results slido surveys: Barriers} +\footnotesize +\vspace{1cm} +\begin{tabular}{ll} + \hline +What are possible barriers for good data management? & Barrier \\ + \hline + Remember the strategy used over time & lack of consistency \\ + Keeping multiple copies consistent & lack of consistency \\ + don't know the best tools for it & lack of skills \\ + no idea where to start & lack of skills \\ + complex research design & lack of skills \\ + public security & lack of skills \\ + expectation of presenting results fast (time) & lack of time \\ + When should I do this task? 
& lack of time \\ + Lack of planning & lack of time \\ + too much other work & lack of time \\ + %procrastination & low priority \\ + %its not fun & low priority \\ + %never having thought of it & low priority \\ + %Other Priorities & low priority \\ + %boring task & low priority \\ + %forget it & low priority \\ + %bad time management & low priority \\ + %never gets perfected & perfectionism \\ + %Fear of missing something & perfectionism \\ + %Defining a good concept from the beginning on & perfectionism \\ + %too many people in one project & responsibility diffusion \\ + \hline +\end{tabular} + +\newpage +\begin{tabular}{ll} + \hline +What are possible barriers for good data management? & Barrier \\ + \hline + %Remember the strategy used over time & lack of consistency \\ + %Keeping multiple copies consistent & lack of consistency \\ + %don't know the best tools for it & lack of skills \\ + %no idea where to start & lack of skills \\ + %complex research design & lack of skills \\ + %public security & lack of skills \\ + %expectation of presenting results fast (time) & lack of time \\ + %When should I do this task? & lack of time \\ + %Lack of planning & lack of time \\ + %too much other work & lack of time \\ + procrastination & low priority \\ + its not fun & low priority \\ + never having thought of it & low priority \\ + Other Priorities & low priority \\ + boring task & low priority \\ + forget it & low priority \\ + bad time management & low priority \\ + never gets perfected & perfectionism \\ + Fear of missing something & perfectionism \\ + Defining a good concept from the beginning on & perfectionism \\ + too many people in one project & responsibility diffusion \\ + \hline +\end{tabular} +\end{frame} + +\begin{frame}{Results slido surveys: Topics} +\footnotesize +\centering +\begin{tabular}{p{11cm}l} + \hline + What topics would you like to cover this semester? 
& Topic \\ + \hline + Understandable coding & clean coding \\ + Cleaning up R code for readability & clean coding \\ + Documentation of a final R script & clean coding \\ + How to manage different data sources in one experiment\\ (e.g.\ eye tracking, performance, questionnaire..) & data organisation \\ + understanding what should always go into a readme file. & data organisation \\ + How to best arrange the data & data organisation \\ + important things before the open-access data & data sharing \\ + Where to store data for long-term accessibility (conventions?) & data sharing \\ + Tools, where I should upload my final data & data sharing \\ + how to integrate gitHub in workflow & version control \\ + Introduction into available tools & workflow \\ + Upload data before or after publishing a paper? Time mangement & workflow \\ + going over guidelines/best practice on how to name files, folders and data as well as folder structure. & workflow \\ + understanding where redundancy is needed (raw data?) and where to avoid it. 
& workflow \\ + Steps and when to do what & workflow \\ + \hline +\end{tabular} +\end{frame} + +\begin{frame}{Topics for this semester} +\centering +\begin{tabular}{ll} +\hline +Date & Topic \\ +\hline +2024-05-13 & Introduction to data management \\ +\only<1>{2024-05-27}\only<2>{\bf 2024-05-27} & \only<1>{Workflow}\only<2>{\bf Workflow} \\ +2024-06-10 & Data organisation \\ +2024-06-24 & Data sharing \\ +2024-07-08 & Clean coding \\ +2024-07-22 & Version control \\ +\hline +\end{tabular} +\end{frame} + +\section{Workflow} + +\begin{frame}{What is a workflow and why do I need one?} + % slido + \centering + \includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 2} + + \url{https://app.sli.do/event/qgqz43GC9EYZ3RbQG5QfvU} +\end{frame} + +\begin{frame}{What is a workflow?} + %\pause + \begin{quote} + A workflow consists of an orchestrated and repeatable pattern of + activity, enabled by the systematic organization of resources into + processes that transform materials, provide services, or process + information. 
+ \end{quote} + \vspace{-.3cm} + \flushright{\footnotesize \url{https://en.wikipedia.org/wiki/Workflow}} + \pause + \begin{columns} + \begin{column}[c]{.5\textwidth} +\flushleft + Important aspects: + \begin{itemize} + \item Repeatable pattern + \item Systematic organization + \item Transformation processes + \end{itemize} + \pause + \end{column} + \begin{column}[c]{.5\textwidth} + + In short:\\ + \begin{itemize} + \item A workflow answers the question:\\ + \color{iwmorange}{\bf What's the most efficient way to get this work done?} + \end{itemize} + \end{column} + \end{columns} +\end{frame} + +\begin{frame}[<+->]{Why do I need a workflow?} + %\pause + \begin{itemize} + \item It boosts productivity + \item It reduces mental load + \item A truly optimized workflow will: + \begin{itemize} + \item Identify and remove unnecessary steps and processes that lead to slowdowns + \item Provide a sequential (chronological) order for accomplishing tasks + \item Automate some decisions and processes (freeing up time) + \item Reduce communication burdens (fewer e-mails, meetings, etc.) + \item Encourage collaboration + \item Track progress and assess performance + \item Keep records of previous processes and make future processes repeatable + \item Eliminate decision fatigue + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame}{But let's start much smaller than this} + % slido + % What can you answer with "yes"? 
+ % * I know more than 3 RStudio shortcuts + % * I have never updated my R packages + % * I know what the ISO 8601 date format is + % * I regularly delete duplicate files + % * I usually have a clean e-mail inbox + % * I use file naming conventions + \centering + \includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 2} + + \url{https://app.sli.do/event/qgqz43GC9EYZ3RbQG5QfvU} +\end{frame} + +\begin{frame}{The bare minimum (IMHO)} + \begin{itemize} + \item Shortcuts + \begin{itemize} + \item \texttt{CTRL+C} and \texttt{CTRL+V} + \item \texttt{ALT+Tab} to switch between applications + \item In your browser: \texttt{CTRL+L}, \texttt{CTRL+T}, \texttt{CTRL+W}, + \texttt{CTRL+Tab}, \texttt{CTRL+Page Up/Down} + \item Using \texttt{Alt} to open up ``File'' + \item Sending code chunks with \texttt{CTRL+Enter} to the console in RStudio + \end{itemize} + \item Making file endings visible + \item Associating TXT-files with a proper text editor + \item Making sure that CSV-files are \emph{never} opened by EXCEL accidentally + \item Setting List or Details View for files + \end{itemize} +\end{frame} + +\begin{frame}{One (baby) step up} + \begin{itemize} + \item Shortcuts for efficient text editing + \begin{itemize} + \item Jumping to next word + \item Jumping to next instance of a word + \item Finding and replacing a certain word + \item Deleting/copying complete line + \item Commenting in/out of complete code chunks + \item \dots + \end{itemize} + \item Update R packages once a week + \item Update R and RStudio at least twice a year + \item If you use R outside of RStudio, use SDI + %\item Consistent date format (preferably ISO 8601) + %\item Deleting duplicates + %\item Cleaning out e-mails + %\item Self-sorting files + \end{itemize} + \vfill +\end{frame} + +\begin{frame}{Project workflow} + \begin{itemize} + \item Project workflow refers to how you organize projects and move + through the various stages of the research cycle + \item 
\citet{Kathawalla2021} say that a project workflow includes: + \begin{itemize} + \item File folder structure + \item Document naming conventions + \item Version control + \item Cloud storage + \item Choice of who has access to a project and when (Collaborators? Public?) + \end{itemize} + \item Developing a clear project workflow is much easier for PhD + students than later career scholars who have many more projects to + organize + \end{itemize} + \vfill +\end{frame} + +\section{Naming conventions} + +\begin{frame}[fragile]{Examples} + \begin{columns} + \begin{column}[c]{.6\textwidth} + \begin{itemize} + \item Files with no naming convention: + \begin{lstlisting} +Test data 2016.xlsx +Meeting notes Jan 17.doc +Notes Eric.txt +Final FINAL last version.docx + \end{lstlisting} + \end{itemize} + \end{column} + \begin{column}[c]{.3\textwidth} + \includegraphics[scale = .3]{../figures/xkcd_naming_conventions} + \end{column} + \end{columns} + \begin{itemize} + \item Files with naming convention: + \begin{lstlisting} +20160104_ProjectA_Ex1Test1_SmithE_v1.xlsx +20160104_ProjectA_MeetingNotes_SmithE_v2.docx +Experiment1_PANAS_20231011-140811_Image04.tif + \end{lstlisting} + \end{itemize} + {\tiny + \url{https://xkcd.com/1459/}\hfill + \url{https://datamanagement.hms.harvard.edu/collect/file-naming-conventions} + } +\end{frame} + +\begin{frame}[fragile]{3\,am in the morning before a deadline...} + \begin{columns} + \begin{column}[c]{.5\textwidth} + These?\\[1ex] + \hrule\vspace{.2cm} +\begin{Verbatim}[commandchars=\\\{\}] +01_marshal-data.md +01_marshal-data.R +02_pre-dea-filtering.md +02_pre-dea-filtering.R +03_dea-with-limma-voom.md +03_dea-with-limma-voom.R +90_limma-model-term-name-fiasco.md +90_limma-model-term-name-fiasco.R +helper01_load-counts.R +helper02_load-exp-des.R +helper03_load-focus-statinf.R +helper04_extract-and tidy.R +\end{Verbatim} + \end{column} + \begin{column}[c]{.5\textwidth} + Or these?\\[1ex] + \hrule\vspace{.2cm} 
+\begin{Verbatim}[commandchars=\\\{\}] +01.md +01.R +02.md +02.R +03.md +03.R +90.md +90.R +helper01.R +helper02.R +helper03.R +helper04.R +\end{Verbatim} + \end{column} + \end{columns} + {\hfill\tiny \citet{Wilbrandt2023}} +\end{frame} + +\begin{frame}{The basics} + \begin{itemize} + \item File names should contain only letters, numbers, underscores, and dashes + \pause + \item A dash or underscore should be used instead of a space + \pause + \item No special characters (\& ' " ; : * ! \# \$, etc.) + \pause + \item Maybe decide on a convention like + \begin{itemize} + \item camel{\bf\color{iwmorange}C}ase + \item snake{\bf\color{iwmorange}\_}case + \item {\bf\color{iwmorange}P}ascal{\bf\color{iwmorange}C}ase + \end{itemize} + \end{itemize} + \pause + \begin{block}{Three principles for file names} + \begin{enumerate} + \item Machine readable + \item Human readable + \item Plays well with default ordering + \end{enumerate} + \end{block} + \vfill +\end{frame} + +\begin{frame}{Example from website project} + \centering + \only<1>{\includegraphics[width = .7\textwidth]{../figures/ex_filenaming_website_01}} + \only<2>{\includegraphics[width = .7\textwidth]{../figures/ex_filenaming_website_02}} +\end{frame} + +\begin{frame}{Steps to consider} + \begin{enumerate} + \item Think about your files + \item Identify metadata + \item Abbreviate or encode metadata + \item Deliberately separate metadata elements + \item How will you search for your files? + \item Write down your naming conventions + \item Use versioning (include numbering, dates) + \end{enumerate} +\end{frame} + +\begin{frame}[fragile]{Think about your files, identify and encode metadata} + \begin{columns} + \begin{column}[c]{.6\textwidth} + \begin{itemize} + \item What kind of files will I have in my project? + \begin{itemize} + \item Data files + \item Analysis files + \item Files including stimuli (maybe pictures or similar) + \item Documentation files + \item WORD documents like a paper etc. 
+ \item \dots + \end{itemize} + \end{itemize} + \end{column}\pause + \begin{column}[c]{.4\textwidth} + Maybe pick prefixes:\\ + \verb+DATA_[...].csv+ + \verb+ANALYSIS_[...].R+ + \verb+PAPER_[...].docx+ + \end{column} + \end{columns}\pause + \begin{columns} + \begin{column}[c]{.6\textwidth} + \begin{itemize} + \item What kind of metadata will I have? + \begin{itemize} + \item Subject identifier + \item Session identifier + \item Different conditions + \item \dots + \end{itemize} + \end{itemize} + \end{column}\pause + \begin{column}[c]{.4\textwidth} + Encode metadata:\\ + \verb+DATA_vp01_load_ses01.csv+ + \verb+ANALYSIS_01_model-selection.R+ + \verb+ANALYSIS_02_plots.R+ + \end{column} + \end{columns} +\end{frame} + +\begin{frame}[fragile]{How do you want your files to be ordered?} + \begin{enumerate} + \item Sort by type\\ + \verb+ANALYSIS_01_model-selection.R+\\ + \verb+ANALYSIS_02_plots.R+\\ + \verb+DATA_vp01_load_ses01.csv+ + \item Sort by date\\ + \verb+2022-09-29_exp1_vpall.txt+\\ + \verb+2022-09-30_analysis.txt+ + \item Sort in my order\\ + \verb+01_data-cleaning_study1.R+\\ + \verb+02_analysis_study1.Rmd+ + \end{enumerate} +\end{frame} + +\begin{frame}[fragile]{Zero left padding} + \begin{columns} + \begin{column}[c]{.5\textwidth} + Without left padding\\[1ex] + \hrule\vspace{.2cm} +\begin{Verbatim}[commandchars=\\\{\}] +2016_11_14-11_13_52.log +2016_11_14-11_23_52.log +\textcolor{iwmblue}{2016_11_14-11_3_52.log} +2016_11_14-11_33_52.log +2016_11_14-11_57_58.log +2016_11_14-12_17_58.log +2016_11_14-12_27_58.log +2016_11_14-12_37_58.log +2016_11_14-12_47_58.log +2016_11_14-12_57_58.log +\textcolor{iwmblue}{2016_11_14-12_7_58.log} +\end{Verbatim} + \end{column} + \begin{column}[c]{.5\textwidth} + With left padding\\[1ex] + \hrule\vspace{.2cm} +\begin{Verbatim}[commandchars=\\\{\}] +\textcolor{iwmblue}{2016_11_14-11_03_52.log} +2016_11_14-11_13_52.log +2016_11_14-11_23_52.log +2016_11_14-11_33_52.log +2016_11_14-11_57_58.log 
+\textcolor{iwmblue}{2016_11_14-12_07_58.log} +2016_11_14-12_17_58.log +2016_11_14-12_27_58.log +2016_11_14-12_37_58.log +2016_11_14-12_47_58.log +2016_11_14-12_57_58.log +\end{Verbatim} + \end{column} + \end{columns} +\end{frame} + +\begin{frame}{Date format convention} + \begin{columns} + \begin{column}{.5\textwidth} + \begin{center} + \includegraphics[scale = .4]{../figures/xkcd_iso_8601_2x} + \end{center} + \end{column} + \begin{column}{.5\textwidth} + \begin{itemize} + \item Stick to conventions if possible (even if you prefer something + else personally) + \item This can be read easily by machines (working with it in R) + \item It is inclusive: Americans interpret this the same way as + Europeans + \end{itemize} + \end{column} + \end{columns} + \vfill + \flushright{\tiny{\url{https://xkcd.com/1179/}}} +\end{frame} + +\begin{frame}[fragile]{Write down your naming conventions} + \small{ + \begin{tabular}{@{}lll@{}} + \hline + & Example & Documentation \\ + \hline + Content-specific & \verb+DATA_vp01_load_ses01.csv+ & \verb+DATA_[ID]_[cond]_[ses].csv+\\ + Descriptive & \verb+ANALYSIS_01_model-selection.R+ & \verb+ANALYSIS_[#]_[descrp].R+\\ + Consistent & \verb+ANALYSIS_02_plots.R+ & \verb+ANALYSIS_[#]_[descrp].R+\\ + Leading date & \verb+2022-09-29_exp1_vpall.txt+ & \verb+[yyyy-mm-dd]_[exp]_[type].txt+\\ + Leading zero & \verb+01_data-cleaning_study1.Rmd+ & \verb+[##]_[descrp]_[study].[R/Rmd]+\\ + \hline + \end{tabular} + } + \begin{itemize} + \item Documenting is key and becomes second nature after a while + \item Create a README file and write down everything that could be + useful to remember + \item Update this README file regularly + \end{itemize} +\end{frame} + +\begin{frame}{Version control} + \begin{center} + \includegraphics[scale = .38]{../figures/phd101212s} + \end{center} + \vfill + {\hfill \tiny \url{https://phdcomics.com/comics/archive.php?comicid=1531}} +\end{frame} + +\begin{frame}[fragile]{Version control} + \begin{itemize} + \item Version 
control is a systematic approach to record changes made + in a file, or set of files, over time + \item File versioning can be as simple as using file naming conventions + like suffixes \verb+*_v1+, \verb+*_v2+, \verb+*_vn+ + \end{itemize} + \vspace{.3cm} + \begin{enumerate} + \item Create files -- these may contain text, code or both + \item Work on these files, by changing, deleting or adding new content + \item Create a snapshot of the file status (also known as version) at this time + \item Document versions (e.\,g., in a README file) + \end{enumerate} + \vfill + {\hfill \tiny + \url{https://the-turing-way.netlify.app/reproducible-research/vcs.html}} +\end{frame} + +\begin{frame}{Example master thesis} + \centering + \includegraphics[width = .6\textwidth]{../figures/ex_filenaming_ma_01} +\end{frame} + +\begin{frame}{} + \centering + {\Huge + \color{iwmblue}{There is no right or wrong -- only what works best + for you!\\\vspace{.5cm}\pause + AND: You can change your file names whenever you feel like something else + might work even better!}} + \vfill\pause + Checklist for good file names: \url{https://osf.io/dpu45} +\end{frame} + +\section{Folder structure} + +\begin{frame}{The basics} + \begin{itemize} + \item One project, one folder + \pause + \item Consistent pattern for each project + \pause + \item Do not nest too deep!\\ + $\to$ depth vs.\ width (maximum path length on Windows is 255 characters) + \pause + \item Add README file at top level + \pause + \item Good naming conventions also apply to folders: + \begin{itemize} + \item Folder names should contain only letters, numbers, underscores, and dashes + \item A dash or underscore should be used instead of a space + \item No special characters (\& ' " ; : * ! \# \$, etc.) 
+ \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Folder structure for a master thesis project} + \begin{tikzpicture}[ + every node/.style = {text width = 4cm, align = left}, + every path/.style = {thick, draw} + ] + \node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+}; + % first level + \node (n1) at (4, 0) {\faIcon{folder} \verb+admin+}; + \node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+}; + \node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+}; + \node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+}; + \node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+}; + \node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+}; + \node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+}; + \node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+}; + \node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+}; + \path (top.east) -- (n1.west); + \path (top.east) -- (n2.west); + \path (top.east) -- (n3.west); + \path (top.east) -- (n4.west); + \path (top.east) -- (n5.west); + \path (top.east) -- (n6.west); + \path (top.east) -- (n7.west); + \path (top.east) -- (n8.west); + \path (top.east) -- (file.west); + \end{tikzpicture} +\end{frame} + +\begin{frame}[fragile]{Folder structure for a master thesis project} + \begin{tikzpicture}[ + every node/.style = {text width = 4cm, align = left}, + every path/.style = {thick, draw} + ] + \node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+}; + % first level + \node (n1) at (4, 0) {\faIcon{folder} \verb+admin+}; + \node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+}; + \node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+}; + \node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+}; + \node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+}; + \node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+}; + \node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+}; + \node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+}; + \node (file) at (4, -5.6) {\faIcon[regular]{file} 
\verb+README+}; + \path (top.east) -- (n1.west); + \path (top.east) -- (n2.west); + \path (top.east) -- (n3.west); + \path (top.east) -- (n4.west); + \path (top.east) -- (n5.west); + \path (top.east) -- (n6.west); + \path (top.east) -- (n7.west); + \path (top.east) -- (n8.west); + \path (top.east) -- (file.west); + % second level + \node[text width = 7cm] (o1) at (10, 0) {\faIcon[regular]{file-pdf} \verb+master-thesis_forms_2022.pdf+}; + \node[text width = 7cm] (o2) at (10, -0.7) {\faIcon[regular]{file} \verb+infoveranstaltung.md+}; + \path (n1.center) -- (o1.west); + \path (n1.center) -- (o2.west); + \end{tikzpicture} +\end{frame} + + +\begin{frame}[fragile]{Folder structure for a master thesis project} + \begin{tikzpicture}[ + every node/.style = {text width = 4cm, align = left}, + every path/.style = {thick, draw} + ] + \node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+}; + % first level + \node (n1) at (4, 0) {\faIcon{folder} \verb+admin+}; + \node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+}; + \node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+}; + \node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+}; + \node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+}; + \node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+}; + \node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+}; + \node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+}; + \node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+}; + \path (top.east) -- (n1.west); + \path (top.east) -- (n2.west); + \path (top.east) -- (n3.west); + \path (top.east) -- (n4.west); + \path (top.east) -- (n5.west); + \path (top.east) -- (n6.west); + \path (top.east) -- (n7.west); + \path (top.east) -- (n8.west); + \path (top.east) -- (file.west); + % second level + \node[text width = 7cm] (c1) at (10, -0.7) {\faIcon[regular]{file-code} + \verb+01_preprocessing.R+}; + \node[text width = 7cm] (c2) at (10, -1.4) {\faIcon[regular]{file-code} + \verb+02_modeling.R+}; + \node[text width = 
7cm] (c3) at (10, -2.1) {\faIcon[regular]{file-code} + \verb+03_plots.Rmd+}; + \path (n2.center) -- (c1.west); + \path (n2.center) -- (c2.west); + \path (n2.center) -- (c3.west); + \end{tikzpicture} +\end{frame} + +\begin{frame}[fragile]{Folder structure for a master thesis project} + \begin{tikzpicture}[ + every node/.style = {text width = 4cm, align = left}, + every path/.style = {thick, draw} + ] + \node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+}; + % first level + \node (n1) at (4, 0) {\faIcon{folder} \verb+admin+}; + \node (n2) at (4, -0.7) {\faIcon{folder} \verb+code+}; + \node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+}; + \node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+}; + \node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+}; + \node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+}; + \node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+}; + \node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+}; + \node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+}; + \path (top.east) -- (n1.west); + \path (top.east) -- (n2.west); + \path (top.east) -- (n3.west); + \path (top.east) -- (n4.west); + \path (top.east) -- (n5.west); + \path (top.east) -- (n6.west); + \path (top.east) -- (n7.west); + \path (top.east) -- (n8.west); + \path (top.east) -- (file.west); + % second level + \node[text width = 7cm] (m1) at (10, -1.4) {\faIcon{folder} \verb+raw_data+}; + \node[text width = 7cm] (m2) at (10, -2.1) {\faIcon[regular]{file} + \verb+DATA_vpall_exp1.csv+}; + \path (n3.center) -- (m1.west); + \path (n3.center) -- (m2.west); + \end{tikzpicture} +\end{frame} + +\begin{frame}[fragile]{Folder structure for a master thesis project} + \begin{tikzpicture}[ + every node/.style = {text width = 4cm, align = left}, + every path/.style = {thick, draw} + ] + \node[text width = 1.2cm] (top) at (0, 0) {\faIcon{folder} \verb+MA+}; + % first level + \node (n1) at (4, 0) {\faIcon{folder} \verb+admin+}; + \node (n2) at (4, -0.7) 
{\faIcon{folder} \verb+code+}; + \node (n3) at (4, -1.4) {\faIcon{folder} \verb+data+}; + \node (n4) at (4, -2.1) {\faIcon{folder} \verb+expose+}; + \node (n5) at (4, -2.8) {\faIcon{folder} \verb+figures+}; + \node (n6) at (4, -3.5) {\faIcon{folder} \verb+literature+}; + \node (n7) at (4, -4.2) {\faIcon{folder} \verb+talks+}; + \node (n8) at (4, -4.9) {\faIcon{folder} \verb+thesis+}; + \node (file) at (4, -5.6) {\faIcon[regular]{file} \verb+README+}; + \path (top.east) -- (n1.west); + \path (top.east) -- (n2.west); + \path (top.east) -- (n3.west); + \path (top.east) -- (n4.west); + \path (top.east) -- (n5.west); + \path (top.east) -- (n6.west); + \path (top.east) -- (n7.west); + \path (top.east) -- (n8.west); + \path (top.east) -- (file.west); + % second level + \node[text width = 6cm] (m3) at (8, -4.2) {\faIcon{folder} \verb+2023-05-05+}; + \node[text width = 6cm] (m4) at (8, -4.9) {\faIcon{folder} \verb+2023-10-12+}; + \node[text width = 6cm] (t1) at (13, -4.2) {\faIcon[regular]{file-powerpoint} + \verb+colloq_230505.pptx+}; + \node[text width = 6cm] (t2) at (13, -4.9) {\faIcon[regular]{file-word} \verb+notes.docx+}; + \path (n7.center) -- (m3.west); + \path (n7.center) -- (m4.west); + \path (m3.center) -- (t1.west); + \path (m3.center) -- (t2.west); + \end{tikzpicture} +\end{frame} + +\begin{frame}{TONIC: Structured Template} + \begin{itemize} + \item Different research projects might have different structures + \item However, there are certain similarities for most of them + \item You can find structured templates on the internet + \item One pretty generic one is TONIC + \end{itemize} + \vfill + \url{https://github.com/tonic-team/Tonic-Research-Project-Template}\\ + \url{https://gin-tonic.netlify.app/} +\end{frame} + +\begin{frame}[fragile]{TONIC: Structured Template} + \begin{tikzpicture}[ + every node/.style = {text width = 7cm, align = left}, + every path/.style = {thick, draw} + ] + \node (top) at (0, 0) {\faIcon{folder} + \verb+project_name+}; + % first 
level + \node (n1) at (7, 0) {\faIcon{folder} \verb+01_project_management+}; + \node (n2) at (7, -0.7) {\faIcon{folder} \verb+02_material_and_methods+}; + \node (n3) at (7, -1.4) {\faIcon{folder} \verb+03_data+}; + \node (n4) at (7, -2.1) {\faIcon{folder} \verb+04_data_analysis+}; + \node (n5) at (7, -2.8) {\faIcon{folder} \verb+05_figures+}; + \node (n6) at (7, -3.5) {\faIcon{folder} \verb+06_dissemination+}; + \node (n7) at (7, -4.2) {\faIcon{folder} \verb+07_misc+}; + \node (f1) at (7, -4.9) {\faIcon[regular]{file} \verb+LICENSE-CC-BY+}; + \node (f2) at (7, -5.6) {\faIcon[regular]{file} \verb+README.md+}; + \path (top.center) -- (n1.west); + \path (top.center) -- (n2.west); + \path (top.center) -- (n3.west); + \path (top.center) -- (n4.west); + \path (top.center) -- (n5.west); + \path (top.center) -- (n6.west); + \path (top.center) -- (n7.west); + \path (top.center) -- (f1.west); + \path (top.center) -- (f2.west); + \end{tikzpicture} +\end{frame} + +\begin{frame}[fragile]{TONIC: Structured Template} + {Subfolders} + \begin{tikzpicture}[ + every node/.style = {text width = 5.5cm, align = left}, + every path/.style = {thick, draw} + ] + \node (top) at (0, 0) {\faIcon{folder} + \verb+01_project_management+}; + % first level + \node (n1) at (7, 0) {\faIcon{folder} \verb+01_administration_files+}; + \node (n2) at (7, -0.7) {\faIcon{folder} \verb+02_accepted_grants+}; + \node (n3) at (7, -1.4) {\faIcon{folder} \verb+03_meeting_minutes+}; + \node (n4) at (7, -2.1) {\faIcon{folder} \verb+04_related_literature+}; + \node (n5) at (7, -2.8) {\faIcon{folder} \verb+05_data_management_plans+}; + \node (n6) at (7, -3.5) {\faIcon{folder} \verb+06_notebook+}; + \path (top.east) -- (n1.west); + \path (top.east) -- (n2.west); + \path (top.east) -- (n3.west); + \path (top.east) -- (n4.west); + \path (top.east) -- (n5.west); + \path (top.east) -- (n6.west); + \end{tikzpicture} + \hrule + \begin{tikzpicture}[ + every node/.style = {text width = 5.5cm, align = left}, + every 
path/.style = {thick, draw} + ] + \node (top) at (0, 0) {\faIcon{folder} + \verb+02_material_and_methods+}; + % first level + \node (n1) at (7, 0) {\faIcon{folder} \verb+01_protocols+}; + \node (n2) at (7, -0.7) {\faIcon{folder} \verb+02_code+}; + \node (n3) at (7, -1.4) {\faIcon{folder} \verb+03_hardware+}; + \node (f2) at (7, -2.1) {\faIcon[regular]{file} \verb+README_MM.md+}; + \path (top.east) -- (n1.west); + \path (top.east) -- (n2.west); + \path (top.east) -- (n3.west); + \path (top.east) -- (f2.west); + \end{tikzpicture} +\end{frame} + +\begin{frame}[fragile]{Additional tips} + \begin{itemize} + \item Dump incoming files not fitting your conventions in a prespecified + folder, e.\,g., + \begin{tikzpicture}[ + every node/.style = {text width = 7cm, align = left, color = iwmorange}, + every path/.style = {thick, draw} + ] + \node (top) at (0, 0) {\faIcon{folder} \verb+z_from-nora+}; + \end{tikzpicture} + \item Then adapt files from there and document changes/provenance + \item Dump older files cluttering your working directory + \begin{tikzpicture}[ + every node/.style = {text width = 7cm, align = left, color = iwmorange}, + every path/.style = {thick, draw} + ] + \node (top) at (0, 0) {\faIcon{folder} \verb+zzz+}; + \end{tikzpicture} + \item Delete files when the project is finished + \end{itemize} + \vfill + \pause + \begin{center} + {\Huge\color{iwmblue}{There is no right or wrong -- only what works best + for you!}} + \end{center} +\end{frame} + +\appendix + +%\begin{frame}[allowframebreaks]{References} +\begin{frame}{References} + \printbibliography + \vfill +\end{frame} + +\end{document} + diff --git a/README.md b/README.md index 6076352..7aa8cb8 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,12 @@ | Date | Topic | | ---------- | --------------------------------------- | -| 13.05.2024 | Introduction to data management | -| 27.05.2024 | Workflows | -| 10.06.2024 | | -| 24.06.2024 | | -| 08.07.2024 | | -| 22.07.2024 | | +| 2024-05-13 | Introduction to 
data management | +| 2024-05-27 | Workflow | +| 2024-06-10 | Data organisation | +| 2024-06-24 | Data sharing | +| 2024-07-08 | Clean coding | +| 2024-07-22 | Version control | # Literature @@ -20,3 +20,6 @@ Frazier, M. R., O'Hara, C. C., Jiang, N., & Halpern, B. S. (2017). Our path to better science in less time using open data science tools. _Nature Ecology & Evolution, 1_(6), 1-7. https://doi.org/10.1038/s41559-017-0160 +Wilbrandt, J. (2023). Research Data Management Intro Series: Coffee Lectures & +Espresso Shots. https://doi.org/10.5281/zenodo.7573695 + diff --git a/figures/QR Code for Methodenseminar SS 2024 - Session 2.png b/figures/QR Code for Methodenseminar SS 2024 - Session 2.png new file mode 100644 index 0000000..8123994 Binary files /dev/null and b/figures/QR Code for Methodenseminar SS 2024 - Session 2.png differ diff --git a/figures/email_data_request_2024_01.png b/figures/email_data_request_2024_01.png new file mode 100644 index 0000000..38e38e8 Binary files /dev/null and b/figures/email_data_request_2024_01.png differ diff --git a/figures/email_data_request_2024_02.png b/figures/email_data_request_2024_02.png new file mode 100644 index 0000000..ec3520f Binary files /dev/null and b/figures/email_data_request_2024_02.png differ diff --git a/figures/ex_filenaming_ma_01.png b/figures/ex_filenaming_ma_01.png new file mode 100644 index 0000000..c503456 Binary files /dev/null and b/figures/ex_filenaming_ma_01.png differ diff --git a/figures/ex_filenaming_website_01.png b/figures/ex_filenaming_website_01.png new file mode 100644 index 0000000..912cee9 Binary files /dev/null and b/figures/ex_filenaming_website_01.png differ diff --git a/figures/ex_filenaming_website_02.png b/figures/ex_filenaming_website_02.png new file mode 100644 index 0000000..fe8aec9 Binary files /dev/null and b/figures/ex_filenaming_website_02.png differ diff --git a/figures/xkcd_iso_8601_2x.png b/figures/xkcd_iso_8601_2x.png new file mode 100644 index 0000000..45b1d80 Binary files 
/dev/null and b/figures/xkcd_iso_8601_2x.png differ diff --git a/literature/lit.bib b/literature/lit.bib index 6d8bd2b..6d438f0 100644 --- a/literature/lit.bib +++ b/literature/lit.bib @@ -30,3 +30,12 @@ doi = {10.1177/2515245917747656} } +@misc{Wilbrandt2023, + author = {Wilbrandt, Jeanne}, + title = {{Research Data Management Intro Series: Coffee Lectures \& Espresso Shots}}, + year = {2023}, + publisher = {Zenodo}, + version = {1.0}, + url = {https://doi.org/10.5281/zenodo.7573695} +} +