% Beamer slides (16:9): "Clean coding".
% Sections: Style guidelines, Script organisation, Code reviews.
% Bibliography is handled by biblatex/biber (APA style) from ../literature/lit.bib.
\documentclass[aspectratio=169]{beamer}

% --- Packages ---------------------------------------------------------------
\usepackage{listings}
%\usepackage[utf8]{inputenc}
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
\addbibresource{../literature/lit.bib}
\usepackage{fancyvrb}
\usepackage{fontawesome5} % get icons
\usepackage{multirow}
\usepackage{color, colortbl}
\usepackage{tikz}
\usetikzlibrary{fit}
\usepackage[edges]{forest}

% --- Colour palette (defined before first use below) ------------------------
\definecolor{iwmorange}{RGB}{255,105,0}
\definecolor{iwmgray}{RGB}{67,79,79}
\definecolor{iwmblue}{RGB}{60,180,220}
\definecolor{iwmgreen}{RGB}{145,200,110}
\definecolor{iwmpurple}{RGB}{120,0,75}

% --- Default style for R listings -------------------------------------------
\lstset{language = R,%
  basicstyle = \ttfamily\color{iwmgray},
  frame = single,
  rulecolor = \color{iwmgray},
  commentstyle = \slshape\color{iwmgreen},
  keywordstyle = \bfseries\color{iwmgray},
  identifierstyle = \color{iwmpurple},
  stringstyle = \color{iwmblue},
  numbers = none,%left,numberstyle = \tiny,
  basewidth = {.5em, .4em},
  showstringspaces = false,
  emphstyle = \color{red!50!white}}

% Point \newblock at beamer's internal \beamer@newblock
\makeatletter
\def\newblock{\beamer@newblock}
\makeatother

% --- Beamer templates and colours -------------------------------------------
\beamertemplatenavigationsymbolsempty
\setbeamertemplate{itemize items}[circle]
\setbeamertemplate{section in toc}[circle]
\mode{\setbeamercolor{math text displayed}{fg=iwmgray}}
\setbeamercolor{block body}{bg=iwmorange!50!white}
\setbeamercolor{block title}{fg=white, bg=iwmorange}

% Definitions for biblatex
\setbeamercolor{bibliography entry note}{fg=iwmgray}
\setbeamercolor{bibliography entry author}{fg=iwmgray}
\setbeamertemplate{bibliography item}{}

\setbeamercolor{title}{fg=iwmorange}
\setbeamercolor{frametitle}{fg=iwmorange}
\setbeamercolor{structure}{fg=iwmorange}
\setbeamercolor{normal text}{fg=iwmgray}
\setbeamercolor{author}{fg=iwmgray}
\setbeamercolor{date}{fg=iwmgray}

% --- Math shorthands (bold vectors/matrices, Latin and Greek) ---------------
\newcommand{\vect}[1]{\mathbf{#1}}
\newcommand{\mat}[1]{\mathbf{#1}}
\newcommand{\gvect}[1]{\boldsymbol{#1}}
\newcommand{\gmat}[1]{\boldsymbol{#1}}

% Show the table of contents at the start of every section
\AtBeginSection[]{
  \frame{
    \tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}

% Headline: horizontal section navigation; footline: frame number only
\setbeamertemplate{headline}{
  \begin{beamercolorbox}{section in head}
    \vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
  \end{beamercolorbox}
}
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}

\title{Clean coding}
\author{Nora Wickelmaier}
\date{July 8, 2024}

\begin{document}

\begin{frame}{}
  \thispagestyle{empty}
  \titlepage
\end{frame}

\begin{frame}{What is needed to make code reproducible?}
  % slido
  \centering
  \includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 5}

  \url{https://app.sli.do/event/uEz8fJWkLBNm1sthQovXNH}
\end{frame}

\begin{frame}[fragile]{Programming resources}
  \footnotesize
  \begin{tabular}{ll}
    Learning statistics with R & {\url{https://learningstatisticswithr.com/book/}} \\
    &\\
    R for Data Science & {\url{https://r4ds.hadley.nz/}} \\
    &\\
    Advanced R & {\url{https://adv-r.hadley.nz/}} \\
    &\\
    Happy Git and GitHub for the useR & {\url{https://happygitwithr.com/}} \\
    &\\
    R Programming for Research & {\url{https://geanders.github.io/RProgrammingForResearch/}} \\
    &\\
    Building reproducible analytical pipelines with R & {\url{https://raps-with-r.dev/}} \\
    &\\
    Data Skills for Reproducible Science & {\url{https://psyteachr.github.io/msc-data-skills/}} \\
  \end{tabular}
\end{frame}

\begin{frame}{Agenda}
  \centering
  \begin{tabular}{ll}
    \hline
    Date & Topic \\
    \hline
    2024-05-13 & Introduction to data management \\
    2024-05-27 & Workflow \\
    2024-06-10 & Data organisation\\
    2024-06-24 & Data sharing \\
    % Current session is set in bold on the second overlay
    \only<1>{2024-07-08}\only<2>{\textbf{2024-07-08}} & \only<1>{Clean coding}\only<2>{\textbf{Clean coding}} \\
    2024-07-22 & Version control \\
    \hline
  \end{tabular}
\end{frame}

% Understandable coding
% Cleaning up R code for readability
% Documentation of a final R script
% Reproducible code

\section{Style guidelines}

\begin{frame}[<+->]{Style guidelines in R}
  \begin{itemize}
    \item R has no mandatory or commonly accepted style guide
    \item However, Hadley Wickham and Google developed style guides which are
      now widely accepted
      \begin{itemize}
        \item \url{https://google.github.io/styleguide/Rguide.html}
        \item \url{https://style.tidyverse.org/}
      \end{itemize}
    \item It is always a good idea to follow a style guide and not ``create''
      your own rules (if you deviate, be consistent!)
    \item A style guide helps with
      \begin{itemize}
        \item Keeping code clean, which makes it easier to read and interpret
        \item Making it easier to catch and fix mistakes
        \item Making it easier for others to follow and adapt your code
        \item Preventing possible problems, e.\,g., avoiding dots in function
          names
      \end{itemize}
  \end{itemize}
  \nocite{Wickham_styleguide, Anderson2023}
\end{frame}

\begin{frame}[fragile, allowframebreaks]{File names}
  \begin{itemize}
    \item File names should be meaningful and end in .R
    \item Avoid using special characters in file names
    \item Stick with numbers, letters, \verb+-+, and \verb+_+
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
# Good
fit_models.R
utility_functions.R

# Bad
fit models.R
foo.r
stuff.r
\end{lstlisting}
    \framebreak
    \item If files should be run in a particular order, prefix them with
      numbers
    \item If it seems likely you'll have more than 10 files, left pad with
      zero
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
00_download.R
01_explore.R
...
09_model.R
10_visualize.R
\end{lstlisting}
    \item If you later realize that you missed some steps, it's tempting to
      use 02a, 02b, etc.
    \item However, it is generally better to bite the bullet and rename all
      files
  \end{itemize}
\end{frame}

\begin{frame}[fragile, allowframebreaks]{Object names}
  \begin{itemize}
    \item Variable and function names should use only lowercase letters,
      numbers, and \verb+_+
    \item Use underscores (\verb+_+) (so-called snake case) to separate words
      within a name
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
# Good
day_one
day_1

# Bad
DayOne
dayone
\end{lstlisting}
    \framebreak
    \item Generally, variable names should be nouns and function names should
      be verbs
    \item Strive for names that are concise and meaningful
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
# Good
day_one

# Bad
first_day_of_the_month
djm1
\end{lstlisting}
    \framebreak
    \item Avoid re-using names of common functions and variables
\begin{lstlisting}
# Bad
T <- FALSE
c <- 10
mean <- function(x) sum(x)
\end{lstlisting}
  \end{itemize}
\end{frame}

\begin{frame}[fragile, allowframebreaks]{Spacing}
  \begin{itemize}
    \item Always put a space after a comma, never before
\begin{lstlisting}
# Good
x[, 1]

# Bad
x[,1]
x[ ,1]
x[ , 1]
\end{lstlisting}
    \framebreak
    \item Do not put spaces inside or outside parentheses for regular function
      calls
\begin{lstlisting}
# Good
mean(x, na.rm = TRUE)

# Bad
mean (x, na.rm = TRUE)
mean( x, na.rm = TRUE )
\end{lstlisting}
    \framebreak
    \item Place a space before and after \texttt{()} when used with
      \texttt{if}, \texttt{for}, or \texttt{while}
\begin{lstlisting}
# Good
if (debug) {
  show(x)
}

# Bad
if(debug){
  show(x)
}
\end{lstlisting}
    \framebreak
    \item Place a space after \texttt{()} used for function arguments
\begin{lstlisting}
# Good
function(x) {}

# Bad
function (x) {}
function(x){}
\end{lstlisting}
    \framebreak
    \item Most infix operators (\verb+==+, \verb|+|, \verb+-+, \verb+<-+,
      etc.) should always be surrounded by spaces
\begin{lstlisting}
# Good
height <- (feet * 12) + inches
mean(x, na.rm = TRUE)

# Bad
height<-feet*12+inches
mean(x, na.rm=TRUE)
\end{lstlisting}
    \framebreak
    \item There are a few exceptions, which should never be surrounded by
      spaces: \verb+::+, \verb+:::+, \verb+$+, \verb+@+, \verb+[+, \verb+[[+,
      \verb+?+, \verb+^+, and \verb+:+
{\small
\begin{lstlisting}
# Good
sqrt(x^2 + y^2)
df$z
x <- 1:10
package?stats
?mean

# Bad
sqrt(x ^ 2 + y ^ 2)
df $ z
x <- 1 : 10
package ? stats
? mean
\end{lstlisting}
}
    \item Adding extra spaces is ok if it improves alignment of \verb+=+ or
      \verb+<-+
\begin{lstlisting}
# Good
list(
  total = a + b + c,
  mean  = (a + b + c) / n
)

# Also fine
list(
  total = a + b + c,
  mean = (a + b + c) / n
)
\end{lstlisting}
  \end{itemize}
\end{frame}

% CITE:
% https://style.tidyverse.org/index.html
% R Programming for Research: https://geanders.github.io/RProgrammingForResearch/
% Building reproducible analytical pipelines with R: https://raps-with-r.dev/

\section{Script organisation}

\begin{frame}[fragile]{Script header}
  \begin{itemize}
    \item It can be very helpful to have some general information right at the
      top when opening a script
\begin{lstlisting}
# 01_preprocessing.R
#
# Cleaning up toy data set (Methods Seminar SS2024)
#
# Input:  rawdata/RDM_MS_SS2024_download_2024-06-07.csv
# Output: processed/data_rdm-ms-ss2024_cleaned.csv
#         processed/data_rdm-ms-ss2024_cleaned.RData
#
# Created: 2024-06-03, NW
\end{lstlisting}
    \item These metadata help you remember faster what you did
    \item Might not be necessary when using consistent version control (but
      does not hurt either)
  \end{itemize}
\end{frame}

\begin{frame}[fragile]{Line length}
{}
  \begin{center}
    {\Large\bfseries Keep lines to 80 characters or less!}
  \end{center}
\begin{lstlisting}
# Good
my_df <- data.frame(n = 1:3,
                    letter = c("a", "b", "c"),
                    cap_letter = c("A", "B", "C"))

# Bad
my_df <- data.frame(n = 1:3, letter = c("a", "b", "c"), cap_letter = c("A", "B", "C"))
\end{lstlisting}
  \begin{itemize}
    \item Ensures that your code is formatted in a way that you can see all of
      the code without scrolling horizontally
    \item To set your script pane to be limited to 80 characters, go to\\
      \verb+RStudio -> Preferences -> Code -> Display+\\
      and set ``Margin Column'' to 80
  \end{itemize}
\end{frame}

\begin{frame}[fragile, allowframebreaks]{File organisation}
  \begin{itemize}
    \item Try to write scripts that are concerned with one (major) task
    \item If you can find a name that captures the content, it is usually a
      good way to start
    \item Some (random) examples
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
download-data.R
data-cleaning.R
cluster_analysis_exp1.R
visualization_logistic-model.R
anova_h1.R
\end{lstlisting}
    \framebreak
    \item Export data sets for new scripts (do not make yourself run all
      scripts up to script 5 each time, just because you need the data in a
      certain format)
\begin{lstlisting}
# Interoperable
write.table(dat, file = "data_exp1_cleaned.csv", sep = ";",
            quote = FALSE, row.names = FALSE)

# Preserve order of factor levels, date formats, etc.
save(dat, file = "data_exp1_cleaned.RData")
\end{lstlisting}
  \end{itemize}
\end{frame}

\begin{frame}[fragile, allowframebreaks]{Internal structure}
  \begin{itemize}
    \item Use commented lines with \texttt{-} or \texttt{=} to break your file
      up into chunks
    \item Load additional packages at the beginning of the script
\begin{lstlisting}
library(lme4)
library(sjPlot)

# Load data ---------------------------

# Plot data ---------------------------
\end{lstlisting}
    \framebreak
    \item If you load several packages, be aware that the order of loading
      matters!
    \item If you use only one or two functions from a package, get the
      function with \verb+::+ instead of loading the whole package
\begin{lstlisting}
library(lme4)
...

# Fit mixed-effects model to test Hypothesis 1
lme1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
summary(lme1)

sjPlot::tab_model(lme1)
\end{lstlisting}
    \framebreak
    \item Group related pieces of code together
    \item Separate blocks of code by empty lines
\begin{lstlisting}
# Load data
library(faraway)
data(nepali)

# Relabel sex variable
nepali$sex <- factor(nepali$sex,
                     levels = c(1, 2),
                     labels = c("Male", "Female"))
\end{lstlisting}
  \end{itemize}
\end{frame}

\begin{frame}{How can I test if my code is reproducible?}
  % slido
  \centering
  \includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 5}

  \url{https://app.sli.do/event/uEz8fJWkLBNm1sthQovXNH}
\end{frame}

\section{Code reviews}

\begin{frame}[<+->]{Use your peers}
  \begin{itemize}
    \item Do not overthink it!
    \item Just give your data and code to a colleague and ask them to
      reproduce what you did (this sounds easy, but it is actually not!)
    \item This will give you tons of insights about your workflow
      \begin{itemize}
        \item Can this person (in general) understand what you did?
        \item Is this person able to easily put your data on their machine and
          run the code right away?
        \item Anything this person would have done differently?
        \item Discuss why and which things you do differently
      \end{itemize}
    \item Reading other people's code is the best way to learn about how
      things can be done differently than you do them
    \item You can review code by printing it out and adding comments by hand\\
      (I highly recommend this!)
  \end{itemize}
\end{frame}

\appendix
%\begin{frame}[allowframebreaks]{References}
\begin{frame}{References}
  %\renewcommand{\bibfont}{\small}
  \printbibliography
  \vfill
\end{frame}

\end{document}