Slides for fifth session

2024-07-05 10:49:14 +02:00
parent 66c9711d45
commit 7a2327aba3
3 changed files with 529 additions and 12 deletions
@@ -0,0 +1,503 @@
+\documentclass[aspectratio=169]{beamer}
+
+\usepackage{listings}
+%\usepackage[utf8]{inputenc}
+\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
+\addbibresource{../literature/lit.bib}
+
+\usepackage{fancyvrb}
+\usepackage{fontawesome5}                % get icons
+\usepackage{multirow}
+\usepackage{color, colortbl}
+
+\usepackage{tikz}
+\usetikzlibrary{fit}
+\usepackage[edges]{forest}
+
+\lstset{language = R,%
+  basicstyle = \ttfamily\color{iwmgray},
+  frame = single,
+  rulecolor = \color{iwmgray},
+  commentstyle = \slshape\color{iwmgreen},
+  keywordstyle = \bfseries\color{iwmgray},
+  identifierstyle = \color{iwmpurple},
+  stringstyle = \color{iwmblue},
+  numbers = none,%left,numberstyle = \tiny,
+  basewidth = {.5em, .4em},
+  showstringspaces = false,
+  emphstyle = \color{red!50!white}}
+
+\makeatletter \def\newblock{\beamer@newblock} \makeatother
+
+\beamertemplatenavigationsymbolsempty
+\setbeamertemplate{itemize items}[circle]
+\setbeamertemplate{section in toc}[circle]
+\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
+\setbeamercolor{block body}{bg=iwmorange!50!white}
+\setbeamercolor{block title}{fg=white, bg=iwmorange}
+% Definitions for biblatex
+\setbeamercolor{bibliography entry note}{fg=iwmgray}
+\setbeamercolor{bibliography entry author}{fg=iwmgray}
+\setbeamertemplate{bibliography item}{}
+
+\definecolor{iwmorange}{RGB}{255,105,0}
+\definecolor{iwmgray}{RGB}{67,79,79}
+\definecolor{iwmblue}{RGB}{60,180,220}
+\definecolor{iwmgreen}{RGB}{145,200,110}
+\definecolor{iwmpurple}{RGB}{120,0,75}
+
+\setbeamercolor{title}{fg=iwmorange}
+\setbeamercolor{frametitle}{fg=iwmorange}
+\setbeamercolor{structure}{fg=iwmorange}
+\setbeamercolor{normal text}{fg=iwmgray}
+\setbeamercolor{author}{fg=iwmgray}
+\setbeamercolor{date}{fg=iwmgray}
+
+\newcommand{\vect}[1]{\mathbf{#1}}
+\newcommand{\mat}[1]{\mathbf{#1}}
+\newcommand{\gvect}[1]{\boldsymbol{#1}}
+\newcommand{\gmat}[1]{\boldsymbol{#1}}
+
+\AtBeginSection[]{
+  \frame{
+    \tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
+
+\setbeamertemplate{headline}{
+ \begin{beamercolorbox}{section in head}
+   \vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
+ \end{beamercolorbox}
+}
+
+\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
+
+\title{Clean coding}
+\author{Nora Wickelmaier}
+\date{July 8, 2024}
+
+\begin{document}
+
+\begin{frame}{}
+\thispagestyle{empty}
+\titlepage
+\end{frame}
+
+\begin{frame}{What is needed to make code reproducible?}
+  % slido
+  \centering
+  \includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 5}
+
+  \url{https://app.sli.do/event/uEz8fJWkLBNm1sthQovXNH}
+\end{frame}
+
+\begin{frame}[fragile]{Programming resources}
+  \footnotesize
+  \begin{tabular}{ll}
+    Learning statistics with R  & {\url{https://learningstatisticswithr.com/book/}} \\
+    &\\
+    R for Data Science & {\url{https://r4ds.hadley.nz/}} \\
+    &\\
+    Advanced R  & {\url{https://adv-r.hadley.nz/}} \\
+    &\\
+    Happy Git and GitHub for the useR & {\url{https://happygitwithr.com/}} \\
+    &\\
+    R Programming for Research & {\url{https://geanders.github.io/RProgrammingForResearch/}} \\
+    &\\
+    Building reproducible analytical pipelines with R & {\url{https://raps-with-r.dev/}} \\
+    &\\
+    Data Skills for Reproducible Science & {\url{https://psyteachr.github.io/msc-data-skills/}} \\
+  \end{tabular}
+\end{frame}
+
+\begin{frame}{Agenda}
+\centering
+\begin{tabular}{ll}
+\hline
+Date & Topic \\
+\hline
+2024-05-13 & Introduction to data management \\
+2024-05-27 & Workflow \\
+2024-06-10 & Data organisation\\
+2024-06-24 & Data sharing                    \\
+\only<1>{2024-07-08}\only<2>{\bf 2024-07-08} &
+\only<1>{Clean coding}\only<2>{\bf Clean coding} \\
+2024-07-22 & Version control                 \\
+\hline
+\end{tabular}
+\end{frame}
+
+% Understandable coding
+% Cleaning up R code for readability
+% Documentation of a final R script
+% Reproducible code
+
+\section{Style guidelines}
+
+\begin{frame}[<+->]{Style guidelines in R}
+  \begin{itemize}
+    \item R has no mandatory or commonly accepted style guide
+    \item However, Hadley Wickham and Google developed style guides which are
+      now widely accepted
+      \begin{itemize}
+        \item \url{https://google.github.io/styleguide/Rguide.html}
+        \item \url{https://style.tidyverse.org/}
+      \end{itemize}
+    \item It is always a good idea to follow a style guide and not ``create''
+      your own rules (if you deviate, be consistent!)
+    \item A style guide helps with
+      \begin{itemize}
+        \item Keeping code clean, which makes it easier to read and interpret
+        \item Making it easier to catch and fix mistakes
+        \item Making it easier for others to follow and adapt your code
+        \item Preventing possible problems, e.\,g., avoiding dots in function
+          names
+      \end{itemize}
+  \end{itemize}
+  \nocite{Wickham_styleguide, Anderson2023}
+\end{frame}
+
+\begin{frame}[fragile, allowframebreaks]{File names}
+  \begin{itemize}
+    \item File names should be meaningful and end in .R
+    \item Avoid using special characters in file names
+    \item Stick with numbers, letters, \verb+-+, and \verb+_+
+  \begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
+# Good
+fit_models.R
+utility_functions.R
+
+# Bad
+fit models.R
+foo.r
+stuff.r
+  \end{lstlisting}
+      \framebreak
+
+    \item If files should be run in a particular order, prefix them with numbers
+    \item If it seems likely you’ll have more than 10 files, left pad with zero
+  \begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
+00_download.R
+01_explore.R
+...
+09_model.R
+10_visualize.R
+  \end{lstlisting}
+    \item If you later realize that you missed some steps, it’s tempting to use
+      02a, 02b, etc.
+    \item However, it is generally better to bite the bullet and rename all
+      files
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile, allowframebreaks]{Object names}
+  \begin{itemize}
+    \item Variable and function names should use only lowercase letters,
+      numbers, and \verb+_+
+    \item Use underscores (\verb+_+) (so-called snake case) to separate words
+      within a name
+  \begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
+# Good
+day_one
+day_1
+
+# Bad
+DayOne
+dayone
+  \end{lstlisting}
+  \framebreak
+
+    \item Generally, variable names should be nouns and function names should be
+      verbs
+    \item Strive for names that are concise and meaningful
+      \begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
+# Good
+day_one
+
+# Bad
+first_day_of_the_month
+djm1
+  \end{lstlisting}
+  \framebreak
+
+    \item Avoid re-using names of common functions and variables
+  \begin{lstlisting}
+# Bad
+T <- FALSE
+c <- 10
+mean <- function(x) sum(x)
+  \end{lstlisting}
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile, allowframebreaks]{Spacing}
+  \begin{itemize}
+    \item Always put a space after a comma, never before
+  \begin{lstlisting}
+# Good
+x[, 1]
+
+# Bad
+x[,1]
+x[ ,1]
+x[ , 1]
+  \end{lstlisting}
+  \framebreak
+
+  \item Do not put spaces inside or outside parentheses for regular function
+    calls
+  \begin{lstlisting}
+# Good
+mean(x, na.rm = TRUE)
+
+# Bad
+mean (x, na.rm = TRUE)
+mean( x, na.rm = TRUE )
+  \end{lstlisting}
+  \framebreak
+
+\item Place a space before and after \texttt{()} when used with \texttt{if},
+  \texttt{for}, or \texttt{while}
+  \begin{lstlisting}
+# Good
+if (debug) {
+  show(x)
+}
+
+# Bad
+if(debug){
+  show(x)
+}
+  \end{lstlisting}
+  \framebreak
+
+\item Place a space after \texttt{()} used for function arguments
+  \begin{lstlisting}
+# Good
+function(x) {}
+
+# Bad
+function (x) {}
+function(x){}
+  \end{lstlisting}
+  \framebreak
+
+    \item Most infix operators (\verb+==+, \verb|+|, \verb+-+, \verb+<-+, etc.)
+      should always be surrounded by spaces
+  \begin{lstlisting}
+# Good
+height <- (feet * 12) + inches
+mean(x, na.rm = TRUE)
+
+# Bad
+height<-feet*12+inches
+mean(x, na.rm=TRUE)
+  \end{lstlisting}
+  \framebreak
+
+    \item There are a few exceptions, which should never be surrounded by
+      spaces: \verb+::+, \verb+:::+, \verb+$+, \verb+@+, \verb+[+, \verb+[[+,
+      \verb+?+, \verb+^+, and \verb+:+
+      {\small
+  \begin{lstlisting}
+# Good
+sqrt(x^2 + y^2)
+df$z
+x <- 1:10
+package?stats
+?mean
+
+# Bad
+sqrt(x ^ 2 + y ^ 2)
+df $ z
+x <- 1 : 10
+package ? stats
+? mean
+  \end{lstlisting}
+  }
+    \item Adding extra spaces is ok if it improves alignment of \verb+=+ or
+      \verb+<-+
+  \begin{lstlisting}
+# Good
+list(
+  total = a + b + c,
+  mean  = (a + b + c) / n
+)
+
+# Also fine
+list(
+  total = a + b + c,
+  mean = (a + b + c) / n
+)
+  \end{lstlisting}
+  \end{itemize}
+\end{frame}
+
+% CITE:
+% https://style.tidyverse.org/index.html
+% R Programming for Research: https://geanders.github.io/RProgrammingForResearch/
+% Building reproducible analytical pipelines with R: https://raps-with-r.dev/
+
+\section{Script organisation}
+
+\begin{frame}[fragile]{Script header}
+  \begin{itemize}
+    \item It can be very helpful to have some general information right at the
+      top when opening a script
+      \begin{lstlisting}
+# 01_preprocessing.R
+#
+# Cleaning up toy data set (Methods Seminar SS2024)
+#
+# Input:  rawdata/RDM_MS_SS2024_download_2024-06-07.csv
+# Output: processed/data_rdm-ms-ss2024_cleaned.csv
+#         processed/data_rdm-ms-ss2024_cleaned.RData
+#
+# Created: 2024-06-03, NW
+      \end{lstlisting}
+    \item These metadata help you remember faster what you did
+    \item Might not be necessary when using consistent version control (but does
+      not hurt either)
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Line length}
+  {}
+  \begin{center}
+    {\Large\bf Keep lines to 80 characters or less!}
+  \end{center}
+  \begin{lstlisting}
+# Good
+my_df <- data.frame(n = 1:3,
+                    letter = c("a", "b", "c"),
+                    cap_letter = c("A", "B", "C"))
+
+# Bad
+my_df <- data.frame(n = 1:3, letter = c("a", "b", "c"), cap_letter = c("A", "B", "C"))
+  \end{lstlisting}
+  \begin{itemize}
+    \item Ensures that your code is formatted in a way that you can see all of
+      the code without scrolling horizontally
+    \item To set your script pane to be limited to 80 characters, go to\\
+      \verb+RStudio -> Preferences -> Code -> Display+\\
+      and set ``Margin Column'' to 80
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile, allowframebreaks]{File organisation}
+  \begin{itemize}
+    \item Try to write scripts that are concerned with one (major) task
+    \item If you can find a name that captures the content, it is usually a
+      good way to start
+    \item Some (random) examples
+\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
+download-data.R
+data-cleaning.R
+cluster_analysis_exp1.R
+visualization_logistic-model.R
+anova_h1.R
+  \end{lstlisting}
+  \framebreak
+
+    \item Export data sets for new scripts (do not make yourself run all scripts
+      up to script 5 each time, just because you need the data in a certain
+      format)
+      \begin{lstlisting}
+# Interoperable
+write.table(dat,
+            file = "data_exp1_cleaned.csv",
+            sep = ";",
+            quote = FALSE,
+            row.names = FALSE)
+
+# Preserve order of factor levels, date formats, etc.
+save(dat, file = "data_exp1_cleaned.RData")
+      \end{lstlisting}
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile, allowframebreaks]{Internal structure}
+  \begin{itemize}
+    \item Use commented lines with \texttt{-} or \texttt{=} to break your file
+      up into chunks
+    \item Load additional packages at the beginning of the script
+  \begin{lstlisting}
+library(lme4)
+library(sjPlot)
+
+# Load data ---------------------------
+
+# Plot data ---------------------------
+  \end{lstlisting}
+  \framebreak
+
+    \item If you load several packages, be aware that the order of loading
+      matters!
+    \item If you use only one or two functions from a package, get the function
+      with \verb+::+ instead of loading the whole package
+  \begin{lstlisting}
+library(lme4)
+...
+
+# Fit mixed-effects model to test Hypothesis 1
+lme1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
+summary(lme1)
+sjPlot::tab_model(lme1)
+  \end{lstlisting}
+  \framebreak
+
+    \item Group related pieces of code together
+    \item Separate blocks of code by empty lines
+  \begin{lstlisting}
+# Load data
+library(faraway)
+data(nepali)
+
+# Relabel sex variable
+nepali$sex <- factor(nepali$sex, 
+                     levels = c(1, 2),
+                     labels = c("Male", "Female"))
+  \end{lstlisting}
+  \end{itemize}
+\end{frame}
+
+
+
+\begin{frame}{How can I test if my code is reproducible?}
+  % slido
+  \centering
+  \includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 5}
+
+  \url{https://app.sli.do/event/uEz8fJWkLBNm1sthQovXNH}
+\end{frame}
+
+\section{Code reviews}
+
+\begin{frame}[<+->]{Use your peers}
+  \begin{itemize}
+    \item Do not overthink it!
+    \item Just give your data and code to a colleague and ask them to reproduce
+      what you did (this sounds easy, but it is actually not!)
+    \item This will give you tons of insights about your workflow
+      \begin{itemize}
+        \item Can this person (in general) understand what you did?
+        \item Is this person able to easily put your data on their machine and
+          run the code right away?
+        \item Anything this person would have done differently?
+        \item Discuss why and which things you do differently
+      \end{itemize}
+    \item Reading other people's code is the best way to learn about how things
+      can be done differently than you do them
+    \item You can review code by printing it out and adding comments by hand\\
+      (I highly recommend this!)
+  \end{itemize}
+\end{frame}
+
+\appendix
+%\begin{frame}[allowframebreaks]{References}
+\begin{frame}{References}
+%\renewcommand{\bibfont}{\small}
+  \printbibliography
+\vfill
+\end{frame}
+
+\end{document}
+
@@ -1,3 +1,11 @@
+@book{Anderson2023,
+  title     = {R programming for research},
+  author    = {Brooke Anderson and Rachel Severson and Nicholas Good},
+  year      = {2023},
+  publisher = {Colorado State University, ERHS 535},
+  url       = {https://geanders.github.io/RProgrammingForResearch/}
+}
+
@article{Kathawalla2021,
  title     = {Easing into open science: {A} guide for graduate students and their advisors},
  author    = {Kathawalla, Ummul-Kiram and Silverstein, Priya and Syed, Moin},
@@ -61,6 +69,12 @@
  doi       = {10.1016/j.intell.2012.01.004}
 }

+@misc{Wickham_styleguide,
+  author    = {Hadley Wickham},
+  title     = {The tidyverse style guide},
+  url       = {https://style.tidyverse.org/}
+}
+
@misc{Wilbrandt2023,
  author    = {Wilbrandt, Jeanne},
  title     = {{Research Data Management Intro Series: Coffee Lectures \& Espresso Shots}},