Slides for fifth session

This commit is contained in:
Nora Wickelmaier 2024-07-05 10:49:14 +02:00
parent 66c9711d45
commit 7a2327aba3
3 changed files with 529 additions and 12 deletions

View File

@ -0,0 +1,503 @@
\documentclass[aspectratio=169]{beamer}
\usepackage{listings}
%\usepackage[utf8]{inputenc}
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
\addbibresource{../literature/lit.bib}
\usepackage{fancyvrb}
\usepackage{fontawesome5} % get icons
\usepackage{multirow}
\usepackage{color, colortbl}
\usepackage{tikz}
\usetikzlibrary{fit}
\usepackage[edges]{forest}
% Global defaults for every lstlisting environment: R as the language,
% typewriter font, a single-line frame, and colours from the iwm* palette.
% Individual listings may override single keys through their optional
% argument (several frames replace identifierstyle this way).
% Line numbers are switched off; the commented-out remainder of the
% "numbers" line keeps the alternative (left, tiny numbers) at hand.
\lstset{language = R,%
basicstyle = \ttfamily\color{iwmgray},
frame = single,
rulecolor = \color{iwmgray},
commentstyle = \slshape\color{iwmgreen},
keywordstyle = \bfseries\color{iwmgray},
identifierstyle = \color{iwmpurple},
stringstyle = \color{iwmblue},
numbers = none,%left,numberstyle = \tiny,
basewidth = {.5em, .4em},
showstringspaces = false,
emphstyle = \color{red!50!white}}
% Map \newblock to beamer's internal \beamer@newblock; \makeatletter is
% needed because the internal macro name contains an @.
\makeatletter \def\newblock{\beamer@newblock} \makeatother
% General look: no navigation symbols, circles as bullets for itemize
% items and for section entries in the table of contents.
\beamertemplatenavigationsymbolsempty
\setbeamertemplate{itemize items}[circle]
\setbeamertemplate{section in toc}[circle]
% Displayed math in gray (applied in beamer mode only).
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
% Blocks: white title on orange, body on a 50% orange tint.
\setbeamercolor{block body}{bg=iwmorange!50!white}
\setbeamercolor{block title}{fg=white, bg=iwmorange}
% Definitions for biblatex
\setbeamercolor{bibliography entry note}{fg=iwmgray}
\setbeamercolor{bibliography entry author}{fg=iwmgray}
\setbeamertemplate{bibliography item}{}
% Colour palette (RGB values on a 0-255 scale).
% NOTE(review): the iwm* colours are already referenced by the settings
% above (and by \lstset) before they are defined here; this presumably
% relies on colour names being resolved only when a colour is actually
% used -- confirm the deck compiles without "undefined color" errors.
\definecolor{iwmorange}{RGB}{255,105,0}
\definecolor{iwmgray}{RGB}{67,79,79}
\definecolor{iwmblue}{RGB}{60,180,220}
\definecolor{iwmgreen}{RGB}{145,200,110}
\definecolor{iwmpurple}{RGB}{120,0,75}
% Apply the palette: orange for titles, frame titles and structural
% elements; gray for body text, author and date.
\setbeamercolor{title}{fg=iwmorange}
\setbeamercolor{frametitle}{fg=iwmorange}
\setbeamercolor{structure}{fg=iwmorange}
\setbeamercolor{normal text}{fg=iwmgray}
\setbeamercolor{author}{fg=iwmgray}
\setbeamercolor{date}{fg=iwmgray}
% Semantic shorthands for bold math notation (one mandatory argument each).
% \vect and \mat both expand to \mathbf{#1}; \gvect and \gmat both expand
% to \boldsymbol{#1}. The separate names only document intent (vector vs.
% matrix); the "g" prefix presumably marks the variants meant for Greek
% symbols -- confirm with the author.
% NOTE(review): \boldsymbol is not defined in this preamble; it is assumed
% to come from a package loaded by the document class -- confirm.
\newcommand{\vect}[1]{\mathbf{#1}}
\newcommand{\mat}[1]{\mathbf{#1}}
\newcommand{\gvect}[1]{\boldsymbol{#1}}
\newcommand{\gmat}[1]{\boldsymbol{#1}}
% At the start of every section, insert a frame holding the table of
% contents. The sectionstyle/subsectionstyle values are given in beamer's
% "current/other" notation (show/hide for sections, show/show/hide for
% subsections). The empty optional argument supplies no content for the
% starred-section case.
\AtBeginSection[]{
\frame{
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
% Headline: horizontal section navigation spanning \paperwidth, wrapped in
% a colour box that uses the "section in head" beamer colour, with 5pt of
% vertical space above and 2pt below.
\setbeamertemplate{headline}{
\begin{beamercolorbox}{section in head}
\vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
\end{beamercolorbox}
}
% Footline: only the frame number, pushed to the right edge by \hfill and
% followed by a thick math space (\;) so it does not touch the margin.
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
\title{Clean coding}
\author{Nora Wickelmaier}
\date{July 8, 2024}
\begin{document}
\begin{frame}{}
\thispagestyle{empty}
\titlepage
\end{frame}
\begin{frame}{What is needed to make code reproducible?}
% slido
\centering
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 5}
\url{https://app.sli.do/event/uEz8fJWkLBNm1sthQovXNH}
\end{frame}
\begin{frame}[fragile]{Programming resources}
\footnotesize
\begin{tabular}{ll}
Learning statistics with R & {\url{https://learningstatisticswithr.com/book/}} \\
&\\
R for Data Science & {\url{https://r4ds.hadley.nz/}} \\
&\\
Advanced R & {\url{https://adv-r.hadley.nz/}} \\
&\\
Happy Git and GitHub for the useR & {\url{https://happygitwithr.com/}} \\
&\\
R Programming for Research & {\url{https://geanders.github.io/RProgrammingForResearch/}} \\
&\\
Building reproducible analytical pipelines with R & {\url{https://raps-with-r.dev/}} \\
&\\
Data Skills for Reproducible Science & {\url{https://psyteachr.github.io/msc-data-skills/}} \\
\end{tabular}
\end{frame}
\begin{frame}{Agenda}
\centering
\begin{tabular}{ll}
\hline
Date & Topic \\
\hline
2024-05-13 & Introduction to data management \\
2024-05-27 & Workflow \\
2024-06-10 & Data organisation\\
2024-06-24 & Data sharing \\
\only<1>{2024-07-08}\only<2>{\bf 2024-07-08} &
\only<1>{Clean coding}\only<2>{\bf Clean coding} \\
2024-07-22 & Version control \\
\hline
\end{tabular}
\end{frame}
% Understandable coding
% Cleaning up R code for readability
% Documentation of a final R script
% Reproducible code
\section{Style guidelines}
\begin{frame}[<+->]{Style guidelines in R}
\begin{itemize}
\item R has no mandatory or commonly accepted style guide
\item However, Hadley Wickham and Google developed style guides which are
now widely accepted
\begin{itemize}
\item \url{https://google.github.io/styleguide/Rguide.html}
\item \url{https://style.tidyverse.org/}
\end{itemize}
\item It is always a good idea to follow a style guide and not ``create''
your own rules (if you deviate, be consistent!)
\item A style guide helps with
\begin{itemize}
\item Keeping code clean, which makes it easier to read and interpret
\item Making it easier to catch and fix mistakes
\item Making it easier for others to follow and adapt your code
\item Preventing possible problems, e.\,g., avoiding dots in function
names
\end{itemize}
\end{itemize}
\nocite{Wickham_styleguide, Anderson2023}
\end{frame}
\begin{frame}[fragile, allowframebreaks]{File names}
\begin{itemize}
\item File names should be meaningful and end in .R
\item Avoid using special characters in file names
\item Stick with numbers, letters, \verb+-+, and \verb+_+
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
# Good
fit_models.R
utility_functions.R
# Bad
fit models.R
foo.r
stuff.r
\end{lstlisting}
\framebreak
\item If files should be run in a particular order, prefix them with numbers
\item If it seems likely you'll have more than 10 files, left pad with zero
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
00_download.R
01_explore.R
...
09_model.R
10_visualize.R
\end{lstlisting}
\item If you later realize that you missed some steps, it's tempting to use
02a, 02b, etc.
\item However, it is generally better to bite the bullet and rename all
files
\end{itemize}
\end{frame}
\begin{frame}[fragile, allowframebreaks]{Object names}
\begin{itemize}
\item Variable and function names should use only lowercase letters,
numbers, and \verb+_+
\item Use underscores (\verb+_+) (so-called snake case) to separate words
within a name
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
# Good
day_one
day_1
# Bad
DayOne
dayone
\end{lstlisting}
\framebreak
\item Generally, variable names should be nouns and function names should be
verbs
\item Strive for names that are concise and meaningful
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
# Good
day_one
# Bad
first_day_of_the_month
djm1
\end{lstlisting}
\framebreak
\item Avoid re-using names of common functions and variables
\begin{lstlisting}
# Bad
T <- FALSE
c <- 10
mean <- function(x) sum(x)
\end{lstlisting}
\end{itemize}
\end{frame}
\begin{frame}[fragile, allowframebreaks]{Spacing}
\begin{itemize}
\item Always put a space after a comma, never before
\begin{lstlisting}
# Good
x[, 1]
# Bad
x[,1]
x[ ,1]
x[ , 1]
\end{lstlisting}
\framebreak
\item Do not put spaces inside or outside parentheses for regular function
calls
\begin{lstlisting}
# Good
mean(x, na.rm = TRUE)
# Bad
mean (x, na.rm = TRUE)
mean( x, na.rm = TRUE )
\end{lstlisting}
\framebreak
\item Place a space before and after \texttt{()} when used with \texttt{if},
\texttt{for}, or \texttt{while}
\begin{lstlisting}
# Good
if (debug) {
show(x)
}
# Bad
if(debug){
show(x)
}
\end{lstlisting}
\framebreak
\item Place a space after \texttt{()} used for function arguments
\begin{lstlisting}
# Good
function(x) {}
# Bad
function (x) {}
function(x){}
\end{lstlisting}
\framebreak
\item Most infix operators (\verb+==+, \verb|+|, \verb+-+, \verb+<-+, etc.)
should always be surrounded by spaces
\begin{lstlisting}
# Good
height <- (feet * 12) + inches
mean(x, na.rm = TRUE)
# Bad
height<-feet*12+inches
mean(x, na.rm=TRUE)
\end{lstlisting}
\framebreak
\item There are a few exceptions, which should never be surrounded by
spaces: \verb+::+, \verb+:::+, \verb+$+, \verb+@+, \verb+[+, \verb+[[+,
\verb+?+, \verb+^+, and \verb+:+
{\small
\begin{lstlisting}
# Good
sqrt(x^2 + y^2)
df$z
x <- 1:10
package?stats
?mean
# Bad
sqrt(x ^ 2 + y ^ 2)
df $ z
x <- 1 : 10
package ? stats
? mean
\end{lstlisting}
}
\item Adding extra spaces is ok if it improves alignment of \verb+=+ or
\verb+<-+
\begin{lstlisting}
# Good
list(
total = a + b + c,
mean  = (a + b + c) / n
)
# Also fine
list(
total = a + b + c,
mean = (a + b + c) / n
)
\end{lstlisting}
\end{itemize}
\end{frame}
% CITE:
% https://style.tidyverse.org/index.html
% R Programming for Research: https://geanders.github.io/RProgrammingForResearch/
% Building reproducible analytical pipelines with R: https://raps-with-r.dev/
\section{Script organisation}
\begin{frame}[fragile]{Script header}
\begin{itemize}
\item It can be very helpful to have some general information right at the
top when opening a script
\begin{lstlisting}
# 01_preprocessing.R
#
# Cleaning up toy data set (Methods Seminar SS2024)
#
# Input: rawdata/RDM_MS_SS2024_download_2024-06-07.csv
# Output: processed/data_rdm-ms-ss2024_cleaned.csv
# processed/data_rdm-ms-ss2024_cleaned.RData
#
# Created: 2024-06-03, NW
\end{lstlisting}
\item These metadata help you remember faster what you did
\item Might not be necessary when using consistent version control (but does
not hurt either)
\end{itemize}
\end{frame}
\begin{frame}[fragile]{Line length}
{}
\begin{center}
{\Large\bf Keep lines to 80 characters or less!}
\end{center}
\begin{lstlisting}
# Good
my_df <- data.frame(n = 1:3,
letter = c("a", "b", "c"),
cap_letter = c("A", "B", "C"))
# Bad
my_df <- data.frame(n = 1:3, letter = c("a", "b", "c"), cap_letter = c("A", "B", "C"))
\end{lstlisting}
\begin{itemize}
\item Ensures that your code is formatted in a way that you can see all of
the code without scrolling horizontally
\item To set your script pane to be limited to 80 characters, go to\\
\verb+RStudio -> Preferences -> Code -> Display+\\
and set ``Margin Column'' to 80
\end{itemize}
\end{frame}
\begin{frame}[fragile, allowframebreaks]{File organisation}
\begin{itemize}
\item Try to write scripts that are concerned with one (major) task
\item If you can find a name that captures the content, it is usually a
good way to start
\item Some (random) examples
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
download-data.R
data-cleaning.R
cluster_analysis_exp1.R
visualization_logistic-model.R
anova_h1.R
\end{lstlisting}
\framebreak
\item Export data sets for new scripts (do not make yourself run all scripts
up to script 5 each time, just because you need the data in a certain
format)
\begin{lstlisting}
# Interoperable
write.table(dat,
file = "data_exp1_cleaned.csv",
sep = ";",
quote = FALSE,
row.names = FALSE)
# Preserve order of factor levels, date formats, etc.
save(dat, file = "data_exp1_cleaned.RData")
\end{lstlisting}
\end{itemize}
\end{frame}
\begin{frame}[fragile, allowframebreaks]{Internal structure}
\begin{itemize}
\item Use commented lines with \texttt{-} or \texttt{=} to break your file
up into chunks
\item Load additional packages at the beginning of the script
\begin{lstlisting}
library(lme4)
library(sjPlot)
# Load data ---------------------------
# Plot data ---------------------------
\end{lstlisting}
\framebreak
\item If you load several packages, be aware that the order of loading
matters!
\item If you use only one or two functions from a package, get the function
with \verb+::+ instead of loading the whole package
\begin{lstlisting}
library(lme4)
...
# Fit mixed-effects model to test Hypothesis 1
lme1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
summary(lme1)
sjPlot::tab_model(lme1)
\end{lstlisting}
\framebreak
\item Group related pieces of code together
\item Separate blocks of code by blank lines
\begin{lstlisting}
# Load data
library(faraway)
data(nepali)
# Relabel sex variable
nepali$sex <- factor(nepali$sex,
levels = c(1, 2),
labels = c("Male", "Female"))
\end{lstlisting}
\end{itemize}
\end{frame}
\begin{frame}{How can I test if my code is reproducible?}
% slido
\centering
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 5}
\url{https://app.sli.do/event/uEz8fJWkLBNm1sthQovXNH}
\end{frame}
\section{Code reviews}
\begin{frame}[<+->]{Use your peers}
\begin{itemize}
\item Do not overthink it!
\item Just give your data and code to a colleague and ask them to reproduce
what you did (this sounds easy, but it is actually not!)
\item This will give you tons of insights about your workflow
\begin{itemize}
\item Can this person (in general) understand what you did?
\item Is this person able to easily put your data on their machine and
run the code right away?
\item Anything this person would have done differently?
\item Discuss why and which things you do differently
\end{itemize}
\item Reading other people's code is the best way to learn about how things
can be done differently than you do them
\item You can review code by printing it out and adding comments by hand\\
(I highly recommend this!)
\end{itemize}
\end{frame}
\appendix
%\begin{frame}[allowframebreaks]{References}
\begin{frame}{References}
%\renewcommand{\bibfont}{\small}
\printbibliography
\vfill
\end{frame}
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.7 KiB

View File

@ -1,3 +1,11 @@
@book{Anderson2023,
title = {R programming for research},
author = {Brooke Anderson and Rachel Severson and Nicholas Good},
year = {2023},
publisher = {Colorado State University, ERHS 535},
url = {https://geanders.github.io/RProgrammingForResearch/}
}
@article{Kathawalla2021, @article{Kathawalla2021,
title = {Easing into open science: {A} guide for graduate students and their advisors}, title = {Easing into open science: {A} guide for graduate students and their advisors},
author = {Kathawalla, Ummul-Kiram and Silverstein, Priya and Syed, Moin}, author = {Kathawalla, Ummul-Kiram and Silverstein, Priya and Syed, Moin},
@ -9,11 +17,11 @@
} }
@book{Koeller2004, @book{Koeller2004,
title = {Wege zur {H}ochschulreife in {B}aden-{W}{\"u}rttemberg: {TOSCA} -- {E}ine {U}ntersuchung an allgemein bildenden und beruflichen {G}ymnasien}, title = {Wege zur {H}ochschulreife in {B}aden-{W}{\"u}rttemberg: {TOSCA} -- {E}ine {U}ntersuchung an allgemein bildenden und beruflichen {G}ymnasien},
author = {K{\"o}ller, Olaf and Watermann, Ralf and Trautwein, Ulrich and L{\"u}dtke, Oliver}, author = {K{\"o}ller, Olaf and Watermann, Ralf and Trautwein, Ulrich and L{\"u}dtke, Oliver},
year = {2004}, year = {2004},
publisher = {Springer}, publisher = {Springer},
doi = {10.1007/978-3-322-80906-3} doi = {10.1007/978-3-322-80906-3}
} }
@article{Lowndes2017, @article{Lowndes2017,
@ -51,14 +59,20 @@
} }
@article{Wicherts2012, @article{Wicherts2012,
title = {Publish (your data) or (let the data) perish! {W}hy not publish your data too?}, title = {Publish (your data) or (let the data) perish! {W}hy not publish your data too?},
author = {Wicherts, Jelte M and Bakker, Marjan}, author = {Wicherts, Jelte M and Bakker, Marjan},
journal = {Intelligence}, journal = {Intelligence},
volume = {40}, volume = {40},
number = {2}, number = {2},
pages = {73--76}, pages = {73--76},
year = {2012}, year = {2012},
doi = {10.1016/j.intell.2012.01.004} doi = {10.1016/j.intell.2012.01.004}
}
@misc{Wickham_styleguide,
author = {Hadley Wickham},
title = {The tidyverse style guide},
url = {https://style.tidyverse.org/}
} }
@misc{Wilbrandt2023, @misc{Wilbrandt2023,