Compare commits

...

2 Commits

Author SHA1 Message Date
7a2327aba3 Slides for fifth session 2024-07-05 10:49:14 +02:00
66c9711d45 Added slides for repositories to fourth session 2024-07-05 10:47:53 +02:00
4 changed files with 607 additions and 35 deletions

View File

@ -279,7 +279,7 @@ Date & Topic \\
\end{itemize}
\vfill
\pause
(Maybe check out the two tips of the week on this topic:
(Maybe check out the three tips of the week on this topic:
{\tiny
\url{https://iwmonline.sharepoint.com/sites/intranet/SitePages/direktorat/en/Interne-Kommunikation.aspx\#tip-of-the-week-tutorial-series}})
\end{frame}
@ -407,7 +407,41 @@ Date & Topic \\
\section[Repositories]{Sharing data in repositories}
\begin{frame}{Data repositories}
\begin{frame}[allowframebreaks]{Data publication}
Which data should I share?
\begin{itemize}
\item In general, all data that are used in publications
\item Data for your dissertation
\item Data that you collected but know that you will never get around to
analyzing
\end{itemize}
\vspace{.3cm}
What are the reasons to share data?
\begin{itemize}
\item Transparency
\item Data safety
\item Cumulative research process
\item Visibility
\end{itemize}
\framebreak
For whom are you sharing data?
\begin{itemize}
\item Yourself
\item Reviewers
\item People who read your papers
\item Other scientists
\item Colleagues and collaborators
\end{itemize}
\vspace{.3cm}
How should you share your data?
\begin{itemize}
\item On a public platform (or website), i.\,e., no account needed if
possible
\item Together with a codebook or at least an informative README
\end{itemize}
\end{frame}
\begin{frame}{Data repositories (suggested in our Research Data Policy)}
National
\begin{itemize}
\item \url{https://www.psycharchives.org/}
@ -427,6 +461,48 @@ Date & Topic \\
\hfill{\footnotesize \url{https://datamanagement.hms.harvard.edu/share-publish/data-repositories}}
\end{frame}
\begin{frame}[<+->]{Zenodo}{https://zenodo.org/}
\begin{itemize}
\item General-purpose open repository launched in 2015
\item Financed by the EU (European OpenAIRE program)
\item Operated by CERN
\item All disciplines
\item Suitable for
\begin{itemize}
\item Data sets
\item Papers / Preprints
\item Research software
\item Reports
\item Any other digital research objects
\end{itemize}
\item Upload up to 50 GB possible
\item Easily citable since all objects get a DOI
\item Open source code is available on GitHub
\item IWM example: \url{https://doi.org/10.5281/zenodo.2532411}
\end{itemize}
\end{frame}
\begin{frame}[<+->]{PsychArchives}{https://psycharchives.org/}
\begin{itemize}
\item Disciplinary repository for psychological science (and neighboring
disciplines)
\item Developed and operated by ZPID (Leibniz-Institut für Psychologie)
\item Accommodating 20 different digital research object (DRO) types
\begin{itemize}
\item Articles
\item Preprints
\item Research data
\item Code
\item Supplements
\item Preregistrations
\item \dots
\end{itemize}
\item Searchable by ``IWM'': \url{https://psycharchives.org/en/browse/?q=iwm}
\item Easily citable since all objects get a DOI
\item Different objects can be linked together (e.\,g., data and code)
\end{itemize}
\end{frame}
\appendix
%%\begin{frame}[allowframebreaks]{References}
\begin{frame}{References}
@ -435,26 +511,5 @@ Date & Topic \\
\vfill
\end{frame}
\begin{frame}{A codebook should include}
\begin{tabular}{lp{11cm}}
\hline
Variable name & Usually some abbreviation like \texttt{pna01} \\
Variable label & Brief description to identify variable \\
Question text & If applicable, exact wording from survey question \\
Values & Values variable can take (e.\,g., 1 to 5) \\
Value labels & If applicable, textual descriptions of the values \\
Statistics & For example, range, mean, standard deviation for
numeric variables; frequencies and percentages for categorical variables \\
Missing data & If applicable, values and labels of missing data \\
Notes & Additional notes, remarks, or comments; for measures or
questions from copyrighted instruments, the notes field can be used to
cite the source \\
\hline
\end{tabular}
\vfill
\hfill\tiny \url{https://www.icpsr.umich.edu/web/ICPSR/cms/1983}
\end{frame}
\end{document}

View File

@ -0,0 +1,503 @@
\documentclass[aspectratio=169]{beamer}
\usepackage{listings}
%\usepackage[utf8]{inputenc}
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
\addbibresource{../literature/lit.bib}
\usepackage{fancyvrb}
\usepackage{fontawesome5} % get icons
\usepackage{multirow}
\usepackage{color, colortbl}
\usepackage{tikz}
\usetikzlibrary{fit}
\usepackage[edges]{forest}
\lstset{language = R,%
basicstyle = \ttfamily\color{iwmgray},
frame = single,
rulecolor = \color{iwmgray},
commentstyle = \slshape\color{iwmgreen},
keywordstyle = \bfseries\color{iwmgray},
identifierstyle = \color{iwmpurple},
stringstyle = \color{iwmblue},
numbers = none,%left,numberstyle = \tiny,
basewidth = {.5em, .4em},
showstringspaces = false,
emphstyle = \color{red!50!white}}
\makeatletter \def\newblock{\beamer@newblock} \makeatother
\beamertemplatenavigationsymbolsempty
\setbeamertemplate{itemize items}[circle]
\setbeamertemplate{section in toc}[circle]
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
\setbeamercolor{block body}{bg=iwmorange!50!white}
\setbeamercolor{block title}{fg=white, bg=iwmorange}
% Definitions for biblatex
\setbeamercolor{bibliography entry note}{fg=iwmgray}
\setbeamercolor{bibliography entry author}{fg=iwmgray}
\setbeamertemplate{bibliography item}{}
\definecolor{iwmorange}{RGB}{255,105,0}
\definecolor{iwmgray}{RGB}{67,79,79}
\definecolor{iwmblue}{RGB}{60,180,220}
\definecolor{iwmgreen}{RGB}{145,200,110}
\definecolor{iwmpurple}{RGB}{120,0,75}
\setbeamercolor{title}{fg=iwmorange}
\setbeamercolor{frametitle}{fg=iwmorange}
\setbeamercolor{structure}{fg=iwmorange}
\setbeamercolor{normal text}{fg=iwmgray}
\setbeamercolor{author}{fg=iwmgray}
\setbeamercolor{date}{fg=iwmgray}
\newcommand{\vect}[1]{\mathbf{#1}}
\newcommand{\mat}[1]{\mathbf{#1}}
\newcommand{\gvect}[1]{\boldsymbol{#1}}
\newcommand{\gmat}[1]{\boldsymbol{#1}}
\AtBeginSection[]{
\frame{
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
\setbeamertemplate{headline}{
\begin{beamercolorbox}{section in head}
\vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
\end{beamercolorbox}
}
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
\title{Clean coding}
\author{Nora Wickelmaier}
\date{July 8, 2024}
\begin{document}
\begin{frame}{}
\thispagestyle{empty}
\titlepage
\end{frame}
\begin{frame}{What is needed to make code reproducible?}
% slido
\centering
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 5}
\url{https://app.sli.do/event/uEz8fJWkLBNm1sthQovXNH}
\end{frame}
\begin{frame}[fragile]{Programming resources}
\footnotesize
\begin{tabular}{ll}
Learning statistics with R & {\url{https://learningstatisticswithr.com/book/}} \\
&\\
R for Data Science & {\url{https://r4ds.hadley.nz/}} \\
&\\
Advanced R & {\url{https://adv-r.hadley.nz/}} \\
&\\
Happy Git and GitHub for the useR & {\url{https://happygitwithr.com/}} \\
&\\
R Programming for Research & {\url{https://geanders.github.io/RProgrammingForResearch/}} \\
&\\
Building reproducible analytical pipelines with R & {\url{https://raps-with-r.dev/}} \\
&\\
Data Skills for Reproducible Science & {\url{https://psyteachr.github.io/msc-data-skills/}} \\
\end{tabular}
\end{frame}
\begin{frame}{Agenda}
\centering
\begin{tabular}{ll}
\hline
Date & Topic \\
\hline
2024-05-13 & Introduction to data management \\
2024-05-27 & Workflow \\
2024-06-10 & Data organisation\\
2024-06-24 & Data sharing \\
\only<1>{2024-07-08}\only<2>{\bf 2024-07-08} &
\only<1>{Clean coding}\only<2>{\bf Clean coding} \\
2024-07-22 & Version control \\
\hline
\end{tabular}
\end{frame}
% Understandable coding
% Cleaning up R code for readability
% Documentation of a final R script
% Reproducible code
\section{Style guidelines}
\begin{frame}[<+->]{Style guidelines in R}
\begin{itemize}
\item R has no mandatory or commonly accepted style guide
\item However, Hadley Wickham and Google developed style guides which are
now widely accepted
\begin{itemize}
\item \url{https://google.github.io/styleguide/Rguide.html}
\item \url{https://style.tidyverse.org/}
\end{itemize}
\item It is always a good idea to follow a style guide and not ``create''
your own rules (if you deviate, be consistent!)
\item A style guide helps with
\begin{itemize}
\item Keeping code clean, which makes it easier to read and interpret
\item Making it easier to catch and fix mistakes
\item Making it easier for others to follow and adapt your code
\item Preventing possible problems, e.\,g., avoiding dots in function
names
\end{itemize}
\end{itemize}
\nocite{Wickham_styleguide, Anderson2023}
\end{frame}
\begin{frame}[fragile, allowframebreaks]{File names}
\begin{itemize}
\item File names should be meaningful and end in .R
\item Avoid using special characters in file names
\item Stick with numbers, letters, \verb+-+, and \verb+_+
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
# Good
fit_models.R
utility_functions.R
# Bad
fit models.R
foo.r
stuff.r
\end{lstlisting}
\framebreak
\item If files should be run in a particular order, prefix them with numbers
\item If it seems likely you'll have more than 10 files, left pad with zero
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
00_download.R
01_explore.R
...
09_model.R
10_visualize.R
\end{lstlisting}
\item If you later realize that you missed some steps, it's tempting to use
02a, 02b, etc.
\item However, it is generally better to bite the bullet and rename all
files
\end{itemize}
\end{frame}
\begin{frame}[fragile, allowframebreaks]{Object names}
\begin{itemize}
\item Variable and function names should use only lowercase letters,
numbers, and \verb+_+
\item Use underscores (\verb+_+) (so-called snake case) to separate words
within a name
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
# Good
day_one
day_1
# Bad
DayOne
dayone
\end{lstlisting}
\framebreak
\item Generally, variable names should be nouns and function names should be
verbs
\item Strive for names that are concise and meaningful
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
# Good
day_one
# Bad
first_day_of_the_month
djm1
\end{lstlisting}
\framebreak
\item Avoid re-using names of common functions and variables
\begin{lstlisting}
# Bad
T <- FALSE
c <- 10
mean <- function(x) sum(x)
\end{lstlisting}
\end{itemize}
\end{frame}
\begin{frame}[fragile, allowframebreaks]{Spacing}
\begin{itemize}
\item Always put a space after a comma, never before
\begin{lstlisting}
# Good
x[, 1]
# Bad
x[,1]
x[ ,1]
x[ , 1]
\end{lstlisting}
\framebreak
\item Do not put spaces inside or outside parentheses for regular function
calls
\begin{lstlisting}
# Good
mean(x, na.rm = TRUE)
# Bad
mean (x, na.rm = TRUE)
mean( x, na.rm = TRUE )
\end{lstlisting}
\framebreak
\item Place a space before and after \texttt{()} when used with \texttt{if},
\texttt{for}, or \texttt{while}
\begin{lstlisting}
# Good
if (debug) {
show(x)
}
# Bad
if(debug){
show(x)
}
\end{lstlisting}
\framebreak
\item Place a space after \texttt{()} used for function arguments
\begin{lstlisting}
# Good
function(x) {}
# Bad
function (x) {}
function(x){}
\end{lstlisting}
\framebreak
\item Most infix operators (\verb+==+, \verb|+|, \verb+-+, \verb+<-+, etc.)
should always be surrounded by spaces
\begin{lstlisting}
# Good
height <- (feet * 12) + inches
mean(x, na.rm = TRUE)
# Bad
height<-feet*12+inches
mean(x, na.rm=TRUE)
\end{lstlisting}
\framebreak
\item There are a few exceptions, which should never be surrounded by
spaces: \verb+::+, \verb+:::+, \verb+$+, \verb+@+, \verb+[+, \verb+[[+,
\verb+?+, \verb+^+, and \verb+:+
{\small
\begin{lstlisting}
# Good
sqrt(x^2 + y^2)
df$z
x <- 1:10
package?stats
?mean
# Bad
sqrt(x ^ 2 + y ^ 2)
df $ z
x <- 1 : 10
package ? stats
? mean
\end{lstlisting}
}
\item Adding extra spaces is ok if it improves alignment of \verb+=+ or
\verb+<-+
\begin{lstlisting}
# Good
list(
total = a + b + c,
mean  = (a + b + c) / n
)
# Also fine
list(
total = a + b + c,
mean = (a + b + c) / n
)
\end{lstlisting}
\end{itemize}
\end{frame}
% CITE:
% https://style.tidyverse.org/index.html
% R Programming for Research: https://geanders.github.io/RProgrammingForResearch/
% Building reproducible analytical pipelines with R: https://raps-with-r.dev/
\section{Script organisation}
\begin{frame}[fragile]{Script header}
\begin{itemize}
\item It can be very helpful to have some general information right at the
top when opening a script
\begin{lstlisting}
# 01_preprocessing.R
#
# Cleaning up toy data set (Methods Seminar SS2024)
#
# Input: rawdata/RDM_MS_SS2024_download_2024-06-07.csv
# Output: processed/data_rdm-ms-ss2024_cleaned.csv
# processed/data_rdm-ms-ss2024_cleaned.RData
#
# Created: 2024-06-03, NW
\end{lstlisting}
\item These metadata help you remember faster what you did
\item Might not be necessary when using consistent version control (but does
not hurt either)
\end{itemize}
\end{frame}
\begin{frame}[fragile]{Line length}
{}
\begin{center}
{\Large\bf Keep lines to 80 characters or less!}
\end{center}
\begin{lstlisting}
# Good
my_df <- data.frame(n = 1:3,
letter = c("a", "b", "c"),
cap_letter = c("A", "B", "C"))
# Bad
my_df <- data.frame(n = 1:3, letter = c("a", "b", "c"), cap_letter = c("A", "B", "C"))
\end{lstlisting}
\begin{itemize}
\item Ensures that your code is formatted in a way that you can see all of
the code without scrolling horizontally
\item To set your script pane to be limited to 80 characters, go to\\
\verb+RStudio -> Preferences -> Code -> Display+\\
and set ``Margin Column'' to 80
\end{itemize}
\end{frame}
\begin{frame}[fragile, allowframebreaks]{File organisation}
\begin{itemize}
\item Try to write scripts that are concerned with one (major) task
\item If you can find a name that captures the content, it is usually a
good way to start
\item Some (random) examples
\begin{lstlisting}[identifierstyle = \bfseries\color{iwmgray}]
download-data.R
data-cleaning.R
cluster_analysis_exp1.R
visualization_logistic-model.R
anova_h1.R
\end{lstlisting}
\framebreak
\item Export data sets for new scripts (do not make yourself run all scripts
up to script 5 each time, just because you need the data in a certain
format)
\begin{lstlisting}
# Interoperable
write.table(dat,
file = "data_exp1_cleaned.csv",
sep = ";",
quote = FALSE,
row.names = FALSE)
# Preserve order of factor levels, date formats, etc.
save(dat, file = "data_exp1_cleaned.RData")
\end{lstlisting}
\end{itemize}
\end{frame}
\begin{frame}[fragile, allowframebreaks]{Internal structure}
\begin{itemize}
\item Use commented lines with \texttt{-} or \texttt{=} to break your file
up into chunks
\item Load additional packages at the beginning of the script
\begin{lstlisting}
library(lme4)
library(sjPlot)
# Load data ---------------------------
# Plot data ---------------------------
\end{lstlisting}
\framebreak
\item If you load several packages, be aware that the order of loading
matters!
\item If you use only one or two functions from a package, get the function
with \verb+::+ instead of loading the whole package
\begin{lstlisting}
library(lme4)
...
# Fit mixed-effects model to test Hypothesis 1
lme1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
summary(lme1)
sjPlot::tab_model(lme1)
\end{lstlisting}
\framebreak
\item Group related pieces of code together
\item Separate blocks of code by empty spaces
\begin{lstlisting}
# Load data
library(faraway)
data(nepali)
# Relabel sex variable
nepali$sex <- factor(nepali$sex,
levels = c(1, 2),
labels = c("Male", "Female"))
\end{lstlisting}
\end{itemize}
\end{frame}
\begin{frame}{How can I test if my code is reproducible?}
% slido
\centering
\includegraphics[width = 5cm]{../figures/QR Code for Methodenseminar SS 2024 - Session 5}
\url{https://app.sli.do/event/uEz8fJWkLBNm1sthQovXNH}
\end{frame}
\section{Code reviews}
\begin{frame}[<+->]{Use your peers}
\begin{itemize}
\item Do not overthink it!
\item Just give your data and code to a colleague and ask them to reproduce
what you did (this sounds easy, but it is actually not!)
\item This will give you tons of insights about your workflow
\begin{itemize}
\item Can this person (in general) understand what you did?
\item Is this person able to easily put your data on their machine and
run the code right away?
\item Anything this person would have done differently?
\item Discuss why and which things you do differently
\end{itemize}
\item Reading other people's code is the best way to learn about how things
can be done differently than you do them
\item You can review code by printing it out and adding comments by hand\\
(I highly recommend this!)
\end{itemize}
\end{frame}
\appendix
%\begin{frame}[allowframebreaks]{References}
\begin{frame}{References}
%\renewcommand{\bibfont}{\small}
\printbibliography
\vfill
\end{frame}
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.7 KiB

View File

@ -1,3 +1,11 @@
@book{Anderson2023,
title = {R programming for research},
author = {Brooke Anderson and Rachel Severson and Nicholas Good},
year = {2023},
publisher = {Colorado State University, ERHS 535},
url = {https://geanders.github.io/RProgrammingForResearch/}
}
@article{Kathawalla2021,
title = {Easing into open science: {A} guide for graduate students and their advisors},
author = {Kathawalla, Ummul-Kiram and Silverstein, Priya and Syed, Moin},
@ -61,6 +69,12 @@
doi = {10.1016/j.intell.2012.01.004}
}
@misc{Wickham_styleguide,
author = {Hadley Wickham},
title = {The tidyverse style guide},
url = {https://style.tidyverse.org/}
}
@misc{Wilbrandt2023,
author = {Wilbrandt, Jeanne},
title = {{Research Data Management Intro Series: Coffee Lectures \& Espresso Shots}},