Add table with demographics

2025-10-14 16:50:46 +02:00 · 2025-10-14 16:50:46 +02:00 · 79629447c9
commit 79629447c9
parent 9fa99a1d46
3 changed files with 252 additions and 57 deletions
--- a/manuscript.Rnw
+++ b/manuscript.Rnw
@ -1,5 +1,6 @@
 \documentclass{article}
 \usepackage[margin = 2.4cm]{geometry}
 \usepackage[utf8]{inputenc}
 \usepackage{Sweave}
 \usepackage{authblk}
@ -62,27 +63,6 @@ the Leibniz-Institut für Wissensmedien in Tübingen (IWM). There are several (h
 Thus, this data descriptor may be used to examine research questions across the individual work packages; it also offers the possibility to extract and analyze specific subgroups or individual trajectories ignored in the work packages. 
 Because the data set was collected shortly before the public release of Apple Intelligence on consumer devices, it offers a timely snapshot of user attitudes and behaviors at a pivotal moment in AI adoption. This context enhances the relevance of the data for understanding emerging patterns in human-AI interaction. Moreover, the findings may provide early indicators of how psychological variables such as trust, perceived usefulness, and willingness to delegate tasks relate to AI usage, potentially offering prognosis of similar developments in other countries.
 % WP1 Teresa/Nico/Vanessa/Angelica https://osf.io/58tqc
 %
 % WP2 Teresa
 %
 % WP3 Sonja https://aspredicted.org/4g3d-rqkt.pdf
 %
 % WP4 Büsra https://aspredicted.org/m6zv9.pdf
 %
 % WP5 Büsra/Teresa https://aspredicted.org/kx5r-4pxq.pdf
 %
 % WP6 Angelica/Gerrit https://doi.org/10.17605/OSF.IO/JAUD4
 %
 % WP7 Mike ???
 %
 % WP8 Steffi/Sonja https://osf.io/f3jyc?view_only=d8d009e575c64dc2bd453f969c3cb7b1
 %
 % WP9 Steffi https://osf.io/h5fwe?view_only=8c5bc9e62074469ebdb3d72b38f4716d
 %
 % --> Are these all WPs? Are there any missing?
 % Previous Publications
 %
 % * Cite any previous publications that utilized these data, in whole or in part
@ -185,6 +165,223 @@ Moreover, a codebook explaining variable abbreviations and containing informatio
 %  --> an overview about all variables, their calculation, their measurement format and ideally their M, SD, cronbachs alpha would be ideal!
 Table~\ref{tab:demographics} provides an overview of the demographic variables
 over all six waves.
 <<echo = false, results = tex>>=
 # Read the cleaned data files of the six waves (paths are relative to the
 # manuscript directory)
 wave_files <- c(
   "../data/03_cleaned_data/HMC_wave1_cleaned.csv",
   "../data/03_cleaned_data/HMC_wave2_cleaned.csv",
   "../data/03_cleaned_data/HMC_wave3_cleaned.csv",
   "../data/03_cleaned_data/HMC_wave4_cleaned.csv",
   "../data/03_cleaned_data/HMC_wave5_cleaned.csv",
   "../data/03_cleaned_data/HMC_wave6_cleaned.csv"
 )
 waves <- lapply(wave_files, read.csv)
 # Keep one data frame per wave under the names used by the rest of the chunk
 dat1 <- waves[[1]]
 dat2 <- waves[[2]]
 dat3 <- waves[[3]]
 dat4 <- waves[[4]]
 dat5 <- waves[[5]]
 dat6 <- waves[[6]]
 # Distinct subject IDs that occur in waves 2 to 6
 subj_id_w2 <- dat2$subj_id |> unique()
 subj_id_w3 <- dat3$subj_id |> unique()
 subj_id_w4 <- dat4$subj_id |> unique()
 subj_id_w5 <- dat5$subj_id |> unique()
 subj_id_w6 <- dat6$subj_id |> unique()
 # Demographics were collected in wave 1, so the demographic variables are
 # taken from the wave 1 data only
 demo_vars <- c("subj_id", "age", "gender", "education", "income",
                "apple_use", "apple_spprt_SiriAI", "apple_AI_intent_use", "use")
 dat <- dat1[demo_vars]
 # Value labels of the categorical variables; the numeric codes 1, 2, ... are
 # mapped to the labels in the order given here
 # TODO: Put the apple_* variables in a separate table? Left out for now!
 value_labels <- list(
   gender = c("Male", "Female", "Non-binary / third gender",
              "Prefer not to say"),
   use = c("user", "noUser"),
   apple_use = c("Yes", "No"),
   apple_spprt_SiriAI = c("Yes", "No", "I don't know"),
   apple_AI_intent_use = c("Yes", "No", "Maybe")
 )
 for (var in names(value_labels)) {
   labs <- value_labels[[var]]
   dat[[var]] <- factor(dat[[var]], levels = seq_along(labs), labels = labs)
 }
 # Education and income are deliberately kept as numeric codes for now:
 # dat$education <- factor(dat$education,
 #   levels = 1:7,
 #   labels = c("Some high school or less",
 #              "High school diploma or GED",
 #              "Some college, but no degree",
 #              "Associates or technical degree",
 #              "Bachelor's degree",
 #              "Graduate or professional degree (MA, MS, MBA, PhD, JD, MD, DDS etc.)",
 #              "Prefer not to say"))
 #
 # dat$income <- factor(dat$income,
 #   levels = 1:7,
 #   labels = c("Less than $25,000",
 #              "$25,000-$49,999",
 #              "$50,000-$74,999",
 #              "$75,000-$99,999",
 #              "$100,000-$149,999",
 #              "$150,000 or more",
 #              "Prefer not to say"))
 # TODO: What to do about these? Means are reported in the table, since it is
 # too detailed otherwise - but is this what we want?
 # Create table for demographics
 # Each row summarises the wave 1 demographics (`dat`) of the respondents whose
 # subj_id occurs in the respective wave; row 1 uses all wave 1 respondents.
 # NOTE(review): "Total N" counts the rows of each wave file, whereas all other
 # columns count wave 1 respondents matched by subj_id. The two can differ if a
 # wave file contains repeated rows or IDs that are absent from wave 1, or if
 # gender is missing - TODO confirm that the gender counts add up to N.
 # NOTE(review): education and income are averaged over their raw numeric
 # codes. The commented-out labels in this chunk list code 7 as
 # "Prefer not to say"; if that coding is in effect, code 7 should be set to NA
 # before averaging - TODO confirm against the codebook.
 wave_ids <- list(dat$subj_id, subj_id_w2, subj_id_w3, subj_id_w4, subj_id_w5,
                  subj_id_w6)
 wave_n <- c(nrow(dat1), nrow(dat2), nrow(dat3), nrow(dat4), nrow(dat5),
             nrow(dat6))
 # Format a numeric vector as "M (SD)" with two decimals each
 format_mean_sd <- function(x) {
   sprintf("%.2f (%.2f)", mean(x), sd(x))
 }
 # Build table columns 2 to 8 for the respondents whose subj_id is in `ids`;
 # returns a character vector of length 7
 summarise_wave <- function(ids) {
   wave <- dat[dat$subj_id %in% ids, , drop = FALSE]
   # The share of respondents with use == "user" is a proportion and has to be
   # scaled by 100 before the "%" sign is attached (previously 0.76 was
   # printed as "0.76%")
   pct_user <- 100 * sum(wave$use == "user", na.rm = TRUE) / nrow(wave)
   c(
     sprintf("%.2f%%", pct_user),
     sum(wave$gender == "Male", na.rm = TRUE),
     sum(wave$gender == "Female", na.rm = TRUE),
     sum(wave$gender %in% c("Non-binary / third gender", "Prefer not to say")),
     format_mean_sd(wave$age),
     format_mean_sd(wave$education),
     format_mean_sd(wave$income)
   )
 }
 # vapply() returns one column per wave, hence the transpose
 tab_demo <- cbind(
   wave_n,
   t(vapply(wave_ids, summarise_wave, character(7)))
 )
 dimnames(tab_demo) <- list(
   paste("wave", seq_along(wave_ids)),
   c("Total N", "User", "Male", "Female", "Other", "Age M(SD)",
     "Education M(SD)", "Income M(SD)")
 )
 # Printed as LaTeX by the chunk option results = tex
 xtable::xtable(tab_demo,
                align = c("l", "r", "r", "r", "r", "r", "c", "c", "c"),
                caption = "Demographic variables per wave",
                label = "tab:demographics", auto = TRUE)
@
 \section{Technical Validation}
 Wave 1 was conducted shortly before iOS 18?? was released. -> Were there any other external events potentially influencing the survey?
--- a/manuscript.pdf
+++ b/manuscript.pdf
--- a/manuscript.tex
+++ b/manuscript.tex
@ -1,5 +1,6 @@
 \documentclass{article}
 \usepackage[margin = 2.4cm]{geometry}
 \usepackage[utf8]{inputenc}
 \usepackage{Sweave}
 \usepackage{authblk}
@ -30,6 +31,12 @@ Thus, this data set allows for future research on psychological and behavioral d
 \section{Background and Summary}
 % Overview of Dataset
 %
 % * Provide a clear overview of the dataset
 % * Explain the motivation for creating the dataset
 % * Outline the potential reuse value of the dataset
 The introduction of transformer architectures in 2017 marked a major breakthrough in natural language processing (NLP), enabling significant advances in machine learning (ML) and the development of large language models (LLMs). These models, trained on vast corpora of text data, have demonstrated unprecedented capabilities in generating coherent and contextually relevant language. A milestone in public engagement with generative AI (GenAI) was the release of ChatGPT in November 2022, which made LLMs widely accessible to non-expert users.
 Since then, millions of individuals have interacted with conversational agents and other GenAI tools, often regularly integrating them into everyday tasks such as writing, coding, learning, and decision-making (LIT).
 This widespread proliferation of AI technologies, coupled with their increasingly diverse applications and personalized user experiences, raises the question of how psychological factors shape, and might explain differences in, AI adoption and usage.
@ -51,43 +58,11 @@ the moderating role of personality.
 % Note: lets all reflect on which term and why we want to use, and how we define it: usage vs. use
 This project is a joint project from the human-computer interaction group at
-the Leibniz-Institut für Wissensmedien in Tübingen (IWM). There are several (how many should we mention?) preregistrations from group members focusing on their individual subquestions.
+the Leibniz-Institut für Wissensmedien in Tübingen (IWM). There are several (how many should we mention?) preregistrations from group members focusing on their individual subquestions. For an overview of the work packages and their research questions, please visit our repository [LINK]. 
 % --> create workpackages.md
 Thus, this data descriptor may be used to examine research questions across the individual work packages; it also offers the possibility to extract and analyze specific subgroups or individual trajectories ignored in the work packages. 
 Because the data set was collected shortly before the public release of Apple Intelligence on consumer devices, it offers a timely snapshot of user attitudes and behaviors at a pivotal moment in AI adoption. This context enhances the relevance of the data for understanding emerging patterns in human-AI interaction. Moreover, the findings may provide early indicators of how psychological variables such as trust, perceived usefulness, and willingness to delegate tasks relate to AI usage, potentially offering prognosis of similar developments in other countries.
 % Overview of Dataset
 %
 % * Provide a clear overview of the dataset
 % * Explain the motivation for creating the dataset
 % * Outline the potential reuse value of the dataset
 %
 * dataset brings together various separate WPs -> possibility to make across-WP analyses
 * potential to look at clusters/subgroups/individual trajectories ignored in the WPs
 * snapshots of important points in time (LLMs on the rise)
 * outlook on potential developments in other countries
 * connection of actual use and stable psychological variables
 % WP1 Teresa/Nico/Vanessa/Angelica https://osf.io/58tqc
 %
 % WP2 Teresa
 %
 % WP3 Sonja https://aspredicted.org/4g3d-rqkt.pdf
 %
 % WP4 Büsra https://aspredicted.org/m6zv9.pdf
 %
 % WP5 Büsra/Teresa https://aspredicted.org/kx5r-4pxq.pdf
 %
 % WP6 Angelica/Gerrit https://doi.org/10.17605/OSF.IO/JAUD4
 %
 % WP7 Mike ???
 %
 % WP8 Steffi/Sonja https://osf.io/f3jyc?view_only=d8d009e575c64dc2bd453f969c3cb7b1
 %
 % WP9 Steffi https://osf.io/h5fwe?view_only=8c5bc9e62074469ebdb3d72b38f4716d
 %
 % --> Are these all WPs? Are there any missing?
 % Previous Publications
 %
 % * Cite any previous publications that utilized these data, in whole or in part
@ -190,6 +165,29 @@ Moreover, a codebook explaining variable abbreviations and containing informatio
 %  --> an overview about all variables, their calculation, their measurement format and ideally their M, SD, cronbachs alpha would be ideal!
 Table~\ref{tab:demographics} provides an overview of the demographic variables
 over all six waves.
 % latex table generated in R 4.5.1 by xtable 1.8-4 package
 % Tue Oct 14 16:48:36 2025
 \begin{table}[ht]
 \centering
 \begin{tabular}{lrrrrrccc}
  \hline
 & Total N & User & Male & Female & Other & Age M(SD) & Education M(SD) & Income M(SD) \\ 
  \hline
 wave 1 & 1007 & 0.76\% & 500 & 494 & 13 & 38.68 (11.11) & 4.37 (1.34) & 3.55 (1.62) \\ 
  wave 2 & 768 & 0.76\% & 375 & 384 & 8 & 39.37 (11.08) & 4.33 (1.32) & 3.55 (1.61) \\ 
  wave 3 & 658 & 0.77\% & 318 & 332 & 6 & 39.86 (11.00) & 4.30 (1.33) & 3.57 (1.61) \\ 
  wave 4 & 611 & 0.76\% & 282 & 323 & 5 & 40.13 (11.04) & 4.22 (1.35) & 3.50 (1.62) \\ 
  wave 5 & 564 & 0.76\% & 259 & 300 & 4 & 40.43 (11.06) & 4.19 (1.33) & 3.48 (1.61) \\ 
  wave 6 & 514 & 0.76\% & 238 & 270 & 5 & 40.36 (11.12) & 4.15 (1.33) & 3.43 (1.59) \\ 
   \hline
 \end{tabular}
 \caption{Demographic variables per wave} 
 \label{tab:demographics}
 \end{table}
 \section{Technical Validation}
 Wave 1 was conducted shortly before iOS 18?? was released. -> Were there any other external events potentially influencing the survey?
@ -249,8 +247,8 @@ Hier ist ein R-Chunk:
 > summary(x)
 \end{Sinput}
 \begin{Soutput}
-    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-2.68156 -0.66059 -0.06260 -0.05077  0.63839  2.57674 
+-2.5079 -0.8108 -0.2314 -0.1425  0.6314  3.1375 
 \end{Soutput}
 \end{Schunk}