# Source file: uncongeniality_analysis/analysis_jobs/analysis_job_manuscript.yaml
# (Repository viewer listing: 667 lines, 20 KiB, YAML.)

---
# Named datasets derived from the raw data. Each dataset is a list of
# preprocessing steps, and each step is a `method` with its `param`.
# NOTE(review): the steps are presumably applied in the order listed, with
# `data_order` keeping rows of one `order` value and `data_section` keeping
# rows of one news section; confirm against the pipeline code.
preprocessing:
  # Datasets filtered by `data_order` only.
  data_order0:
  - method: data_order
    param: 0
  data_order1:
  - method: data_order
    param: 1

  # One dataset per news section, each combined with `data_order` 0.
  data_politics:
  - method: data_order
    param: 0
  - method: data_section
    param: 'Politics'
  data_foreign_affairs:
  - method: data_order
    param: 0
  - method: data_section
    param: 'Foreign affairs'
  data_science:
  - method: data_order
    param: 0
  - method: data_section
    param: 'Science'
  data_economy:
  - method: data_order
    param: 0
  - method: data_section
    param: 'Economy'
  data_miscellaneous:
  - method: data_order
    param: 0
  - method: data_section
    param: 'Miscellaneous'
  data_culture:
  - method: data_order
    param: 0
  - method: data_section
    param: 'Culture'
  data_sports:
  - method: data_order
    param: 0
  - method: data_section
    param: 'Sports'
  data_mobility:
  - method: data_order
    param: 0
  - method: data_section
    param: 'Mobility'
  data_internet:
  - method: data_order
    param: 0
  - method: data_section
    param: 'Internet'
  data_health:
  - method: data_order
    param: 0
  - method: data_section
    param: 'Health'

  # `data_order` 0 combined with a value-based exclusion on `totalvotes`.
  # NOTE(review): presumably drops rows whose `totalvotes` equals 0 - confirm.
  data_order0_with_minimum_one_vote:
  - method: data_order
    param: 0
  - method: exclude_data_with_value
    param:
      column: 'totalvotes'
      value: 0
# Descriptive summary tables. Each entry names a table, the dataset it reads,
# the grouping column, and the list of (operation, column) metrics to report.
# NOTE(review): `data` is not defined under `preprocessing`; presumably it is
# the unfiltered dataset - confirm.
descriptive:
# Table grouped by the `order` column: vote totals plus the
# bayes-corrected (q=0.25) valence and extremity.
- !descriptive_overview
  name: 'Extended_Data_Table_1_Descriptive_Data_for_different_comment_levels'
  dataset: 'data'
  group_by: 'order'
  metrics:
  # Row count; no column is given for this operation.
  - operation: 'count'
    column: null
  - operation: 'count_nonzero'
    column: 'totalvotes'
  - operation: 'sum'
    column: 'totalvotes'
  - operation: 'mean'
    column: 'totalvotes'
  - operation: 'std_dev'
    column: 'totalvotes'
  - operation: 'sum'
    column: 'upvotes'
  - operation: 'sum'
    column: 'downvotes'
  - operation: 'mean'
    column: 'bayes-corrected (q=0.25) valence'
  - operation: 'std_dev'
    column: 'bayes-corrected (q=0.25) valence'
  - operation: 'mean'
    column: 'bayes-corrected (q=0.25) extremity'
  - operation: 'std_dev'
    column: 'bayes-corrected (q=0.25) extremity'
# Table grouped by the `section` column: reply counts, vote totals, and both
# the raw and the bayes-corrected (q=0.25) valence/extremity.
# The `count_nonzero` metric on `totalvotes` appears once; the original list
# requested the identical metric a second time after the `downvotes` sum.
- !descriptive_overview
  name: 'Extended_Data_Table_2_Descriptive_Data_for_different_news_categories'
  dataset: 'data'
  group_by: 'section'
  metrics:
  # Row count; no column is given for this operation.
  - operation: 'count'
    column: null
  - operation: 'sum'
    column: 'number O(n+1)-replies'
  - operation: 'count_nonzero'
    column: 'number O(n+1)-replies'
  - operation: 'count_nonzero'
    column: 'totalvotes'
  - operation: 'sum'
    column: 'totalvotes'
  - operation: 'sum'
    column: 'upvotes'
  - operation: 'sum'
    column: 'downvotes'
  - operation: 'mean'
    column: 'valence'
  - operation: 'std_dev'
    column: 'valence'
  - operation: 'mean'
    column: 'bayes-corrected (q=0.25) valence'
  - operation: 'std_dev'
    column: 'bayes-corrected (q=0.25) valence'
  - operation: 'mean'
    column: 'extremity'
  - operation: 'std_dev'
    column: 'extremity'
  - operation: 'mean'
    column: 'bayes-corrected (q=0.25) extremity'
  - operation: 'std_dev'
    column: 'bayes-corrected (q=0.25) extremity'
# Statistical jobs. Each entry is a tagged job identified by `name`; the
# forest plots under `visualization` refer to jobs by that exact name, so
# names must not be changed on one side only.
analysis:
# Uncongeniality, dataset `data_order0`: 'number O(n+1)-replies' regressed on
# comment valence.

# Simplest model: raw `valence` only, not standardized.
- !linear_regression
  name: 'Evidence_uncongeniality_simplest_model_linear_regression_only_valence_non_standardized'
  dataset: 'data_order0'
  independent_variables:
  - 'valence'
  dependent_variable: 'number O(n+1)-replies'
  standardize: false
  report_effect_size: true

# Preregistered model: bayes-corrected (q=0.25) valence plus `totalvotes`.
- !linear_regression
  name: 'Evidence_uncongeniality_preregistered_model'
  dataset: 'data_order0'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true
  report_effect_size: true

# Same model with the q=0.5 valence column.
- !linear_regression
  name: 'Evidence_uncongeniality_stability_against_variation_in_weight_q5'
  dataset: 'data_order0'
  independent_variables:
  - 'bayes-corrected (q=0.5) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

# Same model with the q=0.75 valence column.
- !linear_regression
  name: 'Evidence_uncongeniality_stability_against_variation_in_weight_q75'
  dataset: 'data_order0'
  independent_variables:
  - 'bayes-corrected (q=0.75) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

# Same model with the uncorrected `valence` column.
# The double underscore in the name is kept as-is.
- !linear_regression
  name: 'Evidence_uncongeniality_stability_against_variation_in_weight__no_bayes_correction'
  dataset: 'data_order0'
  independent_variables:
  - 'valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true
# Uncongeniality on aggregated data: the preregistered variables, grouped by
# `group_by` before the regression.
# NOTE(review): `aggregation_functions` has three entries for two independent
# variables; presumably the third applies to the dependent variable - confirm
# the positional mapping in the pipeline code.

# Grouped by `user_id`.
- !linear_regression_grouped
  name: 'Evidence_uncongeniality_robustness_analysis_on_person_level'
  dataset: 'data_order0'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  aggregation_functions:
  - 'mean'
  - 'sum'
  - 'sum'
  group_by: 'user_id'
  standardize: true
  print_detailed_coefficients: true

# Grouped by `section`.
- !linear_regression_grouped
  name: 'Evidence_uncongeniality_robustness_analysis_on_section_level'
  dataset: 'data_order0'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  aggregation_functions:
  - 'mean'
  - 'sum'
  - 'sum'
  group_by: 'section'
  standardize: true
  print_detailed_coefficients: true
# Uncongeniality per news section: the preregistered regression repeated on
# each section dataset defined under `preprocessing`.
# These names are spelled `uncongenialty` (sic). They act as lookup keys for
# the Fig_2b forest plot, so the spelling is deliberately left untouched.

- !linear_regression
  name: 'Evidence_uncongenialty_section_politics'
  dataset: 'data_politics'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

- !linear_regression
  name: 'Evidence_uncongenialty_section_affairs'
  dataset: 'data_foreign_affairs'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

- !linear_regression
  name: 'Evidence_uncongenialty_section_science'
  dataset: 'data_science'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

- !linear_regression
  name: 'Evidence_uncongenialty_section_economy'
  dataset: 'data_economy'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

- !linear_regression
  name: 'Evidence_uncongenialty_section_miscellaneous'
  dataset: 'data_miscellaneous'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

- !linear_regression
  name: 'Evidence_uncongenialty_section_culture'
  dataset: 'data_culture'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

- !linear_regression
  name: 'Evidence_uncongenialty_section_sports'
  dataset: 'data_sports'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

- !linear_regression
  name: 'Evidence_uncongenialty_section_mobility'
  dataset: 'data_mobility'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

- !linear_regression
  name: 'Evidence_uncongenialty_section_internet'
  dataset: 'data_internet'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

- !linear_regression
  name: 'Evidence_uncongenialty_section_health'
  dataset: 'data_health'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true
# Preregistered uncongeniality regression repeated on `data_order1`.
- !linear_regression
  name: 'Evidence_uncongeniality_robustness_order1'
  dataset: 'data_order1'
  independent_variables:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true

# Reply count regressed on `upvotes` and `downvotes` as separate predictors.
# The name's spelling (`uncogeniality`, `seperate`) is kept as-is because the
# name identifies the job.
- !linear_regression
  name: 'Evidence_uncogeniality_model_with_seperate_upvotes_downvotes'
  dataset: 'data_order0'
  independent_variables:
  - 'upvotes'
  - 'downvotes'
  dependent_variable: 'number O(n+1)-replies'
  standardize: true
# Antagonism, dataset `data_order0`: a comment's valence regressed on the mean
# valence of its replies. The q variants use the matching q column on both
# sides of the regression.

# Preregistered model (q=0.25).
- !linear_regression
  name: 'Evidence_antagonism_preregistered_model'
  dataset: 'data_order0'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true

# q=0.5 columns.
- !linear_regression
  name: 'Evidence_antagonism_stability_against_variation_in_weight_q5'
  dataset: 'data_order0'
  independent_variables:
  - 'mean bayes-corrected (q=0.5) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.5) valence'
  standardize: true

# q=0.75 columns.
- !linear_regression
  name: 'Evidence_antagonism_stability_against_variation_in_weight_q75'
  dataset: 'data_order0'
  independent_variables:
  - 'mean bayes-corrected (q=0.75) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.75) valence'
  standardize: true

# Uncorrected valence columns.
- !linear_regression
  name: 'Evidence_antagonism_stability_against_variation_in_weight_no_bayes_correction'
  dataset: 'data_order0'
  independent_variables:
  - 'mean valence of replies'
  dependent_variable: 'valence'
  standardize: true
# Antagonism per news section: the preregistered (q=0.25) regression repeated
# on each section dataset. The names act as lookup keys for the Fig_3b forest
# plot and are therefore left exactly as they are.

- !linear_regression
  name: 'Evidence_antagonism_section_politics'
  dataset: 'data_politics'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true

- !linear_regression
  name: 'Evidence_antagonism_section_affairs'
  dataset: 'data_foreign_affairs'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true

- !linear_regression
  name: 'Evidence_antagonism_section_science'
  dataset: 'data_science'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true

- !linear_regression
  name: 'Evidence_antagonism_section_economy'
  dataset: 'data_economy'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true

- !linear_regression
  name: 'Evidence_antagonism_section_miscellaneous'
  dataset: 'data_miscellaneous'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true

- !linear_regression
  name: 'Evidence_antagonism_section_culture'
  dataset: 'data_culture'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true

- !linear_regression
  name: 'Evidence_antagonism_section_sports'
  dataset: 'data_sports'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true

- !linear_regression
  name: 'Evidence_antagonism_section_mobility'
  dataset: 'data_mobility'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true

- !linear_regression
  name: 'Evidence_antagonism_section_internet'
  dataset: 'data_internet'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true

- !linear_regression
  name: 'Evidence_antagonism_section_health'
  dataset: 'data_health'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true
# Preregistered (q=0.25) antagonism regression repeated on `data_order1`.
- !linear_regression
  name: 'Evidence_antagonism_robustness_order1'
  dataset: 'data_order1'
  independent_variables:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  dependent_variable: 'bayes-corrected (q=0.25) valence'
  standardize: true
# Polarization: paired t-tests comparing a comment's extremity (`variable_1`)
# with the mean extremity of its replies (`variable_2`).

# Main test, q=0.25 columns, `data_order0`.
- !paired_ttest
  name: 'Evidence_polarization_paired_ttest_extremity'
  dataset: 'data_order0'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'

# q=0.5 columns.
- !paired_ttest
  name: 'Evidence_polarization_stability_against_variation_in_weight_paired_ttest_q5'
  dataset: 'data_order0'
  variable_1: 'bayes-corrected (q=0.5) extremity'
  variable_2: 'mean bayes-corrected (q=0.5) extremity of replies'

# q=0.75 columns.
- !paired_ttest
  name: 'Evidence_polarization_stability_against_variation_in_weight_paired_ttest_q75'
  dataset: 'data_order0'
  variable_1: 'bayes-corrected (q=0.75) extremity'
  variable_2: 'mean bayes-corrected (q=0.75) extremity of replies'

# Uncorrected extremity columns.
# NOTE(review): despite the `_bayes` suffix this job reads the columns without
# bayes correction; the name is kept unchanged because it identifies the job.
- !paired_ttest
  name: 'Evidence_polarization_stability_against_variation_in_weight_paired_ttest_bayes'
  dataset: 'data_order0'
  variable_1: 'extremity'
  variable_2: 'mean extremity of replies'

# Main test repeated on `data_order1`.
- !paired_ttest
  name: 'Evidence_polarization_robustness_paired_ttest_order1'
  dataset: 'data_order1'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'
# Polarization per news section: the q=0.25 paired t-test repeated on each
# section dataset. The names act as lookup keys for the Fig_4b forest plot and
# are therefore left exactly as they are.

- !paired_ttest
  name: 'Evidence_polarization_paired_ttest_extremity_politics'
  dataset: 'data_politics'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'

- !paired_ttest
  name: 'Evidence_polarization_paired_ttest_extremity_foreign_affairs'
  dataset: 'data_foreign_affairs'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'

- !paired_ttest
  name: 'Evidence_polarization_paired_ttest_extremity_science'
  dataset: 'data_science'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'

- !paired_ttest
  name: 'Evidence_polarization_paired_ttest_extremity_economy'
  dataset: 'data_economy'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'

- !paired_ttest
  name: 'Evidence_polarization_paired_ttest_extremity_miscellaneous'
  dataset: 'data_miscellaneous'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'

- !paired_ttest
  name: 'Evidence_polarization_paired_ttest_extremity_culture'
  dataset: 'data_culture'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'

- !paired_ttest
  name: 'Evidence_polarization_paired_ttest_extremity_sports'
  dataset: 'data_sports'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'

- !paired_ttest
  name: 'Evidence_polarization_paired_ttest_extremity_mobility'
  dataset: 'data_mobility'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'

- !paired_ttest
  name: 'Evidence_polarization_paired_ttest_extremity_internet'
  dataset: 'data_internet'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'

- !paired_ttest
  name: 'Evidence_polarization_paired_ttest_extremity_health'
  dataset: 'data_health'
  variable_1: 'bayes-corrected (q=0.25) extremity'
  variable_2: 'mean bayes-corrected (q=0.25) extremity of replies'
# Figures. Each entry is a tagged plot job; `name` is the figure identifier.
visualization:
# Fig. 2a: hexbin plot of reply count against bayes-corrected (q=0.25)
# valence on `data_order0`, with the y axis capped at 40.
- !hexbinplot
  name: 'Fig_2a'
  dataset: 'data_order0'
  variable_x_axis: 'bayes-corrected (q=0.25) valence'
  variable_y_axis: 'number O(n+1)-replies'
  y_axis_maximum: 40
  # Booleans are written in canonical lowercase; the parsed value is unchanged.
  trendline: true
  logarithmic_hex_scaling: true
# Fig. 2b: forest plot of the per-section uncongeniality regressions.
# Every entry of `regression_model_names` must equal the `name` of a job under
# `analysis`. `regression_model_labels` lists the display labels.
# NOTE(review): labels are presumably matched to model names by position -
# keep both lists in the same order.
- !forestplot
  name: 'Fig_2b'
  regression_model_names:
  - 'Evidence_uncongenialty_section_politics'
  # The foreign-affairs regression is defined under the name
  # `..._section_affairs` (dataset `data_foreign_affairs`); there is no job
  # called `..._section_foreign_affairs`.
  - 'Evidence_uncongenialty_section_affairs'
  - 'Evidence_uncongenialty_section_science'
  - 'Evidence_uncongenialty_section_economy'
  - 'Evidence_uncongenialty_section_miscellaneous'
  - 'Evidence_uncongenialty_section_culture'
  - 'Evidence_uncongenialty_section_sports'
  - 'Evidence_uncongenialty_section_mobility'
  - 'Evidence_uncongenialty_section_internet'
  - 'Evidence_uncongenialty_section_health'
  regression_model_labels:
  - 'Politics'
  - 'Foreign Affairs'
  - 'Science'
  - 'Economy'
  - 'Miscellaneous'
  - 'Culture'
  - 'Sports'
  - 'Mobility'
  - 'Internet'
  - 'Health'
  # Coefficients to plot; both are independent variables of the listed models.
  coefficient_names:
  - 'bayes-corrected (q=0.25) valence'
  - 'totalvotes'
  x_axis_minimum: -0.6
  dotsize: 2
  x_axis_label: 'Standardized coefficient (95% Confidence Interval)'
# Fig. 2c: heat map of reply count over upvotes x downvotes, using the dataset
# restricted via `exclude_data_with_value` on `totalvotes`.
# `axis_maxima` / `axis_minima` have one entry per axis variable.
- !heatmap
  name: 'Fig_2c'
  dataset: 'data_order0_with_minimum_one_vote'
  axis_variables:
  - 'upvotes'
  - 'downvotes'
  heat_variable: 'number O(n+1)-replies'
  axis_maxima:
  - 20
  - 20
  axis_minima:
  - 0
  - 0
  # Real boolean, like the other scaling flags in this file. The quoted string
  # 'false' is a non-empty string and would be truthy in a plain truth test.
  logarithmic_heat_scaling: false
# Fig. 3a: density plot of comment valence against the mean valence of its
# replies (q=0.25 columns) on `data_order0`.
# NOTE(review): `data_breakpoints` presumably splits the data at the listed
# values (here 0) - confirm in the plotting code.
- !densityplot
  name: 'Fig_3a'
  dataset: 'data_order0'
  variable_x_axis: 'mean bayes-corrected (q=0.25) valence of replies'
  variable_y_axis: 'bayes-corrected (q=0.25) valence'
  data_breakpoints:
  - 0
# Fig. 3b: forest plot of the per-section antagonism regressions.
# Every entry of `regression_model_names` must equal the `name` of a job under
# `analysis`. `regression_model_labels` lists the display labels.
# NOTE(review): labels are presumably matched to model names by position -
# keep both lists in the same order.
- !forestplot
  name: 'Fig_3b'
  regression_model_names:
  - 'Evidence_antagonism_section_politics'
  # The foreign-affairs regression is defined under the name
  # `..._section_affairs` (dataset `data_foreign_affairs`); there is no job
  # called `..._section_foreign_affairs`.
  - 'Evidence_antagonism_section_affairs'
  - 'Evidence_antagonism_section_science'
  - 'Evidence_antagonism_section_economy'
  - 'Evidence_antagonism_section_miscellaneous'
  - 'Evidence_antagonism_section_culture'
  - 'Evidence_antagonism_section_sports'
  - 'Evidence_antagonism_section_mobility'
  - 'Evidence_antagonism_section_internet'
  - 'Evidence_antagonism_section_health'
  regression_model_labels:
  - 'Politics'
  - 'Foreign Affairs'
  - 'Science'
  - 'Economy'
  - 'Miscellaneous'
  - 'Culture'
  - 'Sports'
  - 'Mobility'
  - 'Internet'
  - 'Health'
  # Coefficient to plot; the single independent variable of the listed models.
  coefficient_names:
  - 'mean bayes-corrected (q=0.25) valence of replies'
  x_axis_minimum: -0.1
  dotsize: 2
  x_axis_label: 'Standardized coefficient (95% Confidence Interval)'
# Fig. 4a: violin plot of comment extremity and the mean extremity of replies
# (q=0.25 columns) on `data_order0`. The x-axis label and the title are
# intentionally empty strings.
- !violinplot
  name: 'Fig_4a'
  dataset: 'data_order0'
  variable_x_axis: 'bayes-corrected (q=0.25) extremity'
  variable_y_axis: 'mean bayes-corrected (q=0.25) extremity of replies'
  x_axis_label: ''
  y_axis_label: 'Extremity value'
  title: ''
# Fig. 4b: forest plot of the per-section polarization paired t-tests.
# Every entry of `paired_ttest_names` must equal the `name` of a
# `!paired_ttest` job under `analysis`. `paired_ttest_labels` lists the
# display labels.
# NOTE(review): labels are presumably matched to test names by position -
# keep both lists in the same order.
- !forestplot_paired_ttest
  name: 'Fig_4b'
  paired_ttest_names:
  - 'Evidence_polarization_paired_ttest_extremity_politics'
  # The foreign-affairs t-test is defined under the name
  # `..._extremity_foreign_affairs`; there is no job called
  # `..._extremity_affairs`.
  - 'Evidence_polarization_paired_ttest_extremity_foreign_affairs'
  - 'Evidence_polarization_paired_ttest_extremity_science'
  - 'Evidence_polarization_paired_ttest_extremity_economy'
  - 'Evidence_polarization_paired_ttest_extremity_miscellaneous'
  - 'Evidence_polarization_paired_ttest_extremity_culture'
  - 'Evidence_polarization_paired_ttest_extremity_sports'
  - 'Evidence_polarization_paired_ttest_extremity_mobility'
  - 'Evidence_polarization_paired_ttest_extremity_internet'
  - 'Evidence_polarization_paired_ttest_extremity_health'
  paired_ttest_labels:
  - 'Politics'
  - 'Foreign Affairs'
  - 'Science'
  - 'Economy'
  - 'Miscellaneous'
  - 'Culture'
  - 'Sports'
  - 'Mobility'
  - 'Internet'
  - 'Health'
  x_axis_minimum: -0.06
  dotsize: 2
  x_axis_label: 'Mean difference bayes-corrected (q=0.25) extremity (95% Confidence Interval)'
# Extended Fig. 1: histogram of `totalvotes` on the `data` dataset, with a
# linear x axis and a logarithmic y axis. The title is intentionally empty.
- !histogram
  name: 'Extended_Fig_1'
  dataset: 'data'
  variable: 'totalvotes'
  x_axis_label: 'Number of total votes'
  y_axis_label: 'Number of comments'
  x_axis_logarithmic_scaling: false
  y_axis_logarithmic_scaling: true
  title: ''
...