In this document we detail the planned and additional analyses for our experiments. By default, all code elements are hidden and only the R output is visible. Individual code segments can be made visible through the respective buttons. To rerun complete analyses we recommend to download our zip file containg all analysis scripts.

Experiment 1

# Clean up memory
rm(list = ls())

# Experiment filenames
results_file <- "exp1/results"
crowflower_file <- "exp1/crowdflower_data"
invalid_answers_file <- "exp1/invalid_answers"
figure_path <- "plots/exp1-"

# Stimuli values (used to compute comprehension errors)
control <- 87  # % sick people without the drug
drug <- 47  # % sick people with the drug
# compute the true answer given the data
true_answer <- drug/control * 20

# Values reported by Tal and Wanksink to add to our plots as a comparison
bws_nochart <- 6.12
bws_chart <- 6.83
bws_p <- 0.04

Planned Analysis

library("plyr")
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
source("CI.helpers.R")
## Simple Bootstrap Routines (1.1-3 2008-04-30)
source("misc.helpers.R")
source("plotting functions.R")
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
# indicated whether we want ggplot to save the generated plots
save_plots <- FALSE
# print plots to screen
print_plots <- TRUE

Load data

# Load experiment data from csv file

if (exists("results_file")) {
    data <- read.csv(paste("data/", results_file, ".csv", sep = ""), sep = ",")
}

# Add crowdflower metadata (if exists)

if (exists("crowflower_file")) {
    crowdsourcedata <- read.csv(paste("data/", crowflower_file, ".csv", sep = ""), 
        sep = ",")
    data$code <- as.character(data$code)
    crowdsourcedata$completion_code <- as.character(crowdsourcedata$completion_code)
    data <- left_join(data, crowdsourcedata, by = c(code = "completion_code"))
    
    # Load country codes
    country_file <- "countries"
    countrydata <- read.csv(paste("data/", country_file, ".csv", sep = ""), 
        sep = ",")
}

# Add mturk metadata (if exists)

if (exists("mturk_file")) {
    crowdsourcedata <- read.csv(paste("data/", mturk_file, ".csv", sep = ""), 
        sep = ",")
    data$code <- as.character(data$code)
    crowdsourcedata$Answer.completion_code <- as.character(crowdsourcedata$Answer.completion_code)
    data <- left_join(data, crowdsourcedata, by = c(code = "Answer.completion_code"))
}

Invalid jobs

# Utility function for printing percentages
percent <- function(x, n) {
    if (x/n < 0.01) 
        r <- 1 else r <- 0
    paste(" (", format_number(100 * x/n, r), "%)", sep = "")
}

print.title("Invalid jobs")
## 
## ###################################
## # Invalid jobs 
## ###################################
data_invalid <- read.csv(paste("data/", invalid_answers_file, ".csv", sep = ""), 
    sep = ",")
n_valid <- nrow(data)
n_invalid <- nrow(data_invalid)
n <- n_valid + n_invalid
cat("Valid jobs:   ", n_valid, "\n")
## Valid jobs:    123
cat("Invalid jobs: ", n_invalid, "\n\n")
## Invalid jobs:  25
# 
n_failed_attention <- nrow(data_invalid[data_invalid$test_question != "common_cold", 
    ])
n_abnormal_time <- nrow(data_invalid[data_invalid$time_taken < 30 | data_invalid$time_taken > 
    15 * 60, ])
n_previous_study <- nrow(data_invalid[data_invalid$previous_study == "yes", 
    ])
cat("Failed attention check:   ", n_failed_attention, percent(n_failed_attention, 
    n), "\n", sep = "")
## Failed attention check:   7 (5%)
cat("Abnormal completion time: ", n_abnormal_time, percent(n_abnormal_time, 
    n), "\n", sep = "")
## Abnormal completion time: 4 (3%)
cat("Did study before:         ", n_previous_study, percent(n_previous_study, 
    n), "\n", sep = "")
## Did study before:         18 (10%)

Sample size, completion time and demographics

# Sample size

N <- nrow(data)
cat("Total sample size:  ", N, "\n")
## Total sample size:   123
cat("n =", sum(data$condition == "no_graph"), "for the no-chart condition\n")
## n = 62 for the no-chart condition
cat("n =", sum(data$condition == "graph"), "for the chart condition\n\n")
## n = 61 for the chart condition
# Job completion time

mt <- median(data$time_taken)
cat("Median job completion time:", format_number(mt/60, 2), "minutes.\n\n")
## Median job completion time: 2.4 minutes.
# Countries

if (exists("crowflower_file")) {
    data$country <- sapply(data$X_country, function(code) countrydata$name[match(code, 
        countrydata$alpha.3)])
    data$subregion <- sapply(data$X_country, function(code) countrydata$sub.region[match(code, 
        countrydata$alpha.3)])
    data$continent <- sapply(data$X_country, function(code) countrydata$region[match(code, 
        countrydata$alpha.3)])
    cat("Number of different countries:", length(unique(data$country)), "\n")
    print.percents(data$continent)
}
## Number of different countries: 32 
##             n  %
## 1   Europe 68 55
## 2 Americas 34 28
## 3     Asia 21 17
# Gender distribution

# Column is present in all but experiment 3
if ("gender" %in% colnames(data)) {
    cat("\nGender:\n")
    print.percents(data$gender)
}
## 
## Gender:
##           n  %
## 1   male 80 65
## 2 female 43 35
# Age

# Column is present in all but experiment 3
if ("age" %in% colnames(data)) {
    cat("\nMean age:", round(mean(data$age)), "\n")
}
## 
## Mean age: 35
# Education

# Column is present in all but experiment 3
if ("education" %in% colnames(data)) {
    cat("\nEducation:\n")
    print.percents(data$education)
}
## 
## Education:
##                    n  %
## 1 college_or_more 59 48
## 2    some_college 34 28
## 3     high-school 27 22
## 4         neither  3  2

Analysis of dependent variables

##### Retrieve data

# Extract judgments of drug efficacy (values between 1 and 9).  Experiment 1
# and experiments 2-4 use different question wordings, hence we look at two
# possible columns.

# Column is present in experiment 1, which replicates the question from BwS
# experiment 1
if ("effectiveness" %in% colnames(data)) {
    eff_nograph <- data[data$condition == "no_graph", ]$effectiveness
    eff_graph <- data[data$condition == "graph", ]$effectiveness
    variable_name <- "Perceived effectiveness"
}

# Column is present in experiments 2-4, which replicate the question from
# BwS experiment 2
if ("believe_in_effectiveness" %in% colnames(data)) {
    eff_nograph <- data[data$condition == "no_graph", ]$believe_in_effectiveness
    eff_graph <- data[data$condition == "graph", ]$believe_in_effectiveness
    variable_name <- "Belief in efficacy"
}

# Extract belief in drug from the second question (yes = not effective, no =
# effective).

# Column is present in experiment 1 only.
if ("reduces_sickness" %in% colnames(data)) {
    yesno_to_01 <- function(x) {
        if (x == "yes") 
            1 else if (x == "no") 
            0 else NA
    }
    ill_nograph <- data[data$condition == "no_graph", ]$reduces_sickness
    ill_graph <- data[data$condition == "graph", ]$reduces_sickness
    ill_nograph <- sapply(ill_nograph, yesno_to_01)
    ill_graph <- sapply(ill_graph, yesno_to_01)
}

# Extract answers to comprehension question

num_nograph <- data[data$condition == "no_graph", ]$comprehension
num_graph <- data[data$condition == "graph", ]$comprehension

##### Descriptive and inferential statistics

Judgment of efficacy

##### CIs for judgment of efficacy on a 1--9 scale

mean_eff_nograph <- meanCI.bootstrap(eff_nograph)
mean_eff_graph <- meanCI.bootstrap(eff_graph)
mean_eff_diff <- diffMeanCI.bootstrap(eff_graph, eff_nograph)

print.title(variable_name)
## 
## ###################################
## # Perceived effectiveness 
## ###################################
cat("Mean efficacy no-chart: ", format_ci(mean_eff_nograph), "\n")
## Mean efficacy no-chart:  6.8, CI [6.4, 7.1]
cat("Mean efficacy chart:    ", format_ci(mean_eff_graph), "\n")
## Mean efficacy chart:     6.6, CI [6.1, 6.9]
cat("Difference:             ", format_ci(mean_eff_diff), "\n")
## Difference:              -0.18, CI [-0.69, 0.30]
# NHST version for comparison (used in the original paper, but not part of
# this planned analysis)

ttest <- t.test(eff_graph, eff_nograph)
mean_eff_diff_normal <- c(mean(eff_graph) - mean(eff_nograph), ttest$conf.int[1], 
    ttest$conf.int[2])
cat("(using a t-test)        ", format_ci(mean_eff_diff_normal), "p =", format_number(ttest$p.value), 
    "\n")
## (using a t-test)         -0.18, CI [-0.69, 0.33] p = 0.48
# Display distributions of efficacy judgments and confidence intervals for
# estimates

## compile CIs and BwS results into a dataframe for plotting
eff_df <- NULL
eff_df <- add.ci.to.df(mean_eff_nograph, "no chart", "ours", eff_df)
eff_df <- add.ci.to.df(c(bws_nochart, bws_nochart, bws_nochart), "no chart", 
    "BwS", eff_df)
eff_df <- add.ci.to.df(mean_eff_graph, "chart", "ours", eff_df)
eff_df <- add.ci.to.df(c(bws_chart, bws_chart, bws_chart), "chart", "BwS", eff_df)

eff_diff_df <- NULL
eff_diff_df <- add.ci.to.df(mean_eff_diff, " ", "ours")
eff_diff_df <- add.ci.to.df(ci_from_p(bws_chart - bws_nochart, bws_p), " ", 
    "BwS", eff_diff_df)

if (variable_name == "Belief in efficacy") {
    # plotting for experiments 2-4
    eff.hist.no_chart <- histo(eff_nograph, 9, "no chart", 0, 1)
    eff.hist.chart <- histo(eff_graph, 9, "chart", 0, 1)
    
    eff.cis <- perceived.effectiveness.ciplot(eff_df, eff_diff_df)
    
    plot <- drawVertical(eff.hist.no_chart, eff.hist.chart, eff.cis, name.eff.exp24, 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "believe_in_efficacy_plot.pdf"), 
            plot, base_aspect_ratio = 3, base_height = 2)
    if (print_plots) 
        print(plot)
    
} else {
    # plotting for experiment 1
    eff.hist.no_chart <- histo(eff_nograph, 9, "no chart", 0, 1)
    eff.hist.chart <- histo(eff_graph, 9, "chart", 0, 1)
    eff.cis <- perceived.effectiveness.1.ciplot(eff_df, eff_diff_df)
    
    plot <- drawVertical(eff.hist.no_chart, eff.hist.chart, eff.cis, name.eff.exp1, 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "perceived_effectiveness_plot.pdf"), 
            plot, base_aspect_ratio = 3, base_height = 2)
    if (print_plots) 
        print(plot)
}

Binary belief in efficacy

##### CIs for binary belief in efficacy

# Column is present in experiment 1 only, as subsequent experiments didn't
# include that question.
if ("reduces_sickness" %in% colnames(data)) {
    
    mean_ill_nograph <- propCI(sum(ill_nograph == 1), length(ill_nograph))
    mean_ill_graph <- propCI(sum(ill_graph == 1), length(ill_graph))
    mean_ill_diff <- diffpropCI(sum(ill_graph == 1), length(ill_graph), sum(ill_nograph == 
        1), length(ill_nograph))
    
    print.title("Belief in efficacy (binary)")
    cat("Proportion of 'yes' no-chart: ", format_ci(mean_ill_nograph, percent = TRUE), 
        "\n")
    cat("Proportion of 'yes' chart:    ", format_ci(mean_ill_graph, percent = TRUE), 
        "\n")
    cat("Difference:                   ", format_ci(mean_ill_diff, percent = TRUE), 
        "\n")
    
    # NHST version for comparison (used in the original paper, but not part of
    # this planned analysis)
    
    yes <- c(sum(ill_graph == 1), sum(ill_nograph == 1))
    all <- c(length(ill_graph), length(ill_nograph))
    proptest <- prop.test(yes, all, correct = F)  # disable continuity correction to get the same p-value
    mean_ill_diff_2 <- c(yes[1]/all[1] - yes[2]/all[2], proptest$conf.int[1], 
        proptest$conf.int[2])
    cat("(using an X-squared test)     ", format_ci(mean_ill_diff_2, percent = TRUE), 
        "p =", format_number(proptest$p.value), "\n")
    
    # Plot our results in comparison to the BwS results
    
    # Tal and Wanksink's results
    bws.cis.no_graph <- propCI(21, 31)
    bws.cis.graph <- propCI(28, 29)
    bws.cis.diff <- diffpropCI(28, 29, 21, 31)
    
    # construct the dataframes for plotting
    efficiency.df <- NULL
    efficiency.df <- add.ci.to.df(mean_ill_nograph, "no chart", "ours", efficiency.df)
    efficiency.df <- add.ci.to.df(mean_ill_graph, "chart", "ours", efficiency.df)
    efficiency.df <- add.ci.to.df(bws.cis.no_graph, "no chart", "BwS", efficiency.df)
    efficiency.df <- add.ci.to.df(bws.cis.graph, "chart", "BwS", efficiency.df)
    
    efficiency.diff.df <- NULL
    efficiency.diff.df <- add.ci.to.df(mean_ill_diff, " ", "ours", efficiency.diff.df)
    efficiency.diff.df <- add.ci.to.df(bws.cis.diff, " ", "BwS", efficiency.diff.df)
    
    # plot
    efficiency.ciplot <- efficacy.belief.ciplot(efficiency.df, efficiency.diff.df)
    efficiency.plot <- drawHorizontalEfficiency(efficiency.ciplot, "Belief in efficacy", 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "belief_in_efficacy_plot.pdf"), 
            efficiency.plot, base_aspect_ratio = 4, base_height = 1.5)
    if (print_plots) 
        print(efficiency.plot)
}
## 
## ###################################
## # Belief in efficacy (binary) 
## ###################################
## 
## Proportion of 'yes' no-chart:  90%, CI [80%, 95%] 
## Proportion of 'yes' chart:     87%, CI [76%, 93%] 
## Difference:                    -3.4%, CI [-16%, 8.4%] 
## (using an X-squared test)      -3.4%, CI [-15%, 7.8%] p = 0.55

Comprehension question

#### CIs for errors at the comprehension question

# Define error
psick <- drug/control
logodds <- function(p) {
    log(p/(1 - p))
}
logodds.psick <- logodds(psick)
error <- function(n) {
    n[n == 0] <- 0.1
    n[n == 20] <- 19.9
    abs(logodds(n/20) - logodds.psick)
}

# Compute errors
data$error <- error(data$comprehension)
err_nograph <- error(num_nograph)
err_graph <- error(num_graph)

# Use geometric mean to reduce the influence of outliers and increase the
# weight of close-to-exact answers
mean_err_nograph <- geomMeanCI.bootstrap(err_nograph)
mean_err_graph <- geomMeanCI.bootstrap(err_graph)
mean_err_diff <- ratioGeomMeanCI.bootstrap(err_graph, err_nograph)
print.title("Comprehension error")
## 
## ###################################
## # Comprehension error 
## ###################################
cat("Geometric mean error no-chart: ", format_ci(mean_err_nograph), "\n")
## Geometric mean error no-chart:  0.66, CI [0.49, 0.88]
cat("Geometric mean error chart:    ", format_ci(mean_err_graph), "\n")
## Geometric mean error chart:     0.43, CI [0.33, 0.56]
cat("Ratio:                         ", format_ci(mean_err_diff), "\n")
## Ratio:                          0.65, CI [0.44, 0.98]
# Plot raw responses and confidence intervals

comp.hist.no_chart <- histo(num_nograph, 20, "no chart", true_answer, 1)
comp.hist.chart <- histo(num_graph, 20, "chart", true_answer, 1)

# construct the dataframes for plotting
comprehension.df <- NULL
comprehension.df <- add.ci.to.df(mean_err_nograph, "no chart", "ours", comprehension.df)
comprehension.df <- add.ci.to.df(mean_err_graph, "chart", "ours", comprehension.df)

comprehension.diff.df <- NULL
comprehension.diff.df <- add.ci.to.df(mean_err_diff, " ", "ours", comprehension.diff.df)

# plot
comp.cis <- comprehension.ciplot(comprehension.df, comprehension.diff.df)
comp.plot <- drawVerticalComp(comp.hist.no_chart, comp.hist.chart, comp.cis, 
    "Comprehension error", "Error ratio")
if (save_plots) save_plot(paste(sep = "", figure_path, "comprehension_plot.pdf"), 
    comp.plot, base_aspect_ratio = 3, base_height = 2)
if (print_plots) print(comp.plot)

Additional Analysis

Bias in responses

cat("Mean response no-chart: ", format_ci(meanCI.bootstrap(num_nograph), sigdigits = 3), 
    "\n")
## Mean response no-chart:  9.19, CI [7.91, 10.4]
cat("Mean response chart:    ", format_ci(meanCI.bootstrap(num_graph), sigdigits = 3), 
    "\n")
## Mean response chart:     9.13, CI [8.14, 9.97]
cat("Mean difference:        ", format_ci(diffMeanCI.bootstrap(num_graph, num_nograph)), 
    "\n")
## Mean difference:         -0.062, CI [-1.67, 1.40]
# Justifications from those who responded 'no' to the belief question. This
# data was not reported, but it informed the design of our polarization
# question (see section 6.1)

Justifications to ‘no’ answers

whyno <- as.character(data$why_answered_no)
whyno <- whyno[whyno != ""]
cat(whyno, sep = "\n-> ")
## this is how i think
## -> it depend on human body and immune system
## -> because more side effect of medication
## -> Not all people react the same to the medication. It takes a lot of time to demonstrate the usefulness of the medication.
## -> I do not belive that the medications can actually increase immunity. Drugs act on the symptoms.
## -> the drug busts the immune system. There is no way to know if this is the case nor if the user would get sick without it.
## -> i dont belive in miraculous drugs if they found it the studies where over by now
## -> I didn't compute it in the way that the follow up question did, which assumed that those 20 people would by default all get sick. I thought this lower sickness rate could simply be due to extraneous factors. And since the remaining percentage of people getting ill was still rather high, I chose NO here.
## -> It boost immune system still the illnes remains
## -> I simply don't believe in such medication, never had any proof.
## -> The medication does not reduce the disease
## -> The drug is implicitly stated as boosting the human immune system, not fight infections.
## -> Because you can't cure cold.
## -> Because it increases immunity, but does not cure disease

Experiment 2

# Clean up memory
rm(list = ls())

# Experiment filenames
results_file <- "exp2/results"
crowflower_file <- "exp2/crowdflower_data"
invalid_answers_file <- "exp2/invalid_answers"
figure_path <- "plots/exp2-"

# Stimuli values (used to compute comprehension errors)
control <- 83  # % sick people without the drug
drug <- 63  # % sick people with the drug
# compute the true answer given the data
true_answer <- 63/83 * 20

# Values reported by Tal and Wanksink to add to our plots as a comparison
bws_nochart <- 4.66
bws_chart <- 5.75
bws_p <- 0.0066

Planned Analysis

library("plyr")
library("dplyr")

source("CI.helpers.R")
source("misc.helpers.R")
source("plotting functions.R")

# indicated whether we want ggplot to save the generated plots
save_plots <- FALSE
# print plots to screen
print_plots <- TRUE

Load data

# Load experiment data from csv file

if (exists("results_file")) {
    data <- read.csv(paste("data/", results_file, ".csv", sep = ""), sep = ",")
}

# Add crowdflower metadata (if exists)

if (exists("crowflower_file")) {
    crowdsourcedata <- read.csv(paste("data/", crowflower_file, ".csv", sep = ""), 
        sep = ",")
    data$code <- as.character(data$code)
    crowdsourcedata$completion_code <- as.character(crowdsourcedata$completion_code)
    data <- left_join(data, crowdsourcedata, by = c(code = "completion_code"))
    
    # Load country codes
    country_file <- "countries"
    countrydata <- read.csv(paste("data/", country_file, ".csv", sep = ""), 
        sep = ",")
}

# Add mturk metadata (if exists)

if (exists("mturk_file")) {
    crowdsourcedata <- read.csv(paste("data/", mturk_file, ".csv", sep = ""), 
        sep = ",")
    data$code <- as.character(data$code)
    crowdsourcedata$Answer.completion_code <- as.character(crowdsourcedata$Answer.completion_code)
    data <- left_join(data, crowdsourcedata, by = c(code = "Answer.completion_code"))
}

Invalid Jobs

# Utility function for printing percentages
percent <- function(x, n) {
    if (x/n < 0.01) 
        r <- 1 else r <- 0
    paste(" (", format_number(100 * x/n, r), "%)", sep = "")
}

data_invalid <- read.csv(paste("data/", invalid_answers_file, ".csv", sep = ""), 
    sep = ",")
n_valid <- nrow(data)
n_invalid <- nrow(data_invalid)
n <- n_valid + n_invalid
cat("Valid jobs:   ", n_valid, "\n")
## Valid jobs:    164
cat("Invalid jobs: ", n_invalid, "\n\n")
## Invalid jobs:  32
# 
n_failed_attention <- nrow(data_invalid[data_invalid$test_question != "common_cold", 
    ])
n_abnormal_time <- nrow(data_invalid[data_invalid$time_taken < 30 | data_invalid$time_taken > 
    15 * 60, ])
n_previous_study <- nrow(data_invalid[data_invalid$previous_study == "yes", 
    ])
cat("Failed attention check:   ", n_failed_attention, percent(n_failed_attention, 
    n), "\n", sep = "")
## Failed attention check:   6 (3%)
cat("Abnormal completion time: ", n_abnormal_time, percent(n_abnormal_time, 
    n), "\n", sep = "")
## Abnormal completion time: 3 (2%)
cat("Did study before:         ", n_previous_study, percent(n_previous_study, 
    n), "\n", sep = "")
## Did study before:         20 (10%)

Sample size, completion time and demographics

# Sample size

N <- nrow(data)
cat("Total sample size:  ", N, "\n")
## Total sample size:   164
cat("n =", sum(data$condition == "no_graph"), "for the no-chart condition\n")
## n = 79 for the no-chart condition
cat("n =", sum(data$condition == "graph"), "for the chart condition\n\n")
## n = 85 for the chart condition
# Job completion time

mt <- median(data$time_taken)
cat("Median job completion time:", format_number(mt/60, 2), "minutes.\n\n")
## Median job completion time: 2.0 minutes.
# Countries

if (exists("crowflower_file")) {
    data$country <- sapply(data$X_country, function(code) countrydata$name[match(code, 
        countrydata$alpha.3)])
    data$subregion <- sapply(data$X_country, function(code) countrydata$sub.region[match(code, 
        countrydata$alpha.3)])
    data$continent <- sapply(data$X_country, function(code) countrydata$region[match(code, 
        countrydata$alpha.3)])
    cat("Number of different countries:", length(unique(data$country)), "\n")
    print.percents(data$continent)
}
## Number of different countries: 34 
##             n  %
## 1   Europe 91 55
## 2 Americas 37 23
## 3     Asia 32 20
## 4   Africa  2  1
## 5  Oceania  2  1
# Gender distribution

# Column is present in all but experiment 3
if ("gender" %in% colnames(data)) {
    cat("\nGender:\n")
    print.percents(data$gender)
}
## 
## Gender:
##            n  %
## 1   male 118 72
## 2 female  46 28
# Age

# Column is present in all but experiment 3
if ("age" %in% colnames(data)) {
    cat("\nMean age:", round(mean(data$age)), "\n")
}
## 
## Mean age: 34
# Education

# Column is present in all but experiment 3
if ("education" %in% colnames(data)) {
    cat("\nEducation:\n")
    print.percents(data$education)
}
## 
## Education:
##                    n  %
## 1 college_or_more 94 57
## 2    some_college 44 27
## 3     high-school 22 13
## 4         neither  4  2

Analysis of dependent variables

##### Retrieve data

# Extract judgments of drug efficacy (values between 1 and 9).  Experiment 1
# and experiments 2-4 use different question wordings, hence we look at two
# possible columns.

# Column is present in experiment 1, which replicates the question from BwS
# experiment 1
if ("effectiveness" %in% colnames(data)) {
    eff_nograph <- data[data$condition == "no_graph", ]$effectiveness
    eff_graph <- data[data$condition == "graph", ]$effectiveness
    variable_name <- "Perceived effectiveness"
}

# Column is present in experiments 2-4, which replicate the question from
# BwS experiment 2
if ("believe_in_effectiveness" %in% colnames(data)) {
    eff_nograph <- data[data$condition == "no_graph", ]$believe_in_effectiveness
    eff_graph <- data[data$condition == "graph", ]$believe_in_effectiveness
    variable_name <- "Belief in efficacy"
}

# Extract belief in drug from the second question (yes = not effective, no =
# effective).

# Column is present in experiment 1 only.
if ("reduces_sickness" %in% colnames(data)) {
    yesno_to_01 <- function(x) {
        if (x == "yes") 
            1 else if (x == "no") 
            0 else NA
    }
    ill_nograph <- data[data$condition == "no_graph", ]$reduces_sickness
    ill_graph <- data[data$condition == "graph", ]$reduces_sickness
    ill_nograph <- sapply(ill_nograph, yesno_to_01)
    ill_graph <- sapply(ill_graph, yesno_to_01)
}

# Extract answers to comprehension question

num_nograph <- data[data$condition == "no_graph", ]$comprehension
num_graph <- data[data$condition == "graph", ]$comprehension

Belief in efficacy

##### CIs for judgment of efficacy on a 1--9 scale

mean_eff_nograph <- meanCI.bootstrap(eff_nograph)
mean_eff_graph <- meanCI.bootstrap(eff_graph)
mean_eff_diff <- diffMeanCI.bootstrap(eff_graph, eff_nograph)

cat("Mean efficacy no-chart: ", format_ci(mean_eff_nograph), "\n")
## Mean efficacy no-chart:  6.5, CI [6.1, 6.8]
cat("Mean efficacy chart:    ", format_ci(mean_eff_graph), "\n")
## Mean efficacy chart:     6.3, CI [5.9, 6.6]
cat("Difference:             ", format_ci(mean_eff_diff), "\n")
## Difference:              -0.24, CI [-0.72, 0.24]
# NHST version for comparison (used in the original paper, but not part of
# this planned analysis)

ttest <- t.test(eff_graph, eff_nograph)
mean_eff_diff_normal <- c(mean(eff_graph) - mean(eff_nograph), ttest$conf.int[1], 
    ttest$conf.int[2])
cat("(using a t-test)        ", format_ci(mean_eff_diff_normal), "p =", format_number(ttest$p.value), 
    "\n")
## (using a t-test)         -0.24, CI [-0.73, 0.26] p = 0.34
# Display distributions of efficacy judgments and confidence intervals for
# estimates

## compile CIs and BwS results into a dataframe for plotting
eff_df <- NULL
eff_df <- add.ci.to.df(mean_eff_nograph, "no chart", "ours", eff_df)
eff_df <- add.ci.to.df(c(bws_nochart, bws_nochart, bws_nochart), "no chart", 
    "BwS", eff_df)
eff_df <- add.ci.to.df(mean_eff_graph, "chart", "ours", eff_df)
eff_df <- add.ci.to.df(c(bws_chart, bws_chart, bws_chart), "chart", "BwS", eff_df)

eff_diff_df <- NULL
eff_diff_df <- add.ci.to.df(mean_eff_diff, " ", "ours")
eff_diff_df <- add.ci.to.df(ci_from_p(bws_chart - bws_nochart, bws_p), " ", 
    "BwS", eff_diff_df)

if (variable_name == "Belief in efficacy") {
    # plotting for experiments 2-4
    eff.hist.no_chart <- histo(eff_nograph, 9, "no chart", 0, 1)
    eff.hist.chart <- histo(eff_graph, 9, "chart", 0, 1)
    
    eff.cis <- perceived.effectiveness.ciplot(eff_df, eff_diff_df)
    
    plot <- drawVertical(eff.hist.no_chart, eff.hist.chart, eff.cis, name.eff.exp24, 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "believe_in_efficacy_plot.pdf"), 
            plot, base_aspect_ratio = 3, base_height = 2)
    if (print_plots) 
        print(plot)
    
} else {
    # plotting for experiment 1
    eff.hist.no_chart <- histo(eff_nograph, 9, "no chart", 0, 1)
    eff.hist.chart <- histo(eff_graph, 9, "chart", 0, 1)
    eff.cis <- perceived.effectiveness.1.ciplot(eff_df, eff_diff_df)
    
    plot <- drawVertical(eff.hist.no_chart, eff.hist.chart, eff.cis, name.eff.exp1, 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "perceived_effectiveness_plot.pdf"), 
            plot, base_aspect_ratio = 3, base_height = 2)
    if (print_plots) 
        print(plot)
}

##### CIs for binary belief in efficacy

# Column is present in experiment 1 only, as subsequent experiments didn't
# include that question.
if ("reduces_sickness" %in% colnames(data)) {
    
    mean_ill_nograph <- propCI(sum(ill_nograph == 1), length(ill_nograph))
    mean_ill_graph <- propCI(sum(ill_graph == 1), length(ill_graph))
    mean_ill_diff <- diffpropCI(sum(ill_graph == 1), length(ill_graph), sum(ill_nograph == 
        1), length(ill_nograph))
    
    print.title("Belief in efficacy (binary)")
    cat("Proportion of 'yes' no-chart: ", format_ci(mean_ill_nograph, percent = TRUE), 
        "\n")
    cat("Proportion of 'yes' chart:    ", format_ci(mean_ill_graph, percent = TRUE), 
        "\n")
    cat("Difference:                   ", format_ci(mean_ill_diff, percent = TRUE), 
        "\n")
    
    # NHST version for comparison (used in the original paper, but not part of
    # this planned analysis)
    
    yes <- c(sum(ill_graph == 1), sum(ill_nograph == 1))
    all <- c(length(ill_graph), length(ill_nograph))
    proptest <- prop.test(yes, all, correct = F)  # disable continuity correction to get the same p-value
    mean_ill_diff_2 <- c(yes[1]/all[1] - yes[2]/all[2], proptest$conf.int[1], 
        proptest$conf.int[2])
    cat("(using an X-squared test)     ", format_ci(mean_ill_diff_2, percent = TRUE), 
        "p =", format_number(proptest$p.value), "\n")
    
    # Plot our results in comparison to the BwS results
    
    # Tal and Wanksink's results
    bws.cis.no_graph <- propCI(21, 31)
    bws.cis.graph <- propCI(28, 29)
    bws.cis.diff <- diffpropCI(28, 29, 21, 31)
    
    # construct the dataframes for plotting
    efficiency.df <- NULL
    efficiency.df <- add.ci.to.df(mean_ill_nograph, "no chart", "ours", efficiency.df)
    efficiency.df <- add.ci.to.df(mean_ill_graph, "chart", "ours", efficiency.df)
    efficiency.df <- add.ci.to.df(bws.cis.no_graph, "no chart", "BwS", efficiency.df)
    efficiency.df <- add.ci.to.df(bws.cis.graph, "chart", "BwS", efficiency.df)
    
    efficiency.diff.df <- NULL
    efficiency.diff.df <- add.ci.to.df(mean_ill_diff, " ", "ours", efficiency.diff.df)
    efficiency.diff.df <- add.ci.to.df(bws.cis.diff, " ", "BwS", efficiency.diff.df)
    
    # plot
    efficiency.ciplot <- efficacy.belief.ciplot(efficiency.df, efficiency.diff.df)
    efficiency.plot <- drawHorizontalEfficiency(efficiency.ciplot, "Belief in efficacy", 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "belief_in_efficacy_plot.pdf"), 
            efficiency.plot, base_aspect_ratio = 4, base_height = 1.5)
    if (print_plots) 
        print(efficiency.plot)
}

Comprehension question

#### CIs for errors at the comprehension question

# Define error
psick <- drug/control
logodds <- function(p) {
    log(p/(1 - p))
}
logodds.psick <- logodds(psick)
error <- function(n) {
    n[n == 0] <- 0.1
    n[n == 20] <- 19.9
    abs(logodds(n/20) - logodds.psick)
}

# Compute errors
data$error <- error(data$comprehension)
err_nograph <- error(num_nograph)
err_graph <- error(num_graph)

# Use geometric mean to reduce the influence of outliers and increase the
# weight of close-to-exact answers
mean_err_nograph <- geomMeanCI.bootstrap(err_nograph)
mean_err_graph <- geomMeanCI.bootstrap(err_graph)
mean_err_diff <- ratioGeomMeanCI.bootstrap(err_graph, err_nograph)
print.title("Comprehension error")
## 
## ###################################
## # Comprehension error 
## ###################################
cat("Geometric mean error no-chart: ", format_ci(mean_err_nograph), "\n")
## Geometric mean error no-chart:  0.83, CI [0.62, 1.06]
cat("Geometric mean error chart:    ", format_ci(mean_err_graph), "\n")
## Geometric mean error chart:     0.75, CI [0.56, 0.97]
cat("Ratio:                         ", format_ci(mean_err_diff), "\n")
## Ratio:                          0.91, CI [0.63, 1.35]
# Plot raw responses and confidence intervals

comp.hist.no_chart <- histo(num_nograph, 20, "no chart", true_answer, 1)
comp.hist.chart <- histo(num_graph, 20, "chart", true_answer, 1)

# construct the dataframes for plotting
comprehension.df <- NULL
comprehension.df <- add.ci.to.df(mean_err_nograph, "no chart", "ours", comprehension.df)
comprehension.df <- add.ci.to.df(mean_err_graph, "chart", "ours", comprehension.df)

comprehension.diff.df <- NULL
comprehension.diff.df <- add.ci.to.df(mean_err_diff, " ", "ours", comprehension.diff.df)

# plot
comp.cis <- comprehension.ciplot(comprehension.df, comprehension.diff.df)
comp.plot <- drawVerticalComp(comp.hist.no_chart, comp.hist.chart, comp.cis, 
    "Comprehension error", "Error ratio")
if (save_plots) save_plot(paste(sep = "", figure_path, "comprehension_plot.pdf"), 
    comp.plot, base_aspect_ratio = 3, base_height = 2)
if (print_plots) print(comp.plot)

Experiment 3

# Clean up memory
rm(list = ls())

# Experiment filenames
results_file <- "exp3/results"
crowflower_file <- "exp3/crowdflower_data"
invalid_answers_file <- "exp3/invalid_answers"
figure_path <- "plots/exp3-"

# Stimuli values (used to compute comprehension errors)
control <- 83  # % sick people without the drug
drug <- 63  # % sick people with the drug
# compute the true answer given the data
true_answer <- 63/83 * 20

# Values reported by Tal and Wanksink to add to our plots as a comparison
bws_nochart <- 4.66
bws_chart <- 5.75
bws_p <- 0.0066

Planned Analysis

library("plyr")
library("dplyr")

source("CI.helpers.R")
source("misc.helpers.R")
source("plotting functions.R")

# indicated whether we want ggplot to save the generated plots
save_plots <- FALSE
# print plots to screen
print_plots <- TRUE

Load data

# Load experiment data from csv file

if (exists("results_file")) {
    data <- read.csv(paste("data/", results_file, ".csv", sep = ""), sep = ",")
}

# Add crowdflower metadata (if exists)

if (exists("crowflower_file")) {
    crowdsourcedata <- read.csv(paste("data/", crowflower_file, ".csv", sep = ""), 
        sep = ",")
    data$code <- as.character(data$code)
    crowdsourcedata$completion_code <- as.character(crowdsourcedata$completion_code)
    data <- left_join(data, crowdsourcedata, by = c(code = "completion_code"))
    
    # Load country codes
    country_file <- "countries"
    countrydata <- read.csv(paste("data/", country_file, ".csv", sep = ""), 
        sep = ",")
}

# Add mturk metadata (if exists)

if (exists("mturk_file")) {
    crowdsourcedata <- read.csv(paste("data/", mturk_file, ".csv", sep = ""), 
        sep = ",")
    data$code <- as.character(data$code)
    crowdsourcedata$Answer.completion_code <- as.character(crowdsourcedata$Answer.completion_code)
    data <- left_join(data, crowdsourcedata, by = c(code = "Answer.completion_code"))
}

Invalid jobs

# Utility function for printing percentages
percent <- function(x, n) {
    if (x/n < 0.01) 
        r <- 1 else r <- 0
    paste(" (", format_number(100 * x/n, r), "%)", sep = "")
}

print.title("Invalid jobs")
## 
## ###################################
## # Invalid jobs 
## ###################################
data_invalid <- read.csv(paste("data/", invalid_answers_file, ".csv", sep = ""), 
    sep = ",")
n_valid <- nrow(data)
n_invalid <- nrow(data_invalid)
n <- n_valid + n_invalid
cat("Valid jobs:   ", n_valid, "\n")
## Valid jobs:    176
cat("Invalid jobs: ", n_invalid, "\n\n")
## Invalid jobs:  96
# 
n_failed_attention <- nrow(data_invalid[data_invalid$test_question != "common_cold", 
    ])
n_abnormal_time <- nrow(data_invalid[data_invalid$time_taken < 30 | data_invalid$time_taken > 
    15 * 60, ])
n_previous_study <- nrow(data_invalid[data_invalid$previous_study == "yes", 
    ])
cat("Failed attention check:   ", n_failed_attention, percent(n_failed_attention, 
    n), "\n", sep = "")
## Failed attention check:   30 (10%)
cat("Abnormal completion time: ", n_abnormal_time, percent(n_abnormal_time, 
    n), "\n", sep = "")
## Abnormal completion time: 4 (1%)
cat("Did study before:         ", n_previous_study, percent(n_previous_study, 
    n), "\n", sep = "")
## Did study before:         12 (4%)

Sample size, completion time and demographics

print.title("Participants")
## 
## ###################################
## # Participants 
## ###################################
# Sample size

N <- nrow(data)
cat("Total sample size:  ", N, "\n")
## Total sample size:   176
cat("n =", sum(data$condition == "no_graph"), "for the no-chart condition\n")
## n = 88 for the no-chart condition
cat("n =", sum(data$condition == "graph"), "for the chart condition\n\n")
## n = 88 for the chart condition
# Job completion time

mt <- median(data$time_taken)
cat("Median job completion time:", format_number(mt/60, 2), "minutes.\n\n")
## Median job completion time: 2.4 minutes.
# Countries

if (exists("crowflower_file")) {
    data$country <- sapply(data$X_country, function(code) countrydata$name[match(code, 
        countrydata$alpha.3)])
    data$subregion <- sapply(data$X_country, function(code) countrydata$sub.region[match(code, 
        countrydata$alpha.3)])
    data$continent <- sapply(data$X_country, function(code) countrydata$region[match(code, 
        countrydata$alpha.3)])
    cat("Number of different countries:", length(unique(data$country)), "\n")
    print.percents(data$continent)
}
## Number of different countries: 40 
##             n  %
## 1   Europe 89 51
## 2 Americas 63 36
## 3   Africa 10  6
## 4     Asia 10  6
## 5  Oceania  2  1
# Gender distribution

# Column is present in all but experiment 3
if ("gender" %in% colnames(data)) {
    cat("\nGender:\n")
    print.percents(data$gender)
}

# Age

# Column is present in all but experiment 3
if ("age" %in% colnames(data)) {
    cat("\nMean age:", round(mean(data$age)), "\n")
}

# Education

# Column is present in all but experiment 3
if ("education" %in% colnames(data)) {
    cat("\nEducation:\n")
    print.percents(data$education)
}

Analysis of dependent variables

##### Retrieve data

# Extract judgments of drug efficacy (values between 1 and 9).  Experiment 1
# and experiments 2-4 use different question wordings, hence we look at two
# possible columns.

# Column is present in experiment 1, which replicates the question from BwS
# experiment 1
if ("effectiveness" %in% colnames(data)) {
    eff_nograph <- data[data$condition == "no_graph", ]$effectiveness
    eff_graph <- data[data$condition == "graph", ]$effectiveness
    variable_name <- "Perceived effectiveness"
}

# Column is present in experiments 2-4, which replicate the question from
# BwS experiment 2
if ("believe_in_effectiveness" %in% colnames(data)) {
    eff_nograph <- data[data$condition == "no_graph", ]$believe_in_effectiveness
    eff_graph <- data[data$condition == "graph", ]$believe_in_effectiveness
    variable_name <- "Belief in efficacy"
}

# Extract belief in drug from the second question (yes = not effective, no =
# effective).

# Column is present in experiment 1 only.
if ("reduces_sickness" %in% colnames(data)) {
    yesno_to_01 <- function(x) {
        if (x == "yes") 
            1 else if (x == "no") 
            0 else NA
    }
    ill_nograph <- data[data$condition == "no_graph", ]$reduces_sickness
    ill_graph <- data[data$condition == "graph", ]$reduces_sickness
    ill_nograph <- sapply(ill_nograph, yesno_to_01)
    ill_graph <- sapply(ill_graph, yesno_to_01)
}

# Extract answers to comprehension question

num_nograph <- data[data$condition == "no_graph", ]$comprehension
num_graph <- data[data$condition == "graph", ]$comprehension

Belief in efficacy

##### CIs for judgment of efficacy on a 1--9 scale

mean_eff_nograph <- meanCI.bootstrap(eff_nograph)
mean_eff_graph <- meanCI.bootstrap(eff_graph)
mean_eff_diff <- diffMeanCI.bootstrap(eff_graph, eff_nograph)

print.title(variable_name)
## 
## ###################################
## # Belief in efficacy 
## ###################################
cat("Mean efficacy no-chart: ", format_ci(mean_eff_nograph), "\n")
## Mean efficacy no-chart:  6.2, CI [5.9, 6.5]
cat("Mean efficacy chart:    ", format_ci(mean_eff_graph), "\n")
## Mean efficacy chart:     6.3, CI [6.0, 6.6]
cat("Difference:             ", format_ci(mean_eff_diff), "\n")
## Difference:              0.12, CI [-0.35, 0.56]
# NHST version for comparison (used in the original paper, but not part of
# this planned analysis)

ttest <- t.test(eff_graph, eff_nograph)
mean_eff_diff_normal <- c(mean(eff_graph) - mean(eff_nograph), ttest$conf.int[1], 
    ttest$conf.int[2])
cat("(using a t-test)        ", format_ci(mean_eff_diff_normal), "p =", format_number(ttest$p.value), 
    "\n")
## (using a t-test)         0.12, CI [-0.34, 0.59] p = 0.59
# Display distributions of efficacy judgments and confidence intervals for
# estimates

## compile CIs and BwS results into a dataframe for plotting
eff_df <- NULL
eff_df <- add.ci.to.df(mean_eff_nograph, "no chart", "ours", eff_df)
eff_df <- add.ci.to.df(c(bws_nochart, bws_nochart, bws_nochart), "no chart", 
    "BwS", eff_df)
eff_df <- add.ci.to.df(mean_eff_graph, "chart", "ours", eff_df)
eff_df <- add.ci.to.df(c(bws_chart, bws_chart, bws_chart), "chart", "BwS", eff_df)

eff_diff_df <- NULL
eff_diff_df <- add.ci.to.df(mean_eff_diff, " ", "ours")
eff_diff_df <- add.ci.to.df(ci_from_p(bws_chart - bws_nochart, bws_p), " ", 
    "BwS", eff_diff_df)

if (variable_name == "Belief in efficacy") {
    # plotting for experiments 2-4
    eff.hist.no_chart <- histo(eff_nograph, 9, "no chart", 0, 1)
    eff.hist.chart <- histo(eff_graph, 9, "chart", 0, 1)
    
    eff.cis <- perceived.effectiveness.ciplot(eff_df, eff_diff_df)
    
    plot <- drawVertical(eff.hist.no_chart, eff.hist.chart, eff.cis, name.eff.exp24, 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "believe_in_efficacy_plot.pdf"), 
            plot, base_aspect_ratio = 3, base_height = 2)
    if (print_plots) 
        print(plot)
    
} else {
    # plotting for experiment 1
    eff.hist.no_chart <- histo(eff_nograph, 9, "no chart", 0, 1)
    eff.hist.chart <- histo(eff_graph, 9, "chart", 0, 1)
    eff.cis <- perceived.effectiveness.1.ciplot(eff_df, eff_diff_df)
    
    plot <- drawVertical(eff.hist.no_chart, eff.hist.chart, eff.cis, name.eff.exp1, 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "perceived_effectiveness_plot.pdf"), 
            plot, base_aspect_ratio = 3, base_height = 2)
    if (print_plots) 
        print(plot)
}

##### CIs for binary belief in efficacy

# Column is present in experiment 1 only, as subsequent experiments didn't
# include that question.
if ("reduces_sickness" %in% colnames(data)) {
    
    mean_ill_nograph <- propCI(sum(ill_nograph == 1), length(ill_nograph))
    mean_ill_graph <- propCI(sum(ill_graph == 1), length(ill_graph))
    mean_ill_diff <- diffpropCI(sum(ill_graph == 1), length(ill_graph), sum(ill_nograph == 
        1), length(ill_nograph))
    
    print.title("Belief in efficacy (binary)")
    cat("Proportion of 'yes' no-chart: ", format_ci(mean_ill_nograph, percent = TRUE), 
        "\n")
    cat("Proportion of 'yes' chart:    ", format_ci(mean_ill_graph, percent = TRUE), 
        "\n")
    cat("Difference:                   ", format_ci(mean_ill_diff, percent = TRUE), 
        "\n")
    
    # NHST version for comparison (used in the original paper, but not part of
    # this planned analysis)
    
    yes <- c(sum(ill_graph == 1), sum(ill_nograph == 1))
    all <- c(length(ill_graph), length(ill_nograph))
    proptest <- prop.test(yes, all, correct = F)  # disable continuity correction to get the same p-value
    mean_ill_diff_2 <- c(yes[1]/all[1] - yes[2]/all[2], proptest$conf.int[1], 
        proptest$conf.int[2])
    cat("(using an X-squared test)     ", format_ci(mean_ill_diff_2, percent = TRUE), 
        "p =", format_number(proptest$p.value), "\n")
    
    # Plot our results in comparison to the BwS results
    
    # Tal and Wanksink's results
    bws.cis.no_graph <- propCI(21, 31)
    bws.cis.graph <- propCI(28, 29)
    bws.cis.diff <- diffpropCI(28, 29, 21, 31)
    
    # construct the dataframes for plotting
    efficiency.df <- NULL
    efficiency.df <- add.ci.to.df(mean_ill_nograph, "no chart", "ours", efficiency.df)
    efficiency.df <- add.ci.to.df(mean_ill_graph, "chart", "ours", efficiency.df)
    efficiency.df <- add.ci.to.df(bws.cis.no_graph, "no chart", "BwS", efficiency.df)
    efficiency.df <- add.ci.to.df(bws.cis.graph, "chart", "BwS", efficiency.df)
    
    efficiency.diff.df <- NULL
    efficiency.diff.df <- add.ci.to.df(mean_ill_diff, " ", "ours", efficiency.diff.df)
    efficiency.diff.df <- add.ci.to.df(bws.cis.diff, " ", "BwS", efficiency.diff.df)
    
    # plot
    efficiency.ciplot <- efficacy.belief.ciplot(efficiency.df, efficiency.diff.df)
    efficiency.plot <- drawHorizontalEfficiency(efficiency.ciplot, "Belief in efficacy", 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "belief_in_efficacy_plot.pdf"), 
            efficiency.plot, base_aspect_ratio = 4, base_height = 1.5)
    if (print_plots) 
        print(efficiency.plot)
}

Comprehension question

################################################################################ Comprehension question Display distributions of comprehension test
################################################################################ responses

# par(mfrow=c(1,2)) myhist('Number sick - no chart', num_nograph, 0, 20, 15)
# myhist('Number sick - chart', num_graph, 0, 20, 15)

#### CIs for errors at the comprehension question

# Define error
psick <- drug/control
logodds <- function(p) {
    log(p/(1 - p))
}
logodds.psick <- logodds(psick)
error <- function(n) {
    n[n == 0] <- 0.1
    n[n == 20] <- 19.9
    abs(logodds(n/20) - logodds.psick)
}

# Compute errors
data$error <- error(data$comprehension)
err_nograph <- error(num_nograph)
err_graph <- error(num_graph)

# Use geometric mean to reduce the influence of outliers and increase the
# weight of close-to-exact answers
mean_err_nograph <- geomMeanCI.bootstrap(err_nograph)
mean_err_graph <- geomMeanCI.bootstrap(err_graph)
mean_err_diff <- ratioGeomMeanCI.bootstrap(err_graph, err_nograph)
print.title("Comprehension error")
## 
## ###################################
## # Comprehension error 
## ###################################
cat("Geometric mean error no-chart: ", format_ci(mean_err_nograph), "\n")
## Geometric mean error no-chart:  0.76, CI [0.61, 0.94]
cat("Geometric mean error chart:    ", format_ci(mean_err_graph), "\n")
## Geometric mean error chart:     0.59, CI [0.45, 0.77]
cat("Ratio:                         ", format_ci(mean_err_diff), "\n")
## Ratio:                          0.78, CI [0.55, 1.08]
# Plot raw responses and confidence intervals

comp.hist.no_chart <- histo(num_nograph, 20, "no chart", true_answer, 1)
comp.hist.chart <- histo(num_graph, 20, "chart", true_answer, 1)

# construct the dataframes for plotting
comprehension.df <- NULL
comprehension.df <- add.ci.to.df(mean_err_nograph, "no chart", "ours", comprehension.df)
comprehension.df <- add.ci.to.df(mean_err_graph, "chart", "ours", comprehension.df)

comprehension.diff.df <- NULL
comprehension.diff.df <- add.ci.to.df(mean_err_diff, " ", "ours", comprehension.diff.df)

# plot
comp.cis <- comprehension.ciplot(comprehension.df, comprehension.diff.df)
comp.plot <- drawVerticalComp(comp.hist.no_chart, comp.hist.chart, comp.cis, 
    "Comprehension error", "Error ratio")
if (save_plots) save_plot(paste(sep = "", figure_path, "comprehension_plot.pdf"), 
    comp.plot, base_aspect_ratio = 3, base_height = 2)
if (print_plots) print(comp.plot)

Additional Analysis

Bias in responses

cat("Mean response no-chart: ", format_ci(meanCI.bootstrap(num_nograph), sigdigits = 3), 
    "\n")
## Mean response no-chart:  11.3, CI [10.2, 12.2]
cat("Mean response chart:    ", format_ci(meanCI.bootstrap(num_graph), sigdigits = 3), 
    "\n")
## Mean response chart:     11.8, CI [10.7, 12.7]
cat("Mean difference:        ", format_ci(diffMeanCI.bootstrap(num_graph, num_nograph)), 
    "\n")
## Mean difference:         0.48, CI [-0.91, 1.85]

Experiment 4

# Clean up memory
rm(list = ls())

# Experiment filenames
results_file <- "exp4/results"
mturk_file <- "exp4/mturk"
invalid_answers_file <- "exp4/invalid_answers"
figure_path <- "plots/exp4-"

# Stimuli values (used to compute comprehension errors)
control <- 83  # % sick people without the drug
drug <- 63  # % sick people with the drug
# compute the true answer given the data
true_answer <- 63/83 * 20

# Values reported by Tal and Wanksink to add to our plots as a comparison
bws_nochart <- 4.66
bws_chart <- 5.75
bws_p <- 0.0066

Planned Analysis

library("plyr")
library("dplyr")

source("CI.helpers.R")
source("misc.helpers.R")
source("plotting functions.R")

# indicated whether we want ggplot to save the generated plots
save_plots <- FALSE
# print plots to screen
print_plots <- TRUE

Load data

# Load experiment data from csv file

if (exists("results_file")) {
    data <- read.csv(paste("data/", results_file, ".csv", sep = ""), sep = ",")
}

# Add crowdflower metadata (if exists)

if (exists("crowflower_file")) {
    crowdsourcedata <- read.csv(paste("data/", crowflower_file, ".csv", sep = ""), 
        sep = ",")
    data$code <- as.character(data$code)
    crowdsourcedata$completion_code <- as.character(crowdsourcedata$completion_code)
    data <- left_join(data, crowdsourcedata, by = c(code = "completion_code"))
    
    # Load country codes
    country_file <- "countries"
    countrydata <- read.csv(paste("data/", country_file, ".csv", sep = ""), 
        sep = ",")
}

# Add mturk metadata (if exists)

if (exists("mturk_file")) {
    crowdsourcedata <- read.csv(paste("data/", mturk_file, ".csv", sep = ""), 
        sep = ",")
    data$code <- as.character(data$code)
    crowdsourcedata$Answer.completion_code <- as.character(crowdsourcedata$Answer.completion_code)
    data <- left_join(data, crowdsourcedata, by = c(code = "Answer.completion_code"))
}

Invalid jobs

# Utility function for printing percentages
percent <- function(x, n) {
    if (x/n < 0.01) 
        r <- 1 else r <- 0
    paste(" (", format_number(100 * x/n, r), "%)", sep = "")
}

data_invalid <- read.csv(paste("data/", invalid_answers_file, ".csv", sep = ""), 
    sep = ",")
n_valid <- nrow(data)
n_invalid <- nrow(data_invalid)
n <- n_valid + n_invalid
cat("Valid jobs:   ", n_valid, "\n")
## Valid jobs:    160
cat("Invalid jobs: ", n_invalid, "\n\n")
## Invalid jobs:  29
# 
n_failed_attention <- nrow(data_invalid[data_invalid$test_question != "common_cold", 
    ])
n_abnormal_time <- nrow(data_invalid[data_invalid$time_taken < 30 | data_invalid$time_taken > 
    15 * 60, ])
n_previous_study <- nrow(data_invalid[data_invalid$previous_study == "yes", 
    ])
cat("Failed attention check:   ", n_failed_attention, percent(n_failed_attention, 
    n), "\n", sep = "")
## Failed attention check:   6 (3%)
cat("Abnormal completion time: ", n_abnormal_time, percent(n_abnormal_time, 
    n), "\n", sep = "")
## Abnormal completion time: 0 (0%)
cat("Did study before:         ", n_previous_study, percent(n_previous_study, 
    n), "\n", sep = "")
## Did study before:         0 (0%)

Sample size, completion time and demographics

print.title("Participants")
## 
## ###################################
## # Participants 
## ###################################
# Sample size

N <- nrow(data)
cat("Total sample size:  ", N, "\n")
## Total sample size:   160
cat("n =", sum(data$condition == "no_graph"), "for the no-chart condition\n")
## n = 80 for the no-chart condition
cat("n =", sum(data$condition == "graph"), "for the chart condition\n\n")
## n = 80 for the chart condition
# Job completion time

mt <- median(data$time_taken)
cat("Median job completion time:", format_number(mt/60, 2), "minutes.\n\n")
## Median job completion time: 1.7 minutes.
# Countries

if (exists("crowflower_file")) {
    data$country <- sapply(data$X_country, function(code) countrydata$name[match(code, 
        countrydata$alpha.3)])
    data$subregion <- sapply(data$X_country, function(code) countrydata$sub.region[match(code, 
        countrydata$alpha.3)])
    data$continent <- sapply(data$X_country, function(code) countrydata$region[match(code, 
        countrydata$alpha.3)])
    cat("Number of different countries:", length(unique(data$country)), "\n")
    print.percents(data$continent)
}

# Gender distribution

# Column is present in all but experiment 3
if ("gender" %in% colnames(data)) {
    cat("\nGender:\n")
    print.percents(data$gender)
}
## 
## Gender:
##              n  %
## 1      male 89 56
## 2    female 70 44
## 3 no_answer  1  1
# Age

# Column is present in all but experiment 3
if ("age" %in% colnames(data)) {
    cat("\nMean age:", round(mean(data$age)), "\n")
}
## 
## Mean age: 37
# Education

# Column is present in all but experiment 3
if ("education" %in% colnames(data)) {
    cat("\nEducation:\n")
    print.percents(data$education)
}
## 
## Education:
##                    n  %
## 1 college_or_more 77 48
## 2    some_college 60 38
## 3     high-school 23 14

Analysis of dependent variables

##### Retrieve data

# Extract judgments of drug efficacy (values between 1 and 9).  Experiment 1
# and experiments 2-4 use different question wordings, hence we look at two
# possible columns.

# Column is present in experiment 1, which replicates the question from BwS
# experiment 1
if ("effectiveness" %in% colnames(data)) {
    eff_nograph <- data[data$condition == "no_graph", ]$effectiveness
    eff_graph <- data[data$condition == "graph", ]$effectiveness
    variable_name <- "Perceived effectiveness"
}

# Column is present in experiments 2-4, which replicate the question from
# BwS experiment 2
if ("believe_in_effectiveness" %in% colnames(data)) {
    eff_nograph <- data[data$condition == "no_graph", ]$believe_in_effectiveness
    eff_graph <- data[data$condition == "graph", ]$believe_in_effectiveness
    variable_name <- "Belief in efficacy"
}

# Extract belief in drug from the second question (yes = not effective, no =
# effective).

# Column is present in experiment 1 only.
if ("reduces_sickness" %in% colnames(data)) {
    yesno_to_01 <- function(x) {
        if (x == "yes") 
            1 else if (x == "no") 
            0 else NA
    }
    ill_nograph <- data[data$condition == "no_graph", ]$reduces_sickness
    ill_graph <- data[data$condition == "graph", ]$reduces_sickness
    ill_nograph <- sapply(ill_nograph, yesno_to_01)
    ill_graph <- sapply(ill_graph, yesno_to_01)
}

# Extract answers to comprehension question

num_nograph <- data[data$condition == "no_graph", ]$comprehension
num_graph <- data[data$condition == "graph", ]$comprehension

Belief in Efficacy

##### CIs for judgment of efficacy on a 1--9 scale

mean_eff_nograph <- meanCI.bootstrap(eff_nograph)
mean_eff_graph <- meanCI.bootstrap(eff_graph)
mean_eff_diff <- diffMeanCI.bootstrap(eff_graph, eff_nograph)

print.title(variable_name)
## 
## ###################################
## # Belief in efficacy 
## ###################################
cat("Mean efficacy no-chart: ", format_ci(mean_eff_nograph), "\n")
## Mean efficacy no-chart:  6.8, CI [6.6, 7.1]
cat("Mean efficacy chart:    ", format_ci(mean_eff_graph), "\n")
## Mean efficacy chart:     5.9, CI [5.5, 6.3]
cat("Difference:             ", format_ci(mean_eff_diff), "\n")
## Difference:              -0.92, CI [-1.42, -0.50]
# NHST version for comparison (used in the original paper, but not part of
# this planned analysis)

ttest <- t.test(eff_graph, eff_nograph)
mean_eff_diff_normal <- c(mean(eff_graph) - mean(eff_nograph), ttest$conf.int[1], 
    ttest$conf.int[2])
cat("(using a t-test)        ", format_ci(mean_eff_diff_normal), "p =", format_number(ttest$p.value), 
    "\n")
## (using a t-test)         -0.92, CI [-1.39, -0.46] p = 0.00013
# Display distributions of efficacy judgments and confidence intervals for
# estimates

## compile CIs and BwS results into a dataframe for plotting
eff_df <- NULL
eff_df <- add.ci.to.df(mean_eff_nograph, "no chart", "ours", eff_df)
eff_df <- add.ci.to.df(c(bws_nochart, bws_nochart, bws_nochart), "no chart", 
    "BwS", eff_df)
eff_df <- add.ci.to.df(mean_eff_graph, "chart", "ours", eff_df)
eff_df <- add.ci.to.df(c(bws_chart, bws_chart, bws_chart), "chart", "BwS", eff_df)

eff_diff_df <- NULL
eff_diff_df <- add.ci.to.df(mean_eff_diff, " ", "ours")
eff_diff_df <- add.ci.to.df(ci_from_p(bws_chart - bws_nochart, bws_p), " ", 
    "BwS", eff_diff_df)

if (variable_name == "Belief in efficacy") {
    # plotting for experiments 2-4
    eff.hist.no_chart <- histo(eff_nograph, 9, "no chart", 0, 1)
    eff.hist.chart <- histo(eff_graph, 9, "chart", 0, 1)
    
    eff.cis <- perceived.effectiveness.ciplot(eff_df, eff_diff_df)
    
    plot <- drawVertical(eff.hist.no_chart, eff.hist.chart, eff.cis, name.eff.exp24, 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "believe_in_efficacy_plot.pdf"), 
            plot, base_aspect_ratio = 3, base_height = 2)
    if (print_plots) 
        print(plot)
    
} else {
    # plotting for experiment 1
    eff.hist.no_chart <- histo(eff_nograph, 9, "no chart", 0, 1)
    eff.hist.chart <- histo(eff_graph, 9, "chart", 0, 1)
    eff.cis <- perceived.effectiveness.1.ciplot(eff_df, eff_diff_df)
    
    plot <- drawVertical(eff.hist.no_chart, eff.hist.chart, eff.cis, name.eff.exp1, 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "perceived_effectiveness_plot.pdf"), 
            plot, base_aspect_ratio = 3, base_height = 2)
    if (print_plots) 
        print(plot)
}

##### CIs for binary belief in efficacy

# Column is present in experiment 1 only, as subsequent experiments didn't
# include that question.
if ("reduces_sickness" %in% colnames(data)) {
    
    mean_ill_nograph <- propCI(sum(ill_nograph == 1), length(ill_nograph))
    mean_ill_graph <- propCI(sum(ill_graph == 1), length(ill_graph))
    mean_ill_diff <- diffpropCI(sum(ill_graph == 1), length(ill_graph), sum(ill_nograph == 
        1), length(ill_nograph))
    
    print.title("Belief in efficacy (binary)")
    cat("Proportion of 'yes' no-chart: ", format_ci(mean_ill_nograph, percent = TRUE), 
        "\n")
    cat("Proportion of 'yes' chart:    ", format_ci(mean_ill_graph, percent = TRUE), 
        "\n")
    cat("Difference:                   ", format_ci(mean_ill_diff, percent = TRUE), 
        "\n")
    
    # NHST version for comparison (used in the original paper, but not part of
    # this planned analysis)
    
    yes <- c(sum(ill_graph == 1), sum(ill_nograph == 1))
    all <- c(length(ill_graph), length(ill_nograph))
    proptest <- prop.test(yes, all, correct = F)  # disable continuity correction to get the same p-value
    mean_ill_diff_2 <- c(yes[1]/all[1] - yes[2]/all[2], proptest$conf.int[1], 
        proptest$conf.int[2])
    cat("(using an X-squared test)     ", format_ci(mean_ill_diff_2, percent = TRUE), 
        "p =", format_number(proptest$p.value), "\n")
    
    # Plot our results in comparison to the BwS results
    
    # Tal and Wanksink's results
    bws.cis.no_graph <- propCI(21, 31)
    bws.cis.graph <- propCI(28, 29)
    bws.cis.diff <- diffpropCI(28, 29, 21, 31)
    
    # construct the dataframes for plotting
    efficiency.df <- NULL
    efficiency.df <- add.ci.to.df(mean_ill_nograph, "no chart", "ours", efficiency.df)
    efficiency.df <- add.ci.to.df(mean_ill_graph, "chart", "ours", efficiency.df)
    efficiency.df <- add.ci.to.df(bws.cis.no_graph, "no chart", "BwS", efficiency.df)
    efficiency.df <- add.ci.to.df(bws.cis.graph, "chart", "BwS", efficiency.df)
    
    efficiency.diff.df <- NULL
    efficiency.diff.df <- add.ci.to.df(mean_ill_diff, " ", "ours", efficiency.diff.df)
    efficiency.diff.df <- add.ci.to.df(bws.cis.diff, " ", "BwS", efficiency.diff.df)
    
    # plot
    efficiency.ciplot <- efficacy.belief.ciplot(efficiency.df, efficiency.diff.df)
    efficiency.plot <- drawHorizontalEfficiency(efficiency.ciplot, "Belief in efficacy", 
        "Difference")
    if (save_plots) 
        save_plot(paste(sep = "", figure_path, "belief_in_efficacy_plot.pdf"), 
            efficiency.plot, base_aspect_ratio = 4, base_height = 1.5)
    if (print_plots) 
        print(efficiency.plot)
}

Comprehension question

# Display distributions of comprehension test responses

# par(mfrow=c(1,2)) myhist('Number sick - no chart', num_nograph, 0, 20, 15)
# myhist('Number sick - chart', num_graph, 0, 20, 15)

#### CIs for errors at the comprehension question

# Define error
psick <- drug/control
logodds <- function(p) {
    log(p/(1 - p))
}
logodds.psick <- logodds(psick)
error <- function(n) {
    n[n == 0] <- 0.1
    n[n == 20] <- 19.9
    abs(logodds(n/20) - logodds.psick)
}

# Compute errors
data$error <- error(data$comprehension)
err_nograph <- error(num_nograph)
err_graph <- error(num_graph)

# Use geometric mean to reduce the influence of outliers and increase the
# weight of close-to-exact answers
mean_err_nograph <- geomMeanCI.bootstrap(err_nograph)
mean_err_graph <- geomMeanCI.bootstrap(err_graph)
mean_err_diff <- ratioGeomMeanCI.bootstrap(err_graph, err_nograph)
print.title("Comprehension error")
## 
## ###################################
## # Comprehension error 
## ###################################
cat("Geometric mean error no-chart: ", format_ci(mean_err_nograph), "\n")
## Geometric mean error no-chart:  0.47, CI [0.36, 0.60]
cat("Geometric mean error chart:    ", format_ci(mean_err_graph), "\n")
## Geometric mean error chart:     0.46, CI [0.35, 0.59]
cat("Ratio:                         ", format_ci(mean_err_diff), "\n")
## Ratio:                          0.97, CI [0.69, 1.42]
# Plot raw responses and confidence intervals

comp.hist.no_chart <- histo(num_nograph, 20, "no chart", true_answer, 1)
comp.hist.chart <- histo(num_graph, 20, "chart", true_answer, 1)

# construct the dataframes for plotting
comprehension.df <- NULL
comprehension.df <- add.ci.to.df(mean_err_nograph, "no chart", "ours", comprehension.df)
comprehension.df <- add.ci.to.df(mean_err_graph, "chart", "ours", comprehension.df)

comprehension.diff.df <- NULL
comprehension.diff.df <- add.ci.to.df(mean_err_diff, " ", "ours", comprehension.diff.df)

# plot
comp.cis <- comprehension.ciplot(comprehension.df, comprehension.diff.df)
comp.plot <- drawVerticalComp(comp.hist.no_chart, comp.hist.chart, comp.cis, 
    "Comprehension error", "Error ratio")
if (save_plots) save_plot(paste(sep = "", figure_path, "comprehension_plot.pdf"), 
    comp.plot, base_aspect_ratio = 3, base_height = 2)
if (print_plots) print(comp.plot)

Extra planned analysis

# Remove NAs for belief in science (not sure how this happened)
data_clean <- data[!is.na(data$belief_in_science), ]

# Break down by condition
nograph <- data_clean[data_clean$condition == "no_graph", ]
graph <- data_clean[data_clean$condition == "graph", ]

# Datasets with positively polarized people removed
nograph_nopp <- nograph[nograph$belief_in_medicine < 7, ]
graph_nopp <- graph[graph$belief_in_medicine < 7, ]

# Keep the removed people to plot separately
nograph_removed <- nograph[nograph$belief_in_medicine >= 7, ]
graph_removed <- graph[graph$belief_in_medicine >= 7, ]

# Function for computing and plotting the regression
fit <- function(title, x, xs, y, ys, x_ignored = NULL, y_ignored = NULL) {
    coeff_bigger = 1.2
    exponent = 0.5
    AA = xyTable(x, y)
    plot(AA$x, AA$y, main = title, xlab = xs, ylab = ys, xlim = c(1, 9), xaxp = c(1, 
        9, 8), ylim = c(1, 9), yaxp = c(1, 9, 8), yaxs = "r", type = "n")
    
    fit <- lm(y ~ x)
    clip(min(AA$x) - 0.5, max(AA$x) + 0.5, 0, 10)
    abline(fit, lty = 1)
    clip(0, 10, 0, 10)
    confint(fit)
    
    points(AA$x, AA$y, cex = (AA$number)^exponent * coeff_bigger, col = rgb(0, 
        0, 0, 0.2), pch = 16)
    text(AA$x, AA$y, ifelse(AA$number > 1, AA$number, ""), col = rgb(0, 0, 0, 
        0.5))
    if (length(x_ignored) > 0) {
        AA = xyTable(x_ignored, y_ignored)
        points(AA$x, AA$y, cex = (AA$number)^exponent * coeff_bigger, col = rgb(0, 
            0, 0, 0.2), pch = 1)
        text(AA$x, AA$y, ifelse(AA$number > 1, AA$number, ""), col = rgb(0, 
            0, 0, 0.3))
    }
}

# Interaction between belief in science and chart persuasiveness (Fig 16
# left part)
layout(matrix(c(1, 2, 3, 4), 1, 4, byrow = T), widths = c(2.5, 2.5, 2.5, 2.5), 
    heights = c(2.5))
fit("No chart", nograph$belief_in_science, "Belief in science", nograph$believe_in_effectiveness, 
    "Belief in efficacy")
fit("Chart", graph$belief_in_science, "Belief in science", graph$believe_in_effectiveness, 
    "Belief in efficacy")

# Interaction between polarization and chart persuasiveness (Fig 16 right
# part)

fit("No chart", nograph_nopp$belief_in_medicine, "Polarization", nograph_nopp$believe_in_effectiveness, 
    "Belief in efficacy", nograph_removed$belief_in_medicine, nograph_removed$believe_in_effectiveness)
fit("Chart", graph_nopp$belief_in_medicine, "Polarization", graph_nopp$believe_in_effectiveness, 
    "Belief in efficacy", graph_removed$belief_in_medicine, graph_removed$believe_in_effectiveness)

Left group: interaction betweeen belief in science and chart persuasiveness Right group: interaction between polarization and chart persuasiveness

Regression analysis

# Difference between slopes: belief in science and chart persuasiveness

cat("Difference between slopes -- belief in science:      ", format_ci(diff.slopes.bootstrap(nograph$belief_in_science, 
    nograph$believe_in_effectiveness, graph$belief_in_science, graph$believe_in_effectiveness)), 
    "\n")
## Difference between slopes -- belief in science:       -0.052, CI [-0.61, 0.37]
cat("Difference between slopes -- polarization:           ", format_ci(diff.slopes.bootstrap(nograph_nopp$belief_in_medicine, 
    nograph_nopp$believe_in_effectiveness, graph_nopp$belief_in_medicine, graph_nopp$believe_in_effectiveness)), 
    "\n")
## Warning in norm.inter(t, adj.alpha): extreme order statistics used as
## endpoints
## Difference between slopes -- polarization:            0.33, CI [0.31, 0.75]

Meta Analysis

# Clean up memory
rm(list = ls())

library("dplyr")
library("plyr")
library("bootES")

source("CI.helpers.R")
source("misc.helpers.R")

# Number of replicates to use in bootstrapping. The higher the more accurate
# the results. Warning: a large R can yield very lengthy computations.
R = 5000

######################################################################## Load data

# Read data from all 4 experiments

data1 <- read.csv(paste("data/exp1/results.csv", sep = ""), sep = ",")
data2 <- read.csv(paste("data/exp2/results.csv", sep = ""), sep = ",")
data3 <- read.csv(paste("data/exp3/results.csv", sep = ""), sep = ",")
data4 <- read.csv(paste("data/exp4/results.csv", sep = ""), sep = ",")

# Compute the total number of observations

N <- nrow(data1) + nrow(data2) + nrow(data3) + nrow(data4)
cat("Total sample size: N =", N, "\n\n")
## Total sample size: N = 623
######################################################################## Retrieve observations of interest

# Efficacy judgement data for all 4 experiments

eff1_nograph <- data1[data1$condition == "no_graph", ]$effectiveness
eff2_nograph <- data2[data2$condition == "no_graph", ]$believe_in_effectiveness
eff3_nograph <- data3[data3$condition == "no_graph", ]$believe_in_effectiveness
eff4_nograph <- data4[data4$condition == "no_graph", ]$believe_in_effectiveness
eff1_graph <- data1[data1$condition == "graph", ]$effectiveness
eff2_graph <- data2[data2$condition == "graph", ]$believe_in_effectiveness
eff3_graph <- data3[data3$condition == "graph", ]$believe_in_effectiveness
eff4_graph <- data4[data4$condition == "graph", ]$believe_in_effectiveness

# Comprehension error data for all 4 experiments

logodds <- function(p) {
    log(p/(1 - p))
}
error <- function(n, control, drug) {
    n[n == 0] <- 0.1
    n[n == 20] <- 19.9
    abs(logodds(n/20) - logodds(drug/control))
}
error1 <- function(n) {
    error(n, 87, 47)
}  # For exp 1
error2 <- function(n) {
    error(n, 83, 63)
}  # For exp 2,3,4

err1_nograph <- error1(data1[data1$condition == "no_graph", ]$comprehension)
err2_nograph <- error2(data2[data2$condition == "no_graph", ]$comprehension)
err3_nograph <- error2(data3[data3$condition == "no_graph", ]$comprehension)
err4_nograph <- error2(data4[data4$condition == "no_graph", ]$comprehension)
err1_graph <- error1(data1[data1$condition == "graph", ]$comprehension)
err2_graph <- error2(data2[data2$condition == "graph", ]$comprehension)
err3_graph <- error2(data3[data3$condition == "graph", ]$comprehension)
err4_graph <- error2(data4[data4$condition == "graph", ]$comprehension)

######################################################################## Compute standardized effect sizes and their CI for each experiment

# Convenience function that computes, prints and returns the standardized
# effect size for the mean difference between two independent samples, with
# its 95% BCa bootstrap CI.

printEffectSize <- function(exp_name, data_nograph, data_graph) {
    data <- data.frame(obs = c(data_nograph, data_graph), group = c(rep("nograph", 
        length(data_nograph)), rep("graph", length(data_graph))))
    set.seed(0)  # make deterministic
    b <- bootES(data, R = R, effect.type = "cohens.d", data.col = "obs", group.col = "group", 
        contrast = c(nograph = 1, graph = -1))
    pointEstimate <- b[[1]]
    bootci <- boot.ci(b, type = "bca", conf = conf.level)
    cat("Effect size for ", exp_name, ": ", format_ci(c(pointEstimate, bootci$bca[4], 
        bootci$bca[5])), "\n", sep = "")
    return(data.frame(estimate = pointEstimate, ci.low = bootci$bca[4], ci.high = bootci$bca[5]))
}

# Note: the exact point and interval estimates obtained here may differ
# slightly from the paper because in the previous version, the random seed
# was not reset before each call to bootES. Use a large value of R to get
# more accurate estimates.

exp1.belief <- printEffectSize("exp 1 belief     ", eff1_graph, eff1_nograph)
## Effect size for exp 1 belief     : -0.13, CI [-0.47, 0.22]
exp2.belief <- printEffectSize("exp 2 belief     ", eff2_graph, eff2_nograph)
## Effect size for exp 2 belief     : -0.15, CI [-0.45, 0.15]
exp3.belief <- printEffectSize("exp 3 belief     ", eff3_graph, eff3_nograph)
## Effect size for exp 3 belief     : 0.081, CI [-0.22, 0.38]
exp4.belief <- printEffectSize("exp 4 belief     ", eff4_graph, eff4_nograph)
## Effect size for exp 4 belief     : -0.62, CI [-0.91, -0.33]
cat("\n")
exp1.error <- printEffectSize("exp 1 log error  ", log(err1_nograph), log(err1_graph))
## Effect size for exp 1 log error  : 0.38, CI [0.0084, 0.75]
exp2.error <- printEffectSize("exp 2 log error  ", log(err2_nograph), log(err2_graph))
## Effect size for exp 2 log error  : 0.075, CI [-0.25, 0.38]
exp3.error <- printEffectSize("exp 3 log error  ", log(err3_nograph), log(err3_graph))
## Effect size for exp 3 log error  : 0.22, CI [-0.078, 0.52]
exp4.error <- printEffectSize("exp 4 log error  ", log(err4_nograph), log(err4_graph))
## Effect size for exp 4 log error  : 0.022, CI [-0.30, 0.32]
cat("\n")
######################################################################## Compute the aggregated standardized effect size and its CI

# Custom function that performs a contrast weighted by sample size to
# compute a standardized effect size for all 4 experiments together, and its
# 95% BCa bootstrap CI.  ng1: no-graph condition, experiment 1 ng2: no-graph
# condition, experiment 2 ng3: no-graph condition, experiment 3 ng4:
# no-graph condition, experiment 4 g1: graph condition, experiment 1 g2:
# graph condition, experiment 2 g3: graph condition, experiment 3 g4: graph
# condition, experiment 4

printMetaEffectSize <- function(exp, ng1, ng2, ng3, ng4, g1, g2, g3, g4) {
    
    # Create custom data frame to hold all the data
    data <- data.frame(obs = c(ng1, ng2, ng3, ng4, g1, g2, g3, g4), group = c(rep("ng1", 
        length(ng1)), rep("ng2", length(ng2)), rep("ng3", length(ng3)), rep("ng4", 
        length(ng4)), rep("g1", length(g1)), rep("g2", length(g2)), rep("g3", 
        length(g3)), rep("g4", length(g4))))
    
    # Set weights for the contrast
    n_ng = length(ng1) + length(ng2) + length(ng3) + length(ng4)  # total sample size for no-graph
    n_g = length(g1) + length(g2) + length(g3) + length(g4)  # total sample size for graph
    w_ng1 = length(ng1)/n_ng
    w_ng2 = length(ng2)/n_ng
    w_ng3 = length(ng3)/n_ng
    w_ng4 = length(ng4)/n_ng
    w_g1 = length(g1)/n_g
    w_g2 = length(g2)/n_g
    w_g3 = length(g3)/n_g
    w_g4 = length(g4)/n_g
    # print(w_ng1+w_ng2+w_ng3-w_g1-w_g2-w_g3) # should be zero
    
    set.seed(0)  # make deterministic
    b <- bootES(data, R = R, effect.type = "cohens.d", data.col = "obs", group.col = "group", 
        scale.weights = T, contrast = c(ng1 = w_ng1, ng2 = w_ng2, ng3 = w_ng3, 
            ng4 = w_ng4, g1 = -w_g1, g2 = -w_g2, g3 = -w_g3, g4 = -w_g4))
    pointEstimate <- b[[1]]
    bootci <- boot.ci(b, type = "bca", conf = conf.level)
    cat("Effect size for ", exp, ": ", format_ci(c(pointEstimate, bootci$bca[4], 
        bootci$bca[5])), "\n", sep = "")
    return(data.frame(estimate = pointEstimate, ci.low = bootci$bca[4], ci.high = bootci$bca[5]))
}

# Note: the exact point and interval estimates obtained here may differ
# slightly from the paper because in the previous version, the random seed
# was not reset before each call to bootES. Use a large value of R to get
# more accurate estimates.

meta.belief <- printMetaEffectSize("all belief     ", eff1_graph, eff2_graph, 
    eff3_graph, eff4_graph, eff1_nograph, eff2_nograph, eff3_nograph, eff4_nograph)
## Effect size for all belief     : -0.20, CI [-0.35, -0.040]
cat("\n")
meta.error <- printMetaEffectSize("all log error  ", log(err1_nograph), log(err2_nograph), 
    log(err3_nograph), log(err4_nograph), log(err1_graph), log(err2_graph), 
    log(err3_graph), log(err4_graph))
## Effect size for all log error  : 0.15, CI [-0.0022, 0.31]
library('ggplot2')
library("gridExtra")
library("cowplot")
source("CI.helpers.R")
source("misc.helpers.R")


scale.min <- -0.92
scale.max <- 0.82
effect.size.indicator.color <- "black"



meta.ci.df <- NULL
meta.ci.df <- add.ci.to.df(exp1.belief, "belief", "EXP1", meta.ci.df)
meta.ci.df <- add.ci.to.df(exp1.error, "error", "EXP1", meta.ci.df)
meta.ci.df <- add.ci.to.df(exp2.belief, "belief", "EXP2", meta.ci.df)
meta.ci.df <- add.ci.to.df(exp2.error, "error", "EXP2", meta.ci.df)
meta.ci.df <- add.ci.to.df(exp3.belief, "belief", "EXP3", meta.ci.df)
meta.ci.df <- add.ci.to.df(exp3.error, "error", "EXP3", meta.ci.df)
meta.ci.df <- add.ci.to.df(exp4.belief, "belief", "EXP4", meta.ci.df)
meta.ci.df <- add.ci.to.df(exp4.error, "error", "EXP4", meta.ci.df)
meta.ci.df <- add.ci.to.df(meta.belief, "belief", "ALL", meta.ci.df)
meta.ci.df <- add.ci.to.df(meta.error, "error", "ALL", meta.ci.df)




citheme <- theme_bw(base_family = "Open Sans") +
  theme( # remove the vertical grid lines
    plot.background = element_blank(),
    plot.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.position = "none", #c(0.2,0.88),
    legend.title = element_blank(),
    legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.text = element_text(size=8),
    legend.key.size = unit(c(3.5, 3.5),'mm'),
    legend.key.width = unit(c(0.3, 0.3),'cm'),
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size=10, colour="#888888", margin = margin(1,1.5,0,0,"mm")),
    panel.spacing = unit(0, "lines"),
    panel.border = element_blank(), 
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.line.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(size=0.2, color="#BBBBBB"),
    panel.grid.major.y = element_blank(),
    strip.background = element_blank(),
    strip.text.y = element_text(angle = 180, margin = margin(1,-7,0,0,"mm"))
  )

condFatten <- function(x,...) {
  ifelse(x=="ALL", 1.3, 1)
}

condShape <- function(x,...) {
  ifelse(x=="ALL", 5, 16)
}

meta.ciplot <- function(df, scale = T) {

  ratioPlot <- ggplot(data= df) +
    expand_limits( y=c(scale.min, scale.max)) +
    citheme +
    geom_hline(yintercept = -0.8, color = effect.size.indicator.color, linetype = "dotted") +
    geom_hline(yintercept = -0.5, color = effect.size.indicator.color, linetype = "dotted") +
    geom_hline(yintercept = -0.2, color = effect.size.indicator.color, linetype = "dotted") +
    geom_hline(yintercept = 0, color = "black") +
    geom_hline(yintercept = 0.2, color = effect.size.indicator.color, linetype = "dotted") +
    geom_hline(yintercept = 0.5, color = effect.size.indicator.color, linetype = "dotted") +
    geom_hline(yintercept = 0.8, color = effect.size.indicator.color, linetype = "dotted") +
    coord_flip() +
    geom_pointrange(aes(y = estimate, x=reorder(name, order), ymin= ci.low, ymax = ci.high, color = name), size = condFatten(df$study), fatten = 2, shape= condShape(df$study)) +
    scale_colour_manual(name = 'belief in blue', values = setNames(c('#2b83ba', "black", "black", "black"),c("belief", "error", "meta", "afake"))) +
      geom_path(data=data.frame(name=c("error", "error", "belief", "belief"), estimate = c(0.75, 0.82, 0.75, 0.82), study= c("ALL", "ALL", "ALL", "ALL")), aes(x = name, y = estimate, color=name), arrow=arrow(angle = 25, length = unit(3, "mm"), type="closed")) +
    facet_grid(study ~ ., space= "free_x", switch = "y") +
    labs(x=NULL, y=NULL) +
    theme(axis.text.y = element_blank()) +
    scale_y_continuous(breaks= seq(from=-0.8, to=0.8, by= 0.2), labels = seq(from=-0.8, to=0.8, by= 0.2), limits = c(scale.min, scale.max))
  if (!scale) {
    ratioPlot <- ratioPlot + theme(axis.text.x = element_blank() )
  }
  return(ratioPlot)
}

metaplot <- meta.ciplot(meta.ci.df)

finalplot <- ggdraw() + 
  draw_plot(metaplot, 0, 0, 1, 0.86) +
  draw_label("large", 0.115, 0.95, vjust=1, hjust = 0.5, size = 10) +
  draw_label("medium", 0.27, 0.95, vjust=1, hjust = 0.5, size = 10) +
  draw_label("small", 0.425, 0.95, vjust=1, hjust = 0.5, size = 10) +
  draw_label("no\neffect", 0.53, 0.98, vjust=1, hjust = 0.5, size = 10) +
  draw_label("small", 0.635, 0.95, vjust=1, hjust = 0.5, size = 10) +
  draw_label("medium", 0.79, 0.95, vjust=1, hjust = 0.5, size = 10) +
  draw_label("large", 0.95, 0.95, vjust=1, hjust = 0.5, size = 10) +
 draw_label("Charts help\nunderstand", 0.91, 0.06, vjust = -1, hjust = 1, size = 11, fontface = "bold") +
  draw_label("Charts persuade", 0.91, 0.05, vjust = -1, hjust = 1, size = 11, colour = '#2b83ba', fontface = "bold") 
## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database

## Warning in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family 'Open Sans' not found in PostScript font database
print(finalplot)