Load the data
library(dplyr)
library(ggplot2)
library(lme4)
library(broom)
library(jtools)
plot_estimates <- function(glmod, title=NULL,
subtitle=NULL, estimate_name="odds"){
vline_intercept = 0
(glmod
%>% tidy(conf.int = T)
%>% mutate(is_significant=p.value < 0.05)
-> coefs)
if (family(glmod)$family %in% c("binomial", "Gamma")){
(mutate_at(
coefs,
vars("estimate","conf.low","conf.high"),
~exp(.))
%>% mutate(std.error=std.error*estimate)
%>% mutate(term=str_remove(term, " TRUE"))
-> coefs)
vline_intercept = 1
}
if(exists('group', where=coefs))
coefs %<>% filter(group == "fixed")
(coefs
%>% ggcoef(sort = "ascending",
vline_intercept = vline_intercept,
mapping = aes_string(y = "term", x = "estimate", color="is_significant")
)
+ scale_color_manual(values=c("black", "#F4511E"))
+ scale_y_discrete(name="Variable")
+ scale_x_continuous(name=sprintf("Estimate (%s)", estimate_name))
+ theme_minimal()
+ theme(legend.position = "none")
+ ggtitle(title, subtitle = subtitle))
}
Trials <- read.csv("trials.csv")
Targets <- read.csv("target-metadata.csv")
Trials <- left_join(Trials, Targets, by=c("task", "stimuli"))
Trials$features_pretty <- factor(Trials$features_pretty,
levels = c("baseline-deficient", "+position", "+size", "+color", "+speed",
"+direction", "+area increase", "baseline-charged",
"-position", "-size", "-color", "-speed",
"-direction", "-area increase"),
ordered = T)
Model Fitting
Correctness - Speed
g_speed <- glmer(correct ~
position +
color +
size +
direction +
size_increase +
(1|subject) +
(1|scene),
data=filter(Trials, task=="speed"),
family="binomial")
summary(g_speed)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: binomial ( logit )
Formula: correct ~ position + color + size + direction + size_increase + (1 | subject) + (1 | scene)
Data: filter(Trials, task == "speed")
AIC BIC logLik deviance df.resid
2498.7 2545.0 -1241.3 2482.7 2392
Scaled residuals:
Min 1Q Median 3Q Max
-6.7437 -0.6043 0.2306 0.5945 9.7967
Random effects:
Groups Name Variance Std.Dev.
subject (Intercept) 0.8610 0.9279
scene (Intercept) 0.6222 0.7888
Number of obs: 2400, groups: subject, 61; scene, 10
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.45948 0.29661 -4.920 8.63e-07 ***
position TRUE 1.06561 0.11327 9.408 < 2e-16 ***
color TRUE 0.66738 0.11415 5.847 5.02e-09 ***
size TRUE 0.08019 0.11943 0.671 0.502
direction TRUE 1.54940 0.11403 13.588 < 2e-16 ***
size_increase TRUE -0.07778 0.12126 -0.641 0.521
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) psTRUE clTRUE szTRUE drTRUE
positinTRUE -0.100
color TRUE -0.088 -0.075
size TRUE -0.048 -0.161 -0.183
directnTRUE -0.111 -0.015 -0.052 -0.146
sz_ncrsTRUE -0.042 -0.186 -0.209 -0.268 -0.174
plot_estimates(g_speed, "Speed Task")
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector

Correctness - Direction
g_direction <- glmer(correct ~
position +
color +
size +
speed +
size_increase +
(1|subject) +
(1|scene),
data=filter(Trials, task=="direction"),
family="binomial")
summary(g_direction)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: binomial ( logit )
Formula: correct ~ position + color + size + speed + size_increase + (1 | subject) + (1 | scene)
Data: filter(Trials, task == "direction")
AIC BIC logLik deviance df.resid
2440.5 2486.8 -1212.3 2424.5 2392
Scaled residuals:
Min 1Q Median 3Q Max
-4.0627 -0.5641 -0.2490 0.6116 8.2269
Random effects:
Groups Name Variance Std.Dev.
subject (Intercept) 1.6255 1.2749
scene (Intercept) 0.3044 0.5517
Number of obs: 2400, groups: subject, 61; scene, 10
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.17183 0.26879 -8.080 6.47e-16 ***
position TRUE 1.16983 0.11665 10.029 < 2e-16 ***
color TRUE 0.36758 0.11728 3.134 0.001724 **
size TRUE 0.44090 0.11707 3.766 0.000166 ***
speed TRUE 0.97281 0.11629 8.366 < 2e-16 ***
size_increase TRUE -0.06062 0.12104 -0.501 0.616465
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) psTRUE clTRUE szTRUE spTRUE
positinTRUE -0.137
color TRUE -0.090 -0.120
size TRUE -0.095 -0.115 -0.163
speed TRUE -0.127 -0.060 -0.131 -0.124
sz_ncrsTRUE -0.056 -0.176 -0.215 -0.210 -0.183
plot_estimates(g_direction, "Direction Task")
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector

Number of views (Plays)
Trials %>%
mutate(num_trials = factor(num_trials)) %>%
group_by(task, features_pretty, correct, num_trials) %>%
summarise(count=n(), group=group[1]) %>%
ungroup() %>%
mutate(num_trials = as.numeric(num_trials)) %>%
arrange(task, features_pretty) %>%
ggplot(aes(x=num_trials, y=count, color=group, linetype=correct)) +
geom_line(size=1.2) +
scale_x_continuous(breaks = c(1,2,3)) +
scale_linetype_manual(values=c("twodash", "solid")) +
ylab("Count") +
xlab("Number of Views") +
facet_grid(task~features_pretty) +
theme_minimal() +
theme(strip.text = element_text(angle = 90, hjust = 0)) +
theme(legend.position = "bottom")

Which features mislead?
scale_this <- function(x){(x - mean(x, na.rm=TRUE)) / sd(x, na.rm=TRUE)}
Scenes = read.csv("scenes.csv")
IncorrectSelections =
filter(Scenes, name < 50, count > 0) %>%
mutate(task = ifelse(str_detect(stimulus, "speed"), "speed", "direction")) %>%
rename(speed=change)
ScaledIncorrectSelections = mutate_at(IncorrectSelections,
vars(matches('saliency|speed')),
scale_this) %>%
mutate(direction = scale_this(abs(mean(theta)-theta)))
Error count - Speed
g_errors_speed = glm(count ~ speed +
(saliency_direction +
saliency_color +
saliency_xy1 +
saliency_xy2 +
saliency_size)*speed,
data=filter(ScaledIncorrectSelections, task=="speed"),
family=Gamma(link = 'log'))
summary(g_errors_speed)
Call:
glm(formula = count ~ speed + (saliency_direction + saliency_color +
saliency_xy1 + saliency_xy2 + saliency_size) * speed, family = Gamma(link = "log"),
data = filter(ScaledIncorrectSelections, task == "speed"))
Deviance Residuals:
Min 1Q Median 3Q Max
-0.98964 -0.37617 -0.14822 0.09759 2.06064
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.40990 0.02356 17.399 < 2e-16 ***
speed 0.23589 0.02495 9.453 < 2e-16 ***
saliency_direction 0.17222 0.02182 7.893 1.25e-14 ***
saliency_color 0.04042 0.02264 1.785 0.07470 .
saliency_xy1 0.11038 0.03662 3.015 0.00267 **
saliency_xy2 0.08845 0.03734 2.369 0.01815 *
saliency_size 0.05002 0.02403 2.082 0.03777 *
speed:saliency_direction 0.11026 0.02365 4.662 3.80e-06 ***
speed:saliency_color 0.01700 0.02184 0.778 0.43668
speed:saliency_xy1 0.07753 0.03860 2.009 0.04498 *
speed:saliency_xy2 0.01533 0.03855 0.398 0.69108
speed:saliency_size 0.02078 0.02351 0.884 0.37719
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for Gamma family taken to be 0.3189045)
Null deviance: 245.67 on 664 degrees of freedom
Residual deviance: 154.99 on 653 degrees of freedom
AIC: 1379.6
Number of Fisher Scoring iterations: 7
plot_estimates(g_errors_speed,
title = "Errors",
subtitle = "Speed task",
estimate_name = "count")

Error count - Direction
g_errors_direction = glm(count ~ direction +
(saliency_speed+
saliency_color +
saliency_xy1 +
saliency_xy2 +
saliency_size)*direction,
data=filter(ScaledIncorrectSelections, task=="direction"),
family=Gamma(link = 'log'))
summary(g_errors_direction)
Call:
glm(formula = count ~ direction + (saliency_speed + saliency_color +
saliency_xy1 + saliency_xy2 + saliency_size) * direction,
family = Gamma(link = "log"), data = filter(ScaledIncorrectSelections,
task == "direction"))
Deviance Residuals:
Min 1Q Median 3Q Max
-1.0219 -0.4208 -0.1653 0.1909 1.6568
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.439153 0.019882 22.088 < 2e-16 ***
direction 0.271668 0.021539 12.613 < 2e-16 ***
saliency_speed 0.113799 0.017853 6.374 3.01e-10 ***
saliency_color 0.062911 0.020942 3.004 0.002742 **
saliency_xy1 0.015986 0.036124 0.443 0.658220
saliency_xy2 0.219453 0.035706 6.146 1.22e-09 ***
saliency_size -0.031221 0.018622 -1.677 0.093985 .
direction:saliency_speed -0.002005 0.020260 -0.099 0.921191
direction:saliency_color 0.042631 0.020231 2.107 0.035391 *
direction:saliency_xy1 0.153920 0.036695 4.195 3.02e-05 ***
direction:saliency_xy2 -0.131537 0.035738 -3.681 0.000247 ***
direction:saliency_size 0.035604 0.021198 1.680 0.093400 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for Gamma family taken to be 0.2961768)
Null deviance: 319.72 on 864 degrees of freedom
Residual deviance: 203.51 on 853 degrees of freedom
AIC: 1875.9
Number of Fisher Scoring iterations: 8
plot_estimates(g_errors_direction,
title = "Errors",
subtitle = "Direction task",
estimate_name = "count")

rbind(
interact_plot(g_errors_speed, "speed", "saliency_direction")$data
%>% mutate(variable="saliency_direction"),
interact_plot(g_errors_speed, "speed", "saliency_color")$data
%>% mutate(variable="saliency_color"),
interact_plot(g_errors_speed, "speed", "saliency_xy1")$data
%>% mutate(variable="saliency_xy1")
) %>%
ggplot(aes(x=speed, y=count, color=modx_group, linetype=modx_group)) +
geom_line(size=.85) +
scale_color_brewer(
palette="Blues",
breaks=c("+ 1 SD", "Mean", "- 1 SD"),
direction=-1) +
facet_wrap(~ variable) +
theme_minimal() +
theme(legend.title = element_blank(),
legend.position = "bottom") +
ggtitle(
"Errors - Significant interaction terms",
"Speed task")
invalid factor level, NA generatedinvalid factor level, NA generated

rbind(
interact_plot(g_errors_direction, "direction", "saliency_xy1")$data
%>% mutate(variable="saliency_xy1"),
interact_plot(g_errors_direction, "direction", "saliency_color")$data
%>% mutate(variable="saliency_color"),
interact_plot(g_errors_direction, "direction", "saliency_xy2")$data
%>% mutate(variable="saliency_xy2")
) %>%
ggplot(aes(x=direction, y=count, color=modx_group, linetype=modx_group)) +
geom_line(size=.85) +
scale_color_brewer(
palette="Blues",
breaks=c("+ 1 SD", "Mean", "- 1 SD"),
direction=-1) +
facet_wrap(~ variable) +
theme_minimal() +
theme(legend.title = element_blank(),
legend.position = "bottom") +
ggtitle("Errors - Significant interaction terms",
"Direction task")
invalid factor level, NA generatedinvalid factor level, NA generated

---
title: "Analysis - Saliency Deficit and Motion Outlier Detection in Animated Scatterplots"
output: html_notebook
---

## Load the _data_

```{r}
library(dplyr)
library(ggplot2)
library(lme4)
library(broom)
library(jtools)

plot_estimates <- function(glmod, title=NULL, 
  subtitle=NULL, estimate_name="odds"){
  
    vline_intercept = 0
  
    (glmod
      %>% tidy(conf.int = T)
      %>% mutate(is_significant=p.value < 0.05) 
      -> coefs)
    
    if (family(glmod)$family %in% c("binomial", "Gamma")){
      (mutate_at( 
        coefs,
        vars("estimate","conf.low","conf.high"), 
        ~exp(.))
      %>% mutate(std.error=std.error*estimate)
      %>% mutate(term=str_remove(term, " TRUE"))
      ->  coefs)
      
      vline_intercept = 1
    }
    
    if(exists('group', where=coefs))
      coefs %<>% filter(group == "fixed")

    (coefs
    %>% ggcoef(sort = "ascending",
      vline_intercept = vline_intercept,
      mapping = aes_string(y = "term", x = "estimate", color="is_significant")
    )
    + scale_color_manual(values=c("black", "#F4511E"))
    + scale_y_discrete(name="Variable")
    + scale_x_continuous(name=sprintf("Estimate (%s)", estimate_name))
    + theme_minimal()
    + theme(legend.position = "none")
    + ggtitle(title, subtitle = subtitle))
}



```


```{r echo=TRUE}
Trials  <- read.csv("trials.csv")
Targets <- read.csv("target-metadata.csv")
Trials  <- left_join(Trials, Targets, by=c("task", "stimuli"))
```

```{r}
Trials$features_pretty <- factor(Trials$features_pretty,
  levels = c("baseline-deficient", "+position", "+size", "+color", "+speed",
    "+direction", "+area increase", "baseline-charged", 
    "-position", "-size", "-color", "-speed",
    "-direction", "-area increase"),
  ordered = T)
```

## Calculate Accuracy

```{r}
Accuracy <- Trials %>%
  group_by(task, group, features, stimuli) %>%
  summarise(accuracy = sum(correct)/n(),
    n = n(),
    features_pretty = features_pretty[1],
    scene = scene[1]) %>%
  ungroup()
```

## Plot Accuracy x Task x Condition


```{r}
ggplot(Accuracy) +
  geom_boxplot(aes(x=features_pretty, y=accuracy, fill=group)) + 
  ylab("Accuracy") +
  xlab("Condition") +
  ggtitle("Tasks") +
  facet_wrap(~task) +
  theme(axis.text.x=element_text(angle = -90, hjust = 0))
```

## Model Fitting

### Correctness - Speed

```{r}
g_speed <- glmer(correct ~ 
    position      + 
    color         + 
    size          + 
    direction     + 
    size_increase + 
    (1|subject)   + 
    (1|scene),
  data=filter(Trials, task=="speed"),
  family="binomial")

summary(g_speed)
plot_estimates(g_speed, "Speed Task")
```


### Correctness - Direction

```{r}
g_direction <- glmer(correct ~ 
    position      + 
    color         + 
    size          + 
    speed         + 
    size_increase + 
    (1|subject)   + 
    (1|scene),
  data=filter(Trials, task=="direction"),
  family="binomial")

summary(g_direction)
plot_estimates(g_direction, "Direction Task")
```

### Number of views (Plays)

```{r}
Trials %>%
  mutate(num_trials = factor(num_trials)) %>%
  group_by(task, features_pretty, correct, num_trials) %>%
  summarise(count=n(), group=group[1]) %>%
  ungroup() %>%
  mutate(num_trials = as.numeric(num_trials)) %>%
  arrange(task, features_pretty) %>%
  ggplot(aes(x=num_trials, y=count, color=group, linetype=correct)) +
  geom_line(size=1.2) +
  scale_x_continuous(breaks = c(1,2,3)) +
  scale_linetype_manual(values=c("twodash", "solid")) +
  ylab("Count") +
  xlab("Number of Views") +
  facet_grid(task~features_pretty) +
  theme_minimal() +
  theme(strip.text = element_text(angle = 90, hjust = 0)) +
  theme(legend.position = "bottom")
```


### Which features mislead?

```{r}
scale_this <- function(x){(x - mean(x, na.rm=TRUE)) / sd(x, na.rm=TRUE)}
Scenes = read.csv("scenes.csv")
IncorrectSelections = 
  filter(Scenes, name < 50, count > 0) %>%
  mutate(task = ifelse(str_detect(stimulus, "speed"), "speed", "direction")) %>%
  rename(speed=change)

ScaledIncorrectSelections = mutate_at(IncorrectSelections,
  vars(matches('saliency|speed')),
  scale_this) %>%
  mutate(direction = scale_this(abs(mean(theta)-theta)))

```

### Error count - Speed

```{r}
g_errors_speed = glm(count ~ speed + 
    (saliency_direction + 
        saliency_color + 
        saliency_xy1 + 
        saliency_xy2 +
        saliency_size)*speed,
  data=filter(ScaledIncorrectSelections, task=="speed"),
  family=Gamma(link = 'log'))
summary(g_errors_speed)
```


```{r}
plot_estimates(g_errors_speed, 
  title = "Errors",
  subtitle = "Speed task",
  estimate_name = "count")
```

### Error count - Direction

```{r}
g_errors_direction = glm(count ~ direction + 
    (saliency_speed+
        saliency_color + 
        saliency_xy1 + 
        saliency_xy2 + 
        saliency_size)*direction,
  data=filter(ScaledIncorrectSelections, task=="direction"),
  family=Gamma(link = 'log'))

summary(g_errors_direction)
```

```{r}
plot_estimates(g_errors_direction, 
  title = "Errors",
  subtitle = "Direction task",
  estimate_name = "count")
```

```{r}
rbind(
  interact_plot(g_errors_speed, "speed", "saliency_direction")$data
    %>% mutate(variable="saliency_direction"),
  interact_plot(g_errors_speed, "speed", "saliency_color")$data
    %>% mutate(variable="saliency_color"),
  interact_plot(g_errors_speed, "speed", "saliency_xy1")$data
    %>% mutate(variable="saliency_xy1")
) %>%
  ggplot(aes(x=speed, y=count, color=modx_group, linetype=modx_group)) +
    geom_line(size=.85) +
    scale_color_brewer(
      palette="Blues", 
      breaks=c("+ 1 SD", "Mean", "- 1 SD"),
      direction=-1) +
    facet_wrap(~ variable) +
    theme_minimal() +
    theme(legend.title = element_blank(),
      legend.position = "bottom") +
    ggtitle(
      "Errors - Significant interaction terms", 
      "Speed task")
```

```{r}
rbind(
  interact_plot(g_errors_direction, "direction", "saliency_xy1")$data
  %>% mutate(variable="saliency_xy1"),
  interact_plot(g_errors_direction, "direction", "saliency_color")$data
  %>% mutate(variable="saliency_color"),
  interact_plot(g_errors_direction, "direction", "saliency_xy2")$data
  %>% mutate(variable="saliency_xy2")
) %>%
  ggplot(aes(x=direction, y=count, color=modx_group, linetype=modx_group)) +
  geom_line(size=.85) +
  scale_color_brewer(
    palette="Blues", 
    breaks=c("+ 1 SD", "Mean", "- 1 SD"),
    direction=-1) +
  facet_wrap(~ variable) +
  theme_minimal() +
  theme(legend.title = element_blank(),
    legend.position = "bottom") +
  ggtitle("Errors - Significant interaction terms", 
    "Direction task")
```


