Load the data

library(dplyr)
library(ggplot2)
library(lme4)
library(broom)
library(jtools)
plot_estimates <- function(glmod, title=NULL, 
  subtitle=NULL, estimate_name="odds"){
  
    vline_intercept = 0
  
    (glmod
      %>% tidy(conf.int = T)
      %>% mutate(is_significant=p.value < 0.05) 
      -> coefs)
    
    if (family(glmod)$family %in% c("binomial", "Gamma")){
      (mutate_at( 
        coefs,
        vars("estimate","conf.low","conf.high"), 
        ~exp(.))
      %>% mutate(std.error=std.error*estimate)
      %>% mutate(term=str_remove(term, " TRUE"))
      ->  coefs)
      
      vline_intercept = 1
    }
    
    if(exists('group', where=coefs))
      coefs %<>% filter(group == "fixed")
    (coefs
    %>% ggcoef(sort = "ascending",
      vline_intercept = vline_intercept,
      mapping = aes_string(y = "term", x = "estimate", color="is_significant")
    )
    + scale_color_manual(values=c("black", "#F4511E"))
    + scale_y_discrete(name="Variable")
    + scale_x_continuous(name=sprintf("Estimate (%s)", estimate_name))
    + theme_minimal()
    + theme(legend.position = "none")
    + ggtitle(title, subtitle = subtitle))
}
Trials  <- read.csv("trials.csv")
Targets <- read.csv("target-metadata.csv")
Trials  <- left_join(Trials, Targets, by=c("task", "stimuli"))
Trials$features_pretty <- factor(Trials$features_pretty,
  levels = c("baseline-deficient", "+position", "+size", "+color", "+speed",
    "+direction", "+area increase", "baseline-charged", 
    "-position", "-size", "-color", "-speed",
    "-direction", "-area increase"),
  ordered = T)

Calculate Accuracy

Accuracy <- Trials %>%
  group_by(task, group, features, stimuli) %>%
  summarise(accuracy = sum(correct)/n(),
    n = n(),
    features_pretty = features_pretty[1],
    scene = scene[1]) %>%
  ungroup()

Plot Accuracy x Task x Condition

ggplot(Accuracy) +
  geom_boxplot(aes(x=features_pretty, y=accuracy, fill=group)) + 
  ylab("Accuracy") +
  xlab("Condition") +
  ggtitle("Tasks") +
  facet_wrap(~task) +
  theme(axis.text.x=element_text(angle = -90, hjust = 0))

Model Fitting

Correctness - Speed

g_speed <- glmer(correct ~ 
    position      + 
    color         + 
    size          + 
    direction     + 
    size_increase + 
    (1|subject)   + 
    (1|scene),
  data=filter(Trials, task=="speed"),
  family="binomial")
summary(g_speed)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: correct ~ position + color + size + direction + size_increase +      (1 | subject) + (1 | scene)
   Data: filter(Trials, task == "speed")

     AIC      BIC   logLik deviance df.resid 
  2498.7   2545.0  -1241.3   2482.7     2392 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-6.7437 -0.6043  0.2306  0.5945  9.7967 

Random effects:
 Groups  Name        Variance Std.Dev.
 subject (Intercept) 0.8610   0.9279  
 scene   (Intercept) 0.6222   0.7888  
Number of obs: 2400, groups:  subject, 61; scene, 10

Fixed effects:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)        -1.45948    0.29661  -4.920 8.63e-07 ***
position TRUE       1.06561    0.11327   9.408  < 2e-16 ***
color TRUE          0.66738    0.11415   5.847 5.02e-09 ***
size TRUE           0.08019    0.11943   0.671    0.502    
direction TRUE      1.54940    0.11403  13.588  < 2e-16 ***
size_increase TRUE -0.07778    0.12126  -0.641    0.521    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) psTRUE clTRUE szTRUE drTRUE
positinTRUE -0.100                            
color TRUE  -0.088 -0.075                     
size TRUE   -0.048 -0.161 -0.183              
directnTRUE -0.111 -0.015 -0.052 -0.146       
sz_ncrsTRUE -0.042 -0.186 -0.209 -0.268 -0.174
plot_estimates(g_speed, "Speed Task")
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector

Correctness - Direction

g_direction <- glmer(correct ~ 
    position      + 
    color         + 
    size          + 
    speed         + 
    size_increase + 
    (1|subject)   + 
    (1|scene),
  data=filter(Trials, task=="direction"),
  family="binomial")
summary(g_direction)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: correct ~ position + color + size + speed + size_increase + (1 |      subject) + (1 | scene)
   Data: filter(Trials, task == "direction")

     AIC      BIC   logLik deviance df.resid 
  2440.5   2486.8  -1212.3   2424.5     2392 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.0627 -0.5641 -0.2490  0.6116  8.2269 

Random effects:
 Groups  Name        Variance Std.Dev.
 subject (Intercept) 1.6255   1.2749  
 scene   (Intercept) 0.3044   0.5517  
Number of obs: 2400, groups:  subject, 61; scene, 10

Fixed effects:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)        -2.17183    0.26879  -8.080 6.47e-16 ***
position TRUE       1.16983    0.11665  10.029  < 2e-16 ***
color TRUE          0.36758    0.11728   3.134 0.001724 ** 
size TRUE           0.44090    0.11707   3.766 0.000166 ***
speed TRUE          0.97281    0.11629   8.366  < 2e-16 ***
size_increase TRUE -0.06062    0.12104  -0.501 0.616465    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) psTRUE clTRUE szTRUE spTRUE
positinTRUE -0.137                            
color TRUE  -0.090 -0.120                     
size TRUE   -0.095 -0.115 -0.163              
speed TRUE  -0.127 -0.060 -0.131 -0.124       
sz_ncrsTRUE -0.056 -0.176 -0.215 -0.210 -0.183
plot_estimates(g_direction, "Direction Task")
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector

Number of views (Plays)

Trials %>%
  mutate(num_trials = factor(num_trials)) %>%
  group_by(task, features_pretty, correct, num_trials) %>%
  summarise(count=n(), group=group[1]) %>%
  ungroup() %>%
  mutate(num_trials = as.numeric(num_trials)) %>%
  arrange(task, features_pretty) %>%
  ggplot(aes(x=num_trials, y=count, color=group, linetype=correct)) +
  geom_line(size=1.2) +
  scale_x_continuous(breaks = c(1,2,3)) +
  scale_linetype_manual(values=c("twodash", "solid")) +
  ylab("Count") +
  xlab("Number of Views") +
  facet_grid(task~features_pretty) +
  theme_minimal() +
  theme(strip.text = element_text(angle = 90, hjust = 0)) +
  theme(legend.position = "bottom")

Which features mislead?

scale_this <- function(x){(x - mean(x, na.rm=TRUE)) / sd(x, na.rm=TRUE)}
Scenes = read.csv("scenes.csv")
IncorrectSelections = 
  filter(Scenes, name < 50, count > 0) %>%
  mutate(task = ifelse(str_detect(stimulus, "speed"), "speed", "direction")) %>%
  rename(speed=change)
ScaledIncorrectSelections = mutate_at(IncorrectSelections,
  vars(matches('saliency|speed')),
  scale_this) %>%
  mutate(direction = scale_this(abs(mean(theta)-theta)))

Error count - Speed

g_errors_speed = glm(count ~ speed + 
    (saliency_direction + 
        saliency_color + 
        saliency_xy1 + 
        saliency_xy2 +
        saliency_size)*speed,
  data=filter(ScaledIncorrectSelections, task=="speed"),
  family=Gamma(link = 'log'))
summary(g_errors_speed)

Call:
glm(formula = count ~ speed + (saliency_direction + saliency_color + 
    saliency_xy1 + saliency_xy2 + saliency_size) * speed, family = Gamma(link = "log"), 
    data = filter(ScaledIncorrectSelections, task == "speed"))

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-0.98964  -0.37617  -0.14822   0.09759   2.06064  

Coefficients:
                         Estimate Std. Error t value Pr(>|t|)    
(Intercept)               0.40990    0.02356  17.399  < 2e-16 ***
speed                     0.23589    0.02495   9.453  < 2e-16 ***
saliency_direction        0.17222    0.02182   7.893 1.25e-14 ***
saliency_color            0.04042    0.02264   1.785  0.07470 .  
saliency_xy1              0.11038    0.03662   3.015  0.00267 ** 
saliency_xy2              0.08845    0.03734   2.369  0.01815 *  
saliency_size             0.05002    0.02403   2.082  0.03777 *  
speed:saliency_direction  0.11026    0.02365   4.662 3.80e-06 ***
speed:saliency_color      0.01700    0.02184   0.778  0.43668    
speed:saliency_xy1        0.07753    0.03860   2.009  0.04498 *  
speed:saliency_xy2        0.01533    0.03855   0.398  0.69108    
speed:saliency_size       0.02078    0.02351   0.884  0.37719    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for Gamma family taken to be 0.3189045)

    Null deviance: 245.67  on 664  degrees of freedom
Residual deviance: 154.99  on 653  degrees of freedom
AIC: 1379.6

Number of Fisher Scoring iterations: 7
plot_estimates(g_errors_speed, 
  title = "Errors",
  subtitle = "Speed task",
  estimate_name = "count")

Error count - Direction

g_errors_direction = glm(count ~ direction + 
    (saliency_speed+
        saliency_color + 
        saliency_xy1 + 
        saliency_xy2 + 
        saliency_size)*direction,
  data=filter(ScaledIncorrectSelections, task=="direction"),
  family=Gamma(link = 'log'))
summary(g_errors_direction)

Call:
glm(formula = count ~ direction + (saliency_speed + saliency_color + 
    saliency_xy1 + saliency_xy2 + saliency_size) * direction, 
    family = Gamma(link = "log"), data = filter(ScaledIncorrectSelections, 
        task == "direction"))

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.0219  -0.4208  -0.1653   0.1909   1.6568  

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               0.439153   0.019882  22.088  < 2e-16 ***
direction                 0.271668   0.021539  12.613  < 2e-16 ***
saliency_speed            0.113799   0.017853   6.374 3.01e-10 ***
saliency_color            0.062911   0.020942   3.004 0.002742 ** 
saliency_xy1              0.015986   0.036124   0.443 0.658220    
saliency_xy2              0.219453   0.035706   6.146 1.22e-09 ***
saliency_size            -0.031221   0.018622  -1.677 0.093985 .  
direction:saliency_speed -0.002005   0.020260  -0.099 0.921191    
direction:saliency_color  0.042631   0.020231   2.107 0.035391 *  
direction:saliency_xy1    0.153920   0.036695   4.195 3.02e-05 ***
direction:saliency_xy2   -0.131537   0.035738  -3.681 0.000247 ***
direction:saliency_size   0.035604   0.021198   1.680 0.093400 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for Gamma family taken to be 0.2961768)

    Null deviance: 319.72  on 864  degrees of freedom
Residual deviance: 203.51  on 853  degrees of freedom
AIC: 1875.9

Number of Fisher Scoring iterations: 8
plot_estimates(g_errors_direction, 
  title = "Errors",
  subtitle = "Direction task",
  estimate_name = "count")

rbind(
  interact_plot(g_errors_speed, "speed", "saliency_direction")$data
    %>% mutate(variable="saliency_direction"),
  interact_plot(g_errors_speed, "speed", "saliency_color")$data
    %>% mutate(variable="saliency_color"),
  interact_plot(g_errors_speed, "speed", "saliency_xy1")$data
    %>% mutate(variable="saliency_xy1")
) %>%
  ggplot(aes(x=speed, y=count, color=modx_group, linetype=modx_group)) +
    geom_line(size=.85) +
    scale_color_brewer(
      palette="Blues", 
      breaks=c("+ 1 SD", "Mean", "- 1 SD"),
      direction=-1) +
    facet_wrap(~ variable) +
    theme_minimal() +
    theme(legend.title = element_blank(),
      legend.position = "bottom") +
    ggtitle(
      "Errors - Significant interaction terms", 
      "Speed task")
invalid factor level, NA generatedinvalid factor level, NA generated

rbind(
  interact_plot(g_errors_direction, "direction", "saliency_xy1")$data
  %>% mutate(variable="saliency_xy1"),
  interact_plot(g_errors_direction, "direction", "saliency_color")$data
  %>% mutate(variable="saliency_color"),
  interact_plot(g_errors_direction, "direction", "saliency_xy2")$data
  %>% mutate(variable="saliency_xy2")
) %>%
  ggplot(aes(x=direction, y=count, color=modx_group, linetype=modx_group)) +
  geom_line(size=.85) +
  scale_color_brewer(
    palette="Blues", 
    breaks=c("+ 1 SD", "Mean", "- 1 SD"),
    direction=-1) +
  facet_wrap(~ variable) +
  theme_minimal() +
  theme(legend.title = element_blank(),
    legend.position = "bottom") +
  ggtitle("Errors - Significant interaction terms", 
    "Direction task")
invalid factor level, NA generatedinvalid factor level, NA generated

---
title: "Analysis - Saliency Deficit and Motion Outlier Detection in Animated Scatterplots"
output: html_notebook
---

## Load the _data_

```{r}
library(dplyr)
library(ggplot2)
library(lme4)
library(broom)
library(jtools)

plot_estimates <- function(glmod, title=NULL, 
  subtitle=NULL, estimate_name="odds"){
  
    vline_intercept = 0
  
    (glmod
      %>% tidy(conf.int = T)
      %>% mutate(is_significant=p.value < 0.05) 
      -> coefs)
    
    if (family(glmod)$family %in% c("binomial", "Gamma")){
      (mutate_at( 
        coefs,
        vars("estimate","conf.low","conf.high"), 
        ~exp(.))
      %>% mutate(std.error=std.error*estimate)
      %>% mutate(term=str_remove(term, " TRUE"))
      ->  coefs)
      
      vline_intercept = 1
    }
    
    if(exists('group', where=coefs))
      coefs %<>% filter(group == "fixed")

    (coefs
    %>% ggcoef(sort = "ascending",
      vline_intercept = vline_intercept,
      mapping = aes_string(y = "term", x = "estimate", color="is_significant")
    )
    + scale_color_manual(values=c("black", "#F4511E"))
    + scale_y_discrete(name="Variable")
    + scale_x_continuous(name=sprintf("Estimate (%s)", estimate_name))
    + theme_minimal()
    + theme(legend.position = "none")
    + ggtitle(title, subtitle = subtitle))
}



```


```{r echo=TRUE}
Trials  <- read.csv("trials.csv")
Targets <- read.csv("target-metadata.csv")
Trials  <- left_join(Trials, Targets, by=c("task", "stimuli"))
```

```{r}
Trials$features_pretty <- factor(Trials$features_pretty,
  levels = c("baseline-deficient", "+position", "+size", "+color", "+speed",
    "+direction", "+area increase", "baseline-charged", 
    "-position", "-size", "-color", "-speed",
    "-direction", "-area increase"),
  ordered = T)
```

## Calculate Accuracy

```{r}
Accuracy <- Trials %>%
  group_by(task, group, features, stimuli) %>%
  summarise(accuracy = sum(correct)/n(),
    n = n(),
    features_pretty = features_pretty[1],
    scene = scene[1]) %>%
  ungroup()
```

## Plot Accuracy x Task x Condition


```{r}
ggplot(Accuracy) +
  geom_boxplot(aes(x=features_pretty, y=accuracy, fill=group)) + 
  ylab("Accuracy") +
  xlab("Condition") +
  ggtitle("Tasks") +
  facet_wrap(~task) +
  theme(axis.text.x=element_text(angle = -90, hjust = 0))
```

## Model Fitting

### Correctness - Speed

```{r}
g_speed <- glmer(correct ~ 
    position      + 
    color         + 
    size          + 
    direction     + 
    size_increase + 
    (1|subject)   + 
    (1|scene),
  data=filter(Trials, task=="speed"),
  family="binomial")

summary(g_speed)
plot_estimates(g_speed, "Speed Task")
```


### Correctness - Direction

```{r}
g_direction <- glmer(correct ~ 
    position      + 
    color         + 
    size          + 
    speed         + 
    size_increase + 
    (1|subject)   + 
    (1|scene),
  data=filter(Trials, task=="direction"),
  family="binomial")

summary(g_direction)
plot_estimates(g_direction, "Direction Task")
```

### Number of views (Plays)

```{r}
Trials %>%
  mutate(num_trials = factor(num_trials)) %>%
  group_by(task, features_pretty, correct, num_trials) %>%
  summarise(count=n(), group=group[1]) %>%
  ungroup() %>%
  mutate(num_trials = as.numeric(num_trials)) %>%
  arrange(task, features_pretty) %>%
  ggplot(aes(x=num_trials, y=count, color=group, linetype=correct)) +
  geom_line(size=1.2) +
  scale_x_continuous(breaks = c(1,2,3)) +
  scale_linetype_manual(values=c("twodash", "solid")) +
  ylab("Count") +
  xlab("Number of Views") +
  facet_grid(task~features_pretty) +
  theme_minimal() +
  theme(strip.text = element_text(angle = 90, hjust = 0)) +
  theme(legend.position = "bottom")
```


### Which features mislead?

```{r}
scale_this <- function(x){(x - mean(x, na.rm=TRUE)) / sd(x, na.rm=TRUE)}
Scenes = read.csv("scenes.csv")
IncorrectSelections = 
  filter(Scenes, name < 50, count > 0) %>%
  mutate(task = ifelse(str_detect(stimulus, "speed"), "speed", "direction")) %>%
  rename(speed=change)

ScaledIncorrectSelections = mutate_at(IncorrectSelections,
  vars(matches('saliency|speed')),
  scale_this) %>%
  mutate(direction = scale_this(abs(mean(theta)-theta)))

```

### Error count - Speed

```{r}
g_errors_speed = glm(count ~ speed + 
    (saliency_direction + 
        saliency_color + 
        saliency_xy1 + 
        saliency_xy2 +
        saliency_size)*speed,
  data=filter(ScaledIncorrectSelections, task=="speed"),
  family=Gamma(link = 'log'))
summary(g_errors_speed)
```


```{r}
plot_estimates(g_errors_speed, 
  title = "Errors",
  subtitle = "Speed task",
  estimate_name = "count")
```

### Error count - Direction

```{r}
g_errors_direction = glm(count ~ direction + 
    (saliency_speed+
        saliency_color + 
        saliency_xy1 + 
        saliency_xy2 + 
        saliency_size)*direction,
  data=filter(ScaledIncorrectSelections, task=="direction"),
  family=Gamma(link = 'log'))

summary(g_errors_direction)
```

```{r}
plot_estimates(g_errors_direction, 
  title = "Errors",
  subtitle = "Direction task",
  estimate_name = "count")
```

```{r}
rbind(
  interact_plot(g_errors_speed, "speed", "saliency_direction")$data
    %>% mutate(variable="saliency_direction"),
  interact_plot(g_errors_speed, "speed", "saliency_color")$data
    %>% mutate(variable="saliency_color"),
  interact_plot(g_errors_speed, "speed", "saliency_xy1")$data
    %>% mutate(variable="saliency_xy1")
) %>%
  ggplot(aes(x=speed, y=count, color=modx_group, linetype=modx_group)) +
    geom_line(size=.85) +
    scale_color_brewer(
      palette="Blues", 
      breaks=c("+ 1 SD", "Mean", "- 1 SD"),
      direction=-1) +
    facet_wrap(~ variable) +
    theme_minimal() +
    theme(legend.title = element_blank(),
      legend.position = "bottom") +
    ggtitle(
      "Errors - Significant interaction terms", 
      "Speed task")
```

```{r}
rbind(
  interact_plot(g_errors_direction, "direction", "saliency_xy1")$data
  %>% mutate(variable="saliency_xy1"),
  interact_plot(g_errors_direction, "direction", "saliency_color")$data
  %>% mutate(variable="saliency_color"),
  interact_plot(g_errors_direction, "direction", "saliency_xy2")$data
  %>% mutate(variable="saliency_xy2")
) %>%
  ggplot(aes(x=direction, y=count, color=modx_group, linetype=modx_group)) +
  geom_line(size=.85) +
  scale_color_brewer(
    palette="Blues", 
    breaks=c("+ 1 SD", "Mean", "- 1 SD"),
    direction=-1) +
  facet_wrap(~ variable) +
  theme_minimal() +
  theme(legend.title = element_blank(),
    legend.position = "bottom") +
  ggtitle("Errors - Significant interaction terms", 
    "Direction task")
```


