This notebook provides the code for the vowel plot that was shown on Oxbury & McCarthy’s ICPhS poster.
As a first step, of course, load tidyverse.
library(tidyverse)
## ── Attaching packages ───────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.1 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Load in the data
data <- read_csv("icphs_data.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## participant = col_character(),
## sound_label = col_character(),
## word = col_character(),
## task = col_character(),
## age = col_character(),
## gender = col_character(),
## rol.var = col_character(),
## face.l = col_character(),
## price.l = col_character()
## )
## See spec(...) for full column specifications.
Make a grouping variable.
data <- data %>% mutate(
group = ifelse(
gender=="M" & age=="child", "Ch_M", ifelse(
gender=="M" & age=="adolescent", "Ad_M", ifelse(
gender=="F" & age=="child", "Ch_F", "Ad_F"
)
)
)
)
Check that it worked
table(data$group)
##
## Ad_F Ad_M Ch_F Ch_M
## 2466 1759 2048 2282
Convert sound_label to a factor
data$sound_label <- as.factor(data$sound_label)
Check levels
levels(data$sound_label)
## [1] "dress" "face" "fleece" "foot" "goat" "goose" "kit" "lot"
## [9] "mouth" "price" "start" "strut" "trap"
But we don’t want all of these vowels. We will create a vector of the names of the vowels we are interested in – the diphthongs FACE, PRICE and GOAT, and the reference vowels FLEECE, LOT and TRAP – so that then we can subset these out.
vowels <- c("face",
"fleece",
"goat",
"lot",
"price",
"trap")
Now create the subset:
sub_data <- data %>% filter(sound_label %in% vowels)
Check that this worked:
table(sub_data$sound_label)
##
## dress face fleece foot goat goose kit lot mouth price start
## 0 1937 304 0 2076 0 0 296 0 3106 0
## strut trap
## 0 293
Rename sound_label as Vowel – that is what it should be called!
sub_data <- sub_data %>%
rename(
Vowel = sound_label
)
And rename the levels of “Vowel” so that they are in capitals. First, check what the levels are at present:
levels(sub_data$Vowel)
## [1] "dress" "face" "fleece" "foot" "goat" "goose" "kit" "lot"
## [9] "mouth" "price" "start" "strut" "trap"
And then rename these:
levels(sub_data$Vowel) <- c("dress",
"FACE",
"FLEECE",
"foot",
"GOAT",
"goose",
"kit",
"LOT",
"mouth",
"PRICE",
"start",
"strut",
"TRAP")
First of all, define a colourblind friendly palette:
cbbPalette <- c("#D55E00", "#56B4E9", "#009E73", "#F0E442", "#000000", "#E69F00")
Create an initial plot of just ellipses:
a <- sub_data %>% ggplot(aes(x=normF2_20,
y=normF1_20,
group = Vowel,
color = Vowel,
fill = Vowel,
shape = gender,
linetype = age)) +
scale_x_reverse() +
scale_y_reverse() +
stat_ellipse(geom = "polygon",
alpha = 0.2,
linetype = 0,
level=0.67) +
theme_minimal() +
scale_fill_manual(values=cbbPalette) +
scale_colour_manual(values=cbbPalette)
a
Try adding ellipses for diphthong finishing points
just.diph <- sub_data %>% filter(Vowel == "FACE" | Vowel == "PRICE" | Vowel == "GOAT")
b <- a + stat_ellipse(data = just.diph, aes(x=normF2_35, y=normF1_35), geom = "polygon", alpha = 0.1, linetype = 0, level=0.67) +
stat_ellipse(data = just.diph, aes(x=normF2_50, y=normF1_50), geom = "polygon", alpha = 0.1, linetype = 0, level=0.67) +
stat_ellipse(data = just.diph, aes(x=normF2_65, y=normF1_65), geom = "polygon", alpha = 0.1, linetype = 0, level=0.67) +
stat_ellipse(data = just.diph, aes(x=normF2_80, y=normF1_80), geom = "polygon", alpha = 0.2, linetype = 0, level=0.67)
b
Quite nice. we could add points to it
means <- sub_data %>%
group_by(Vowel, age, gender) %>%
summarise(meanF120 = mean(normF1_20), sdF120 = sd(normF1_20),
meanF135 = mean(normF1_35), sdF135 = sd(normF1_35),
meanF150 = mean(normF1_50), sdF150 = sd(normF1_50),
meanF165 = mean(normF1_65), sdF165 = sd(normF1_65),
meanF180 = mean(normF1_80), sdF180 = sd(normF1_80),
meanF220 = mean(normF2_20), sdF220 = sd(normF2_20),
meanF235 = mean(normF2_35), sdF235 = sd(normF2_35),
meanF250 = mean(normF2_50), sdF250 = sd(normF2_50),
meanF265 = mean(normF2_65), sdF265 = sd(normF2_65),
meanF280 = mean(normF2_80), sdF280 = sd(normF2_80)
)
## `summarise()` regrouping output by 'Vowel', 'age' (override with `.groups` argument)
And means for the diphthongs
diph <- means %>% filter(Vowel == "FACE" | Vowel == "PRICE" | Vowel == "GOAT")
Add these to the plot
c <- b + geom_point(data = means,
aes(x=meanF220,
y=meanF120), size = 4) +
geom_segment(data = diph,
aes(
x=meanF220,
y=meanF120,
xend=meanF235,
yend=meanF135
), size = 1) +
geom_segment(data = diph,
aes(
x=meanF235,
y=meanF135,
xend=meanF250,
yend=meanF150
), size = 1) +
geom_segment(data = diph,
aes(
x=meanF250,
y=meanF150,
xend=meanF265,
yend=meanF165
), size = 1) +
geom_segment(data = diph,
aes(
x=meanF265,
y=meanF165,
xend=meanF280,
yend=meanF180
), arrow = arrow(ends="last"), size = 1)
c
And now let’s make the legends, titles etc neat
d <- c + theme(axis.title = element_text(size = rel(2)),
legend.title = element_blank(),
legend.text = element_text(size = rel(1)),
axis.text = element_text(size = rel(1))) +
scale_shape("Sex") +
labs(x = "Normalized F2",
y = "Normalized F1") +
scale_linetype("Age")
d