Markov playlist
LIKELIHOOD OF PLAYLIST
The average Log Likelihood of a playlist \(S\), given a playlister \(A\), is written as
\[ \mathcal{L}(S|A) = \frac{1}{S} \sum log\ P(f_i|f_{i+1}) \]
Where \(f_i\) represents the feature \(f\) of song \(i\). We take the average because \(S\) might have different lengths. This is implemented below:
# arr: array com transições; mo: empirically derived markov object, returns log likelihood of arr
ll = function(mo, arr){ t <- log(transitionProbability(mo, arr[1], arr[2]))
return( if( length(arr) == 2 ) t else t + ll(mo, arr[2:length(arr)]) )}
# get the mean likelihood of playlist
mll = function(mo, arr){ return (ll(mo, arr)/length(arr)) }
Given a sequence of dicrete transitions \(p_1\), we extract a matrix \(Mt\):
p1 = sample(c('loud', 'quiet', 'medium'), replace = T, 100)
mt = data.frame(markovchainFit(data = p1)$estimate@transitionMatrix); mt
Now we can calculate $\mathcal{L}(S|A)$:
playlist_model <- new("markovchain", states = c("loud", "medium", "quiet"),
transitionMatrix = matrix(data = as.vector(t(mt)),
byrow = TRUE,
nrow = nrow(mt)),
name = "Playlist model")
ll(playlist_model, p1)
# for verification - tolerance of 1.5e-8.:
all.equal(markovchainFit(data = p1)$logLikelihood, ll(playlist_model, p1))
-104.259082491669
TRUE
Empirical validation
Consider that musicians and music producers use a implicit playlister $A$ to sort out tracks within their albums. The transition probabilities of \(A\) can be empirically derived with a transition matrix from a sample of playlists. Let \(A_r\) be a random playlister defined by
\[ P[X_{t+1} = x | X_t = x_t] \begin{cases}
\frac{1}{n−1}\ x \neq x_t \
\
0\ x = x_t
\end{cases} \]
Where $n$ is the length of the playlist, and $t$ is the position of the track within the playlist. Should there be an implicit playlister used by music producers, we should find that \[ \mathcal{L}(P_i|A)\ >\ \mathcal{L}(P_j|A_r)\]
Let us now bring a sample of playlists (music albums), and derive the transition matrices, as well as the average lilekihood of playlists.
dt = fread("grande_com_low.csv")
nomes = unique(dt$name)
dist<-adist(nomes, partial = TRUE, ignore.case = TRUE)
min.name<-apply(dist.name, 1, min)
# dt = dt[dt$track_number < 9, ]
# dt = split(dt, dt$album_id)
# for(i in 1:length(dt)){
# dt[[i]]$danceability = as.factor(rank(dt[[i]]$danceability, na.last = "keep", ties.method = "random"))
# dt[[i]]$valence = as.factor(rank(dt[[i]]$valence, na.last = "keep", ties.method = "random"))
# dt[[i]]$energy = as.factor(rank(dt[[i]]$energy, na.last = "keep", ties.method = "random"))
# dt[[i]]$loudness = as.factor(rank(dt[[i]]$loudness, na.last = "keep", ties.method = "random"))
# dt[[i]]$tempo = as.factor(rank(dt[[i]]$tempo, na.last = "keep", ties.method = "random"))
# }
# # dt = dplyr::bind_rows(dt)
# # mapply(paste, table(dt$energy)/nrow(dt))
# # split into traning and test data
# dt_train = dt[1:length(dt)*0.7]
# dt_test = dt[(length(dt)*0.7):length(dt)]
Error in fread("grande_sem_low.csv"): File 'grande_sem_low.csv' does not exist or is non-readable. getwd()=='C:/Users/Pedro Neto/Documents/GitHub/music_order/markov_chains'
Traceback:
1. fread("grande_sem_low.csv")
2. stop("File '", file, "' does not exist or is non-readable. getwd()=='",
. getwd(), "'")
valence = c(); energy = c() ;loudness = c(); tempo = c()
rv = c(); re = c() ; rl = c(); rt = c()
valence_t = c(); energy_t = c() ;loudness_t = c(); tempo_t = c()
for(i in 1:length(dt_train)){
valence[[i]] <- c('i', dt_train[[i]]$valence)
energy[[i]] <- c('i', dt_train[[i]]$energy)
loudness[[i]] <- c('i', dt_train[[i]]$loudness)
tempo[[i]] <- c('i', dt_train[[i]]$tempo)
}
for(i in 1:length(dt_train)){
rv[[i]] <- c('i', sample(dt_train[[i]]$valence, replace = FALSE, size = length(dt_train[[i]]$valence)))
re[[i]] <- c('i', sample(dt_train[[i]]$energy, replace = FALSE, size = length(dt_train[[i]]$energy)))
rl[[i]] <- c('i', sample(dt_train[[i]]$loudness, replace = FALSE, size = length(dt_train[[i]]$loudness)))
rt[[i]] <- c('i', sample(dt_train[[i]]$tempo, replace = FALSE, size = length(dt_train[[i]]$tempo)))
}
for(i in 1:length(dt_test)){
valence_t[[i]] <- c('i', dt_test[[i]]$valence)
energy_t[[i]] <- c('i', dt_test[[i]]$energy)
loudness_t[[i]] <- c('i', dt_test[[i]]$loudness)
tempo_t[[i]] <- c('i', dt_test[[i]]$tempo)
}
# #Getting the transition
v = data.frame(markovchainFit(data = valence)$estimate@transitionMatrix)
e = data.frame(markovchainFit(data = energy)$estimate@transitionMatrix)
l = data.frame(markovchainFit(data = loudness)$estimate@transitionMatrix)
t = data.frame(markovchainFit(data = tempo)$estimate@transitionMatrix)
#creating the object
v <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(v)), byrow = TRUE, nrow = nrow(v)), name = "valence")
e <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(e)), byrow = TRUE, nrow = nrow(e)), name = "energy")
l <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(l)), byrow = TRUE, nrow = nrow(l)), name = "loudness")
t <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(t)), byrow = TRUE, nrow = nrow(t)), name = "tempo")
# creating a uniform distribution
# random <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = c(1/5, 1/5, 1/5, 1/5, 1/5, 0, 1/5, 1/5, 1/5, 1/5, 1/5, 0, 1/5, 1/5, 1/5, 1/5, 1/5, 0, 1/5, 1/5, 1/5, 1/5, 1/5, 0, 1/5, 1/5, 1/5, 1/5, 1/5, 0, 1/5, 1/5, 1/5, 1/5, 1/5, 0), byrow = TRUE, nrow = nrow(t)), name = "random")
rv = data.frame(markovchainFit(data = rv)$estimate@transitionMatrix)
re = data.frame(markovchainFit(data = re)$estimate@transitionMatrix)
rl = data.frame(markovchainFit(data = rl)$estimate@transitionMatrix)
rt = data.frame(markovchainFit(data = rt)$estimate@transitionMatrix)
rv <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(rv)), byrow = TRUE, nrow = nrow(v)), name = "rvalence")
re <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(re)), byrow = TRUE, nrow = nrow(e)), name = "renergy")
rl <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(rl)), byrow = TRUE, nrow = nrow(l)), name = "rloudness")
rt <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(rt)), byrow = TRUE, nrow = nrow(t)), name = "rtempo")
# library("lattice")
# levelplot(t(v@transitionMatrix[2:8, 1:8]), ylab = "x", xlab = "x+1")
v_ll <- c(); e_ll <- c(); l_ll <- c(); t_ll <- c()
r_v <- c(); r_e <- c(); r_l <- c(); r_t <- c()
for(i in 1:length(valence_t)){
v_ll[i] <- mll(v, valence_t[[i]])
e_ll[i] <- mll(e, energy_t[[i]])
l_ll[i] <- mll(l, loudness_t[[i]])
t_ll[i] <- mll(t, tempo_t[[i]])
r_v[i] <- mll(rv, valence_t[[i]])
r_e[i] <- mll(re, energy_t[[i]])
r_l[i] <- mll(rl, loudness_t[[i]])
r_t[i] <- mll(rt, tempo_t[[i]])
}
}
log_likelihoods <-
data.frame(empirical_valence = v_ll,
empirical_energy = e_ll,
empirical_loudness = l_ll,
empirical_tempo = t_ll,
random_valence = r_v,
random_energy = r_e,
random_loudness = r_l,
random_tempo = r_t
)
ll <-
melt(data = log_likelihoods,
measure.vars = c('empirical_valence',
'empirical_energy',
'empirical_loudness',
'empirical_tempo',
'random_valence',
'random_energy',
'random_loudness',
'random_tempo')
)
ll<-
tidyr::separate(ll,
variable,
c("condition", "feature"),
"_")
library(ggplot2)
teste <- plyr::ddply(ll, c('feature', 'condition'), plyr::summarise,
log_likelihood = mean(value),
se = sd(value)/sqrt(length(value))
)
teste
ggplot(teste, aes(x=feature, y=log_likelihood, color = condition)) +
geom_errorbar(size = 1.2, aes(ymin=log_likelihood-se, ymax=log_likelihood+se), position=position_dodge(0.5)) +
geom_point(size = 3, position=position_dodge(0.5), fill = 'black')
Conclusion
For some features, the empirically derived playlister \(A\) is superior to the randomized playlister, which I created by deriving a transition matrix from a randomly shuffle playlist (playlister \(A'´\)). Generally, I found that \[ \mathcal{L}(P_i|A)\ >\ \mathcal{L}(P_j|A_r) \]