Sep 5, 2020

Markov playlist

LIKELIHOOD OF PLAYLIST

The average Log Likelihood of a playlist $S$, given a playlister $A$, is written as

\[ \mathcal{L}(S|A) = \frac{1}{S} \sum log\ P(f_i|f_{i+1}) \]

Where $f_i$ represents the feature $f$ of song $i$. We take the average because $S$ might have different lengths. This is implemented below:

# arr: array com transições; mo: empirically derived markov object, returns log likelihood of arr
ll = function(mo, arr){ t <- log(transitionProbability(mo, arr[1], arr[2]))
                       return( if( length(arr) == 2 ) t else t + ll(mo, arr[2:length(arr)]) )}

# get the mean likelihood of playlist
mll = function(mo, arr){ return (ll(mo, arr)/length(arr)) }

Given a sequence of dicrete transitions $p_1$, we extract a matrix $Mt$:

p1 = sample(c('loud', 'quiet', 'medium'), replace = T, 100)
mt = data.frame(markovchainFit(data = p1)$estimate@transitionMatrix); mt

Now we can calculate $\mathcal{L}(S|A)$:

playlist_model <- new("markovchain", states = c("loud", "medium", "quiet"),
                      transitionMatrix = matrix(data = as.vector(t(mt)),
                                                byrow = TRUE, 
                                                nrow = nrow(mt)),
                      name = "Playlist model")

ll(playlist_model, p1)

# for verification - tolerance of 1.5e-8.: 
all.equal(markovchainFit(data = p1)$logLikelihood, ll(playlist_model, p1))

-104.259082491669

TRUE

Empirical validation

Consider that musicians and music producers use a implicit playlister $A$ to sort out tracks within their albums. The transition probabilities of $A$ can be empirically derived with a transition matrix from a sample of playlists. Let $A_r$ be a random playlister defined by

\[ P[X_{t+1} = x | X_t = x_t] \begin{cases} \frac{1}{n−1}\ x \neq x_t \
\
0\ x = x_t \end{cases} \]

Where $n$ is the length of the playlist, and $t$ is the position of the track within the playlist. Should there be an implicit playlister used by music producers, we should find that \[ \mathcal{L}(P_i|A)\ >\ \mathcal{L}(P_j|A_r)\]

Let us now bring a sample of playlists (music albums), and derive the transition matrices, as well as the average lilekihood of playlists.

dt = fread("grande_com_low.csv")

nomes = unique(dt$name)

dist<-adist(nomes, partial = TRUE, ignore.case = TRUE)

min.name<-apply(dist.name, 1, min)

# dt = dt[dt$track_number < 9, ] 
# dt = split(dt, dt$album_id)

# for(i in 1:length(dt)){
#     dt[[i]]$danceability = as.factor(rank(dt[[i]]$danceability, na.last = "keep", ties.method = "random"))
#     dt[[i]]$valence      = as.factor(rank(dt[[i]]$valence, na.last = "keep", ties.method = "random"))
#     dt[[i]]$energy       = as.factor(rank(dt[[i]]$energy, na.last = "keep", ties.method = "random"))
#     dt[[i]]$loudness     = as.factor(rank(dt[[i]]$loudness, na.last = "keep", ties.method = "random"))
#     dt[[i]]$tempo        = as.factor(rank(dt[[i]]$tempo, na.last = "keep", ties.method = "random"))
# }

# # dt = dplyr::bind_rows(dt)
# # mapply(paste, table(dt$energy)/nrow(dt))

# # split into traning and test data
# dt_train  = dt[1:length(dt)*0.7]
# dt_test   = dt[(length(dt)*0.7):length(dt)]

Error in fread("grande_sem_low.csv"): File 'grande_sem_low.csv' does not exist or is non-readable. getwd()=='C:/Users/Pedro Neto/Documents/GitHub/music_order/markov_chains'
Traceback:


1. fread("grande_sem_low.csv")

2. stop("File '", file, "' does not exist or is non-readable. getwd()=='", 
 .     getwd(), "'")

valence = c(); energy = c() ;loudness = c(); tempo = c()
rv = c(); re = c() ; rl = c(); rt = c()
valence_t = c(); energy_t = c() ;loudness_t = c(); tempo_t = c()

for(i in 1:length(dt_train)){
    valence[[i]]  <- c('i', dt_train[[i]]$valence)
    energy[[i]]   <- c('i', dt_train[[i]]$energy)
    loudness[[i]] <- c('i', dt_train[[i]]$loudness)
    tempo[[i]]    <- c('i', dt_train[[i]]$tempo)
}

for(i in 1:length(dt_train)){
    rv[[i]]  <- c('i', sample(dt_train[[i]]$valence, replace = FALSE, size = length(dt_train[[i]]$valence)))
    re[[i]]   <- c('i', sample(dt_train[[i]]$energy, replace = FALSE, size = length(dt_train[[i]]$energy)))
    rl[[i]] <- c('i', sample(dt_train[[i]]$loudness, replace = FALSE, size = length(dt_train[[i]]$loudness)))
    rt[[i]]    <- c('i', sample(dt_train[[i]]$tempo, replace = FALSE, size = length(dt_train[[i]]$tempo)))
}

for(i in 1:length(dt_test)){
    valence_t[[i]]  <- c('i', dt_test[[i]]$valence)
    energy_t[[i]]   <- c('i', dt_test[[i]]$energy)
    loudness_t[[i]] <- c('i', dt_test[[i]]$loudness)
    tempo_t[[i]]    <- c('i', dt_test[[i]]$tempo)
}

# #Getting the transition
v = data.frame(markovchainFit(data = valence)$estimate@transitionMatrix)
e = data.frame(markovchainFit(data = energy)$estimate@transitionMatrix)
l = data.frame(markovchainFit(data = loudness)$estimate@transitionMatrix)
t = data.frame(markovchainFit(data = tempo)$estimate@transitionMatrix)

#creating the object
v <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(v)), byrow = TRUE, nrow = nrow(v)), name = "valence")
e <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(e)), byrow = TRUE, nrow = nrow(e)), name = "energy")
l <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(l)), byrow = TRUE, nrow = nrow(l)), name = "loudness")
t <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(t)), byrow = TRUE, nrow = nrow(t)), name = "tempo")

# creating a uniform distribution
# random <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = c(1/5, 1/5, 1/5, 1/5, 1/5, 0, 1/5, 1/5, 1/5, 1/5, 1/5, 0, 1/5, 1/5, 1/5, 1/5, 1/5, 0, 1/5, 1/5, 1/5, 1/5, 1/5, 0, 1/5, 1/5, 1/5, 1/5, 1/5, 0, 1/5, 1/5, 1/5, 1/5, 1/5, 0), byrow = TRUE, nrow = nrow(t)), name = "random")

rv = data.frame(markovchainFit(data = rv)$estimate@transitionMatrix)
re = data.frame(markovchainFit(data = re)$estimate@transitionMatrix)
rl = data.frame(markovchainFit(data = rl)$estimate@transitionMatrix)
rt = data.frame(markovchainFit(data = rt)$estimate@transitionMatrix)

rv <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(rv)), byrow = TRUE, nrow = nrow(v)), name = "rvalence")
re <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(re)), byrow = TRUE, nrow = nrow(e)), name = "renergy")
rl <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(rl)), byrow = TRUE, nrow = nrow(l)), name = "rloudness")
rt <- new("markovchain", states = c("1", "2", "3", "4", "5", "6", "7", "8", "i"), transitionMatrix = matrix(data = as.vector(t(rt)), byrow = TRUE, nrow = nrow(t)), name = "rtempo")

# library("lattice")
# levelplot(t(v@transitionMatrix[2:8, 1:8]), ylab = "x", xlab = "x+1")

v_ll <- c(); e_ll <- c(); l_ll <- c(); t_ll <- c()
r_v <- c(); r_e <- c(); r_l <- c(); r_t <- c()

for(i in 1:length(valence_t)){
    v_ll[i] <- mll(v, valence_t[[i]])
    e_ll[i] <- mll(e, energy_t[[i]])
    l_ll[i] <- mll(l, loudness_t[[i]])
    t_ll[i] <- mll(t, tempo_t[[i]])
        
    r_v[i] <- mll(rv, valence_t[[i]])
    r_e[i] <- mll(re, energy_t[[i]])
    r_l[i] <- mll(rl, loudness_t[[i]])
    r_t[i] <- mll(rt, tempo_t[[i]])
    }
}

log_likelihoods <- 
    data.frame(empirical_valence = v_ll,
               empirical_energy = e_ll,
               empirical_loudness = l_ll,
               empirical_tempo = t_ll,
               
               random_valence = r_v,
               random_energy = r_e,
               random_loudness = r_l,
               random_tempo = r_t
              )

ll <-
    melt(data = log_likelihoods, 
        measure.vars = c('empirical_valence',
                         'empirical_energy',
                         'empirical_loudness',
                         'empirical_tempo',
                         'random_valence',
                         'random_energy',
                         'random_loudness',
                         'random_tempo')
    )

ll<- 
    tidyr::separate(ll, 
         variable, 
         c("condition", "feature"), 
         "_")

library(ggplot2)
teste <- plyr::ddply(ll, c('feature', 'condition'), plyr::summarise,
                           log_likelihood = mean(value),
                           se = sd(value)/sqrt(length(value))
)
teste
ggplot(teste, aes(x=feature, y=log_likelihood, color = condition)) +
    geom_errorbar(size = 1.2, aes(ymin=log_likelihood-se, ymax=log_likelihood+se), position=position_dodge(0.5)) +
    geom_point(size = 3, position=position_dodge(0.5), fill = 'black')

png

Conclusion

For some features, the empirically derived playlister $A$ is superior to the randomized playlister, which I created by deriving a transition matrix from a randomly shuffle playlist (playlister $A'´$). Generally, I found that \[ \mathcal{L}(P_i|A)\ >\ \mathcal{L}(P_j|A_r) \]