## This file creates matrices of MFCCs from one-second wav files. 
## The wav files must be downloaded separately from 
## `download.tensorflow.org/data/speech_commands_v0.01.tar.gz`


## path to the unzipped directory of wav files (one subdirectory per word)
ddir<-"~/Dropbox/Data/SpeechCommand/" 

## command words 
words<-c("yes","no","up","down","left","right","on","off","stop","go")

## dimensions of the MFCC matrix stored for each recording
p1<-99 # time points
p2<-13 # cepstral coefficients

## load data and compute mfccs
## MC is a list with one element per word; each element is an 
## (recordings x p1 x p2) array whose first dimnames are the 
## first eight characters of the corresponding file names. 
MC<-vector("list",length(words)) 
names(MC)<-words 

for(k in seq_along(words)){   

  # setup data array for this word
  word<-words[k]
  wdir<-paste0(ddir,word)

  # list.files is used instead of a shell call to `ls` so that the script 
  # does not depend on a shell and tolerates spaces in the path
  files<-list.files(wdir,pattern="\\.wav$",ignore.case=TRUE)
  if(length(files)==0){ 
    stop("no wav files found in ",wdir,call.=FALSE) 
  }

  Y<-array(dim=c(length(files),p1,p2)) 
  id<-substr(files,1,8) 

  # load wave files and compute mfccs 
  # rows of Y are left as NA for recordings whose length is not 16000
  for(i in seq_along(files)){ 
    wi<-tuneR::readWave(file.path(wdir,files[i]))
    if(length(wi)==16000){ 
      Y[i,,]<-tuneR::melfcc(wi,numcep=p2,wintime=.01,hoptime=.01) 
    } 
  } 

  # keep only the recordings whose mfcc matrix has no missing values. 
  # A logical mask is used rather than negative indices: with negative 
  # indices, an empty set of bad rows would select nothing and silently 
  # discard every recording for the word. 
  keep<-!is.na(apply(Y,1,sum))
  Y<-Y[keep,,,drop=FALSE]   # drop=FALSE keeps a 3-d array even for one row
  id<-id[keep]  
  dimnames(Y)<-list(id,NULL,NULL) 

  MC[[k]]<-Y
  cat(words[k],"\n") 
} 

saveRDS(MC,file="speech-MFCCs.rds")