## This file creates arrays of MFCCs (mel-frequency cepstral coefficients)
## from one-second wav files.
## The wav files must be downloaded separately from
## `download.tensorflow.org/data/speech_commands_v0.01.tar.gz`
##
## Output: "speech-MFCCs.rds", a named list with one element per command
## word. Each element is an array of dimension (recordings, p1, p2) whose
## first dimnames component holds the first 8 characters of each file name.

## path to the unzipped directory of wav files (must end with "/")
ddir <- "~/Dropbox/Data/SpeechCommand/"

## command words
words <- c("yes", "no", "up", "down", "left", "right", "on", "off", "stop", "go")

## dimensions of the MFCC matrix stored for each recording
p1 <- 99 # time points
p2 <- 13 # cepstral coefficients

## load data and compute mfccs
MC <- vector("list", length(words))
for (k in seq_along(words)) {

  # setup data array for this word
  word <- words[k]
  word_dir <- paste0(ddir, word)

  # list.files() instead of shelling out to `ls`: portable, and safe for
  # paths that contain spaces or shell metacharacters
  files <- list.files(word_dir, pattern = "\\.wav$", ignore.case = TRUE)
  if (length(files) == 0) {
    warning("no wav files found in ", word_dir, call. = FALSE)
  }

  # NA-filled so that recordings which are skipped below remain all-NA
  Y <- array(NA_real_, dim = c(length(files), p1, p2))
  id <- substr(files, 1, 8)

  # load wave files and compute mfccs
  for (i in seq_along(files)) {
    wi <- tuneR::readWave(file.path(word_dir, files[i]))
    # only recordings with exactly 16000 samples are processed
    if (length(wi) == 16000) {
      Y[i, , ] <- tuneR::melfcc(wi, numcep = p2, wintime = .01, hoptime = .01)
    }
  }

  # keep only recordings whose MFCC matrix was filled in completely.
  # A logical mask is used on purpose: negative indexing with an empty
  # index vector (Y[-integer(0), , ]) would drop EVERY recording whenever
  # no file needed to be removed.
  keep <- !is.na(rowSums(Y))
  # drop = FALSE keeps the array three-dimensional even if one row remains
  Y <- Y[keep, , , drop = FALSE]
  id <- id[keep]
  dimnames(Y) <- list(id, NULL, NULL)

  MC[[k]] <- Y
  cat(word, "\n")
}
names(MC) <- words

saveRDS(MC, file = "speech-MFCCs.rds")