#####################################################
# Generalized confusion matrix maker                #
# Aaron Braver  www.aaronbraver.com                 #
# v 1.0  May 26, 2013                               #
#                                                   #
# Given files in a directory (or just a             #
# data.frame) with counts of participant responses  #
# and associated correct/actual values,             #
# generate a confusion matrix with either raw       #
# values, or with percents                          #
#####################################################

# ---- Configuration ----

# Take multiple (participant) files in a directory
# and combine them into a single data frame
directory <- "/Users/your/path/to/directory/with/files"

# Glob selecting the participant files.
# NB: other file types may require other read functions below (e.g., read.csv)
filetype <- "*.txt"

# ---- Load the data ----

# Get the files in the directory. list.files() matches `pattern` as a
# regular expression, so the glob in `filetype` is translated first.
txtFiles <- list.files(
  directory,
  full.names = TRUE,
  pattern = utils::glob2rx(filetype)
)
if (length(txtFiles) == 0) {
  stop(
    "No files matching '", filetype, "' found in ", directory,
    call. = FALSE
  )
}

# Make the data frame: read every file, then bind once instead of growing
# the data frame inside a loop.
# Change the read.table options, as appropriate.
x <- do.call(
  rbind,
  lapply(txtFiles, read.table, header = FALSE, sep = "\t", skip = 6)
)

# Select any subsets you need (e.g., remove practice blocks)
# Otherwise, just comment this out
x <- subset(x, x[, 3] != "Practice" & x[, 3] != "EndofPractice")

if (nrow(x) == 0) {
  stop("No rows left to tabulate after subsetting.", call. = FALSE)
}

# ---- Guesses and actual values ----

# Specify which columns contain guesses and
# correct/actual values
#
# The regex in vecActual was to get the actual/correct level
# from a long SuperLab trial name..., but you might want just, e.g., x[,5]
vecActual <- sub("(^.*)(.)_S(.*)", "\\2", x[, 5])
vecGuess <- x[, 7] - 1

# One row per observed level, with its frequency in the second column.
Actual.df <- data.frame(table(vecActual))
Guess.df <- data.frame(table(vecGuess))

# ---- Make the confusion matrices ----
#   ConfusionMatrix has raw values
#   PercentMatrix has percents by column

actualLevels <- as.character(Actual.df[, 1])
guessLevels <- as.character(Guess.df[, 1])

# A single cross-tabulation replaces the cell-by-cell rescans of x.
# Rows are guesses, columns are actual values.
crossTab <- table(
  factor(vecGuess, levels = guessLevels),
  factor(vecActual, levels = actualLevels)
)
counts <- matrix(
  as.vector(crossTab),
  nrow = length(guessLevels),
  ncol = length(actualLevels),
  dimnames = list(guessLevels, actualLevels)
)

# Each column is divided by the number of rows having that actual value
# (the frequency column of Actual.df), so rows whose guess is NA still
# count towards the denominator.
percents <- round(100 * sweep(counts, 2, Actual.df[, 2], "/"), 2)

ConfusionMatrix <- as.data.frame(counts)
PercentMatrix <- as.data.frame(percents)

# Set the column and row names explicitly to make printing prettier
colnames(ConfusionMatrix) <- actualLevels
rownames(ConfusionMatrix) <- guessLevels
colnames(PercentMatrix) <- actualLevels
rownames(PercentMatrix) <- guessLevels

# ---- Printing ----

# Shared printer for both matrices.
#   mat:   data frame with guesses as rows and actual values as columns
#   title: heading written above the table
# Prefixes the first row label with "Guess:" and pads the remaining labels
# to the same width so the guess values line up, prints the table, and
# returns the formatted data frame invisibly.
printLabelledMatrix <- function(mat, title) {
  printed <- mat
  if (nrow(printed) > 0) {
    padding <- formatC("", width = nchar("Guess:"))
    prefixes <- c("Guess:", rep(padding, nrow(printed) - 1))
    rownames(printed) <- paste(prefixes, rownames(mat))
  }
  cat("\n\t\t", title, "\n\n\t\t\t\tActual value\n", sep = "")
  formatted <- format(printed, width = 4)
  # Print explicitly so the table also appears when the script is run
  # through source(), which does not auto-print top-level values.
  print(formatted)
  invisible(formatted)
}

# Print the Confusion Matrix in a pretty fashion
#   mat: matrix to print; defaults to the global ConfusionMatrix
printConfusionMatrix <- function(mat = ConfusionMatrix) {
  printLabelledMatrix(mat, "Confusion Matrix")
}

printConfusionMatrix()

# Print the Confusion Matrix, with percents, in a pretty fashion
#   mat: matrix to print; defaults to the global PercentMatrix
printPercentMatrix <- function(mat = PercentMatrix) {
  printLabelledMatrix(mat, "Confusion Matrix (percents)")
}

printPercentMatrix()