# This script creates a *.csv table with relevant metadata in tabular form.
# It can be run from within R as follows:
#   > setwd('path/to/folder/where/the/imdis/are/located')
#   > source('path/to/where/you/put/imdi.table.R')
# or from within the shell (on a machine on which R is installed):
#   1. move imdi.table.R into the same directory where you store the *.imdi
#      files, then cd there:
#   2. $ cd path/to/folder/where/the/imdis/are/located
#   3. $ R CMD BATCH --slave imdi.table.R
#
# If it doesn't work, a likely problem is that the XML library is not
# installed. If so, do this in the shell:
#   1. $ R
#   2. > install.packages("XML")
# You will be asked to choose a mirror. Select one near you.
#
# Output: 'imdi.table.csv' in the working directory, one row per actor
# (speaker) per session, with the columns session, code, full.name, role,
# social.role, age, birth.date, rec.date and rec.duration.

library(XML)

# Namespace prefix used by every XPath expression below.
imdi.ns <- c(o = "http://www.mpi.nl/IMDI/Schema/IMDI")

# Return the text of the first node matching `path` (evaluated relative to
# `node`, which may be a whole document or a single element), or NA when
# nothing matches.
first.value <- function(node, path) {
  hits <- unlist(xpathApply(node, path, xmlValue, namespaces = imdi.ns))
  if (length(hits) == 0) NA_character_ else as.character(hits[1])
}

# Extract one field (e.g. "Age") for every actor node, one value per actor.
# Looking the field up inside each actor keeps all columns aligned even when
# an actor lacks that element; such actors get NA instead of shifting the
# values of the following speakers.
actor.field <- function(actors, name) {
  vapply(actors, first.value, character(1),
         path = paste0(".//o:", name), USE.NAMES = FALSE)
}

# Read one *.imdi file and return a named list of equal-length vectors
# (one element per actor) holding the session and speaker metadata.
read.session <- function(path) {
  doc <- xmlTreeParse(path, asTree = FALSE, useInternalNodes = TRUE)
  actors <- getNodeSet(doc, "//o:Session//o:MDGroup//o:Actors//o:Actor",
                       namespaces = imdi.ns)
  n.actors <- length(actors)

  # Session-level values: only the first match is used (for the date, that
  # is the one in the first field), and NA is used when the element is absent.
  rec.date <- first.value(doc, "//o:Session//o:Date")
  duration <- first.value(
    doc,
    "//o:Session//o:Resources//o:Source//o:Keys//o:Key[@Name='TimeDuration']"
  )

  list(
    session      = rep(sub("\\.imdi$", "", path), length.out = n.actors),
    code         = actor.field(actors, "Code"),
    full.name    = actor.field(actors, "FullName"),
    role         = actor.field(actors, "Role"),
    social.role  = actor.field(actors, "FamilySocialRole"),
    age          = actor.field(actors, "Age"),
    birth.date   = actor.field(actors, "BirthDate"),
    rec.date     = rep(rec.date, length.out = n.actors),
    rec.duration = rep(duration, length.out = n.actors)
  )
}

# Session files are those whose name ends in a digit followed by '.imdi'.
imdi.files <- dir(pattern = "\\d\\.imdi$")
if (length(imdi.files) == 0) {
  warning("no *.imdi session files found in ", getwd(), call. = FALSE)
}

speaker.ages <- lapply(imdi.files, read.session)

# coerce into one big dataframe:
imdi.metadata <- do.call(
  rbind,
  lapply(speaker.ages, as.data.frame, stringsAsFactors = FALSE)
)

write.csv(imdi.metadata, file = 'imdi.table.csv')

## notes:
# once we have age groups in the keys, here is how we'll read them:
# unlist(xpathApply(test, "//o:Session//o:MDGroup//o:Actors//o:Key[@Name='Age Group']", xmlValue, namespaces=c(o="http://www.mpi.nl/IMDI/Schema/IMDI")))