# CREATE A DATA FRAME FROM DATA ON SEVERAL GROUPS, HELD IN DIFFERENT FILES.
#
# R. M. Neal, 2003.
#
# Arguments:
#
#   files      A vector of file names.  Each file should contain numeric
#              data, separated by spaces and newlines.  All data in one file
#              is considered to belong to one group, with data in different
#              files belonging to different groups.
#
#   group.ids  A vector of group ids, one per file (default: the file names).
#              Character ids are turned into factor values; ids of any other
#              type are used as they are.
#
# The value of this function is a data frame with as many rows as the total
# number of numeric values in all files, and with two columns.  The first
# column (called "group") contains the group id for each item.  The second
# column (called "value") contains the numeric values for the items in each
# group.  Rows appear in the order of the files, and within a file in the
# order the values were read.
#
# A file containing no data contributes no rows; a message saying so is
# written to standard output.  If no file contains any data, the result is a
# data frame with zero rows that still has the "group" and "value" columns.
#
# The function stops with an error if no files are given, or if the number
# of group ids differs from the number of files.

group.data <- function (files, group.ids=files)
{
  # Verify that the arguments make sense.  Stop if not.

  if (length(files)==0)
  { stop("No files given to group.data")
  }

  if (length(group.ids)!=length(files))
  { stop("Number of group ids doesn't match number of files in group.data")
  }

  # Read the numeric data from each file in turn, keeping one vector per
  # file.  The message about an empty file is issued while reading, so that
  # it appears next to whatever 'scan' reports for that file.

  per.file <- lapply (seq_along(files), function (i)
  { d <- scan(files[i])
    if (length(d)==0)
    { cat("Warning: group.data found no data in",files[i],"\n")
    }
    d
  })

  # Assemble both columns in one step, rather than growing them file by
  # file.  Each id is repeated once per value read from its file, so a file
  # with no data contributes no ids.

  counts <- lengths(per.file)
  values <- unlist (per.file, use.names=FALSE)
  ids    <- rep (group.ids, times=counts)

  # Convert string ids to a factor explicitly.  Since R 4.0.0, 'data.frame'
  # no longer does this by default, so relying on it would leave the "group"
  # column as character.

  if (is.character(ids))
  { ids <- factor(ids)
  }

  # Create a data frame with columns containing the ids and the values.

  data.frame (group=ids, value=values)
}