# SOLUTION TO STA 247 ASSIGNMENT 2. # SIMULATE A HASH TABLE SCHEME. The arguments are as follows: # # n.tables Number of hash tables to simulate # n.add Number of keys to add to each table # n.lookups Number of keys to look up in each table after adding keys # range The range of integers from which to pick keys # size The number of buckets in a hash table # step The maximum number of buckets to step by after a collision # # The value of this function is a matrix of dimensions n.tables by n.lookups # holding the number of probes needed for each of the lookup operations. # # See the assignment sheet for further details. hash.simulator <- function (n.tables, n.add, n.lookups, range, size, step) { # Create a matrix to hold the results. results <- matrix(NA,n.tables,n.lookups) # Main loop. Each iteration creates a table, and then lookups up keys in it. for (i in 1:n.tables) { # Create a new hash table. hash.table.setup(size,step) # Add n.add keys to the table, sampling them without replacement from the # given range. keys.added <- sample(range,n.add,replace=FALSE) for (k in keys.added) { hash.lookup(k,TRUE) } # Look up n.lookups keys in the table, sampling them with replacement from # the given range. Record how many probes were needed for each lookup. keys.looked.for <- sample(range,n.lookups,replace=TRUE) for (j in 1:n.lookups) { results[i,j] <- hash.lookup(keys.looked.for[j],FALSE) $ probes } } # Return the matrix holding the number of probes for each lookup. results } # DO THE EXPERIMENTS COMPARING THE HASHING SCHEMES. Prints various estimates, # and produces various plots, in the ass2.ps file. The argument is the # random number seed to use for the simulations. hash.experiments <- function (seed) { # Simulate hash tables using step=1 and step=20. set.seed(seed) res.1 <- hash.simulator (50, 160, 200, 1:1000, 197, 1) set.seed(seed) res.20 <- hash.simulator (50, 160, 200, 1:1000, 197, 20) # Initialize for plotting. postscript("ass2.ps",paper="letter",horizontal=TRUE) # Uses "landscape" mode par(mfrow=c(1,2)) # Two plots on one page # Produce histograms of the numbers of probes required hist (res.1) hist (res.20) # Print estimates of the expected numbers of probes for the two schemes. cat("Estimates for the expected numbers of probes needed:", round(mean(res.1),2), round(mean(res.20),2), "\n") # Produce histograms of the average numbers of probes required for tables. res.1.means <- apply(res.1,1,mean) res.20.means <- apply(res.20,1,mean) hist (res.1.means) hist (res.20.means) # Use the means for tables (which are independent) to estimate the accuracy # of the above estimates. cat("Standard errors for the estimates above:", round(sd(res.1.means)/sqrt(50),2), round(sd(res.20.means)/sqrt(50),2), "\n") # Estimate the probability that more than 25 probes will be required for # each of the two schemes. cat("Estimated probabilities that over 25 probes will be needed:", round(mean(res.1>25),4), round(mean(res.20>25),4), "\n") # Estimate the accuracy of the above estimates, again using means for tables. res.1.pr <- apply(res.1>25,1,mean) res.20.pr <- apply(res.20>25,1,mean) cat("Standard errors for above estimates:", round(sd(res.1.pr)/sqrt(50),4), round(sd(res.20.pr)/sqrt(50),4), "\n") dev.off() # Return the simulation results, in case they're useful later. list (res.1=res.1, res.20=res.20) }