library(MASS) library(lattice) source("boulder_library.R") #----------------------------------------------------------- # Looking at an F2 cross where the QTL is one of the markers #----------------------------------------------------------- # read marker data marker.data <- read.delim("f2.markers") head(marker.data) # read genetic data using our function ped <- read.ped("f2.ped", marker.names = marker.data$name) head(ped) # test marker association of qtl to genotype # fit a model to a marker that's not the qtl fit <- lm(Phenotype ~ m1, data=ped) fit str(fit) # test whether m1 ontributes significantly fit0 <- lm(Phenotype ~ 1, data=ped) fit0 # compare the two models fit.anova <- anova(fit0, fit) fit.anova # a short cut to doing this is to just give anova() the bigger model fit.anova <- anova(fit) fit.anova # fit a linear model, store the results in fit, look at the results fit <- lm(Phenotype ~ q1, data=ped) fit.anova <- anova(fit) fit.anova # check for dominance # first look at how the additive model is coded ped$q1 # easier to just print out, say, the first 10 values ped$q1[1:10] # to get each of these numbers to a number representing dominance # we need to generate new numbers equal to 1-x^2 # eg, if x is -1 then x <- -1 1 - x^2 # we can do this to many observations at a time 1 - (ped$q1[1:10])^2 # and even write a function to do it for us # Note that g is just a placeholder for whatever you give the function dominance.part <- function(g) { return (1-g^2) } # now we can use this function in R window dominance.part(-1) dominance.part(0) dominance.part(1) dominance.part(ped$q1[1:10]) # fit each model fit0 <- lm(Phenotype ~ 1, data=ped) fit1 <- lm(Phenotype ~ q1, data=ped) fit2 <- lm(Phenotype ~ q1 + dominance.part(q1), data=ped) # do comparison of nested models (aka nested anova) anova(fit0, fit1, fit2) # alternatively do it with single argument version of anova() anova(fit2) # compare no effect against full model (additive and dominance) anova(fit0, fit2) # extract the pvalue from the anova table pval <- fit.anova["q1", "Pr(>F)"] pval # work out the logP logP <- -log10(pval) logP # work out the logP for all markers and store the results in a # table "chrom.scan" scan.markers(Phenotype ~ MARKER, data=ped, marker.data=marker.data) chrom.scan <- scan.markers(Phenotype ~ MARKER, data=ped, marker.data=marker.data) chrom.scan plot(logP ~ cM, data=chrom.scan, type="b") #------------------------------ # what should be our threshold? #------------------------------ # pval = 0.05? simple.threshold <- -log10(0.05) abline(h=simple.threshold, col="red") # bonferroni correct pval = 0.05 / number of markers bonferroni.threshold <- -log10(0.05/21) abline(h=bonferroni.threshold, col="blue") # Best way is to calculate threshold by permutation: # randomly shuffle the phenotype values of the animals # and redo the chromosome scan. Do this many times # to work out what logP value is exceeded 0.05 or less of the time. # The sample() function shuffles a sequence, eg, x <- c(1,2,3,4,5) sample(x) sample(x) sample(x) # make a copy of ped data perm.ped <- ped # shuffle the phenotypes so many animals have other animals phenotypes perm.ped$Phenotype <- sample(ped$Phenotype) # rescan perm.scan <- scan.markers(Phenotype ~ MARKER, data=perm.ped, marker.data=marker.data) perm.scan # all the numbers look quite low # the maximum is... max(perm.scan$logP) # do this many times, eg, 200 num.permutations <- 200 max.logPs <- numeric(num.permutations) for (i in 1:num.permutations) { # permutation step perm.ped$Phenotype <- sample(ped$Phenotype) perm.scan <- scan.markers(Phenotype ~ MARKER, data=perm.ped, marker.data=marker.data) max.logPs[i] <- max(perm.scan$logP) # show progress by printing output for each permutation cat("Permuting ", i, "/", num.permutations, ": max=", max.logPs[i], "\n") flush.console() } # calculate the logP that is exceeded only 0.05 of the time # (should be about 2.2) threshold <- quantile(max.logPs, 1 - 0.05) threshold abline(h=threshold, col="green") # look at the distribution of max logPs obtained in the permutations hist(max.logPs) abline(v=c(simple.threshold, bonferroni.threshold, threshold), col=c("red", "blue", "green")) # they exceed the simple uncorrected threshold almost half the time!