library(MASS)
library(lattice)
setwd("f:/valdar/ThursdayAfternoonAnimals/")
source("leuven_library.R")

#-----------------------------------------------------------
# Looking at an F2 cross where the QTL is one of the markers
#-----------------------------------------------------------

# read marker data
marker.data <- read.delim("f2.markers")
head(marker.data)

# read genetic data using our function
ped  <- read.ped("f2.ped", marker.names = marker.data$name)
head(ped)

# test marker association of qtl to genotype

# fit a model to a marker that's not the qtl
fit <- lm(Phenotype ~ m1, data=ped)
fit
summary(fit)

# test whether m1 ontributes significantly
fit0 <- lm(Phenotype ~ 1, data=ped)
fit0

# compare the two models
anova(fit0, fit)

# a short cut to doing this is to just give anova() the bigger model
anova(fit)

# fit a linear model, store the results in fit, look at the results
fit <- lm(Phenotype ~ q1, data=ped)
an <- anova(fit)
an

# extract the pvalue from the anova table
pval <- an["q1", "Pr(>F)"]
pval

# work out the logP
logP <- -log10(pval)
logP

# work out the logP for all markers and store the results in a
# table "chrom.scan"
scan.markers(Phenotype ~ MARKER, data=ped, marker.data=marker.data)

chrom.scan <- scan.markers(Phenotype ~ MARKER, data=ped, marker.data=marker.data)
chrom.scan

plot(logP ~ cM, data=chrom.scan, type="o")

#------------------------------
# what should be our threshold?
#------------------------------

# pval = 0.05?
simple.threshold <- -log10(0.05)
abline(h=simple.threshold, col="red")

# bonferroni correct pval = 0.05 / number of markers
bonferroni.threshold <- -log10(0.05/21)
abline(h=bonferroni.threshold, col="blue")

# Best way is to calculate threshold by permutation:
# randomly shuffle the phenotype values of the animals
# and redo the chromosome scan. Do this many times
# to work out what logP value is exceeded 0.05 or less of the time.

# The sample() function shuffles a sequence, eg,
x <- c(1,2,3,4,5)
sample(x)
sample(x)
sample(x)

# make a copy of ped data
perm.ped <- ped
# shuffle the phenotypes so many animals have other animals phenotypes
perm.ped$Phenotype <- sample(ped$Phenotype)
# rescan
perm.scan <- scan.markers(Phenotype ~ MARKER, data=perm.ped, marker.data=marker.data)
perm.scan

# all the numbers look quite low
# the maximum is...
max(perm.scan$logP)

# do this many times, eg, 200
num.permutations <- 200
max.logPs <- numeric(num.permutations)
for (i in 1:num.permutations)
{
    # permutation step
    perm.ped$Phenotype <- sample(ped$Phenotype)
    perm.scan <- scan.markers(Phenotype ~ MARKER, data=perm.ped, marker.data=marker.data)
    max.logPs[i] <- max(perm.scan$logP)

    # show progress by printing output for each permutation
    cat("Permuting ", i, "/", num.permutations, ": max=", max.logPs[i], "\n")
    flush.console()
}

# calculate the logP that is exceeded only 0.05 of the time
# (should be about 2.2)
threshold <- quantile(max.logPs, 0.95)
threshold
abline(h=threshold, col="green")

# look at the distribution of max logPs obtained in the permutations
hist(max.logPs, col="gray")
abline(v=c(simple.threshold, bonferroni.threshold, threshold), col=c("red", "blue", "green"))
# they exceed the simple uncorrected threshold almost half the time!

#--------------------------------------------------------------------------
#  F12 designs (AILs)
#--------------------------------------------------------------------------

# F12 with 1 qtl

markers.f12 <- read.delim("f12.markers")
ped.f12 <- read.ped("f12.ped", markers.f12$name)
chrom.scan <- scan.markers(Phenotype ~ MARKER, data=ped.f12, marker.data=markers.f12)
head(chrom.scan)

threshold <- 2.22

plot.chrom.scan(chrom.scan, threshold=threshold)

#-----------------------------------------
# Stop and look at results: how many QTLs?
#-----------------------------------------


# find the maximum marker
chrom.scan[which.max(chrom.scan$logP),]

# draw it on the graph
abline(v=72.0)

#--------------------
# STOP HERE AND WAIT FOR EXPLANATION
#

# fit two markers separately
anova(lm(Phenotype ~ m32, data=ped.f12))
anova(lm(Phenotype ~ m37, data=ped.f12))

# fit m32 conditional on m37
# (ie, fit m37 first, then see if m32 adds anything more)
fit0 <- lm(Phenotype ~ m37, data=ped.f12)
fit1 <- lm(Phenotype ~ m37 + m32, data=ped.f12)
anova(fit0, fit1)
# or more simply
anova(fit1)


# redo the scan conditioning on the top marker
chrom.scan <- scan.markers(Phenotype ~ m37 + MARKER, h0=Phenotype ~ m37, data=ped.f12, marker.data=markers.f12)
plot.chrom.scan(chrom.scan, col="red", add=TRUE)

#---------------------------------------------------------------
# Stop and look at results: why has the second peak disappeared?
#---------------------------------------------------------------

# F12 with two QTLs
markers.f12a <- read.delim("f12_twoqtls.markers")
ped.f12a <- read.ped("f12_twoqtls.ped", markers.f12a$name)
chrom.scan <- scan.markers(Phenotype ~ MARKER, data=ped.f12a, marker.data=markers.f12a)
plot.chrom.scan(chrom.scan, threshold=threshold)

# find the maximum peak and condition on it
chrom.scan[which.max(chrom.scan$logP),]
abline(v=72.0)

chrom.scan <- scan.markers(Phenotype ~ m37 + MARKER, h0=Phenotype ~ m37, data=ped.f12a, marker.data=markers.f12a)
plot.chrom.scan(chrom.scan, col="red", add=TRUE)

#-------------------------
# Stop and look at results
#-------------------------

# Q: how can you test whether there are further QTLs?

chrom.scan[which.max(chrom.scan$logP),]
abline(v=56.0)

chrom.scan <- scan.markers(Phenotype ~ m37 + m29 + MARKER, h0=Phenotype ~ m37 + m29, data=ped.f12a, marker.data=markers.f12a)
plot.chrom.scan(chrom.scan, col="blue", add=TRUE)

# Q: how many QTLs are there?