################################################################################################## # GeneEvolve version 0.73 # # SIMULATION PROGRAM FOR EVOLVING AND CREATING EXTENDED PEDIGREE & TWIN DATA # # by Matthew C Keller, updated 8/7/2008 # ################################################################################################## #TO RUN: 1) Open the GeneEvolve.zip file in your "working directory" - # all scripts must remain in that directory. # 2) Make changes to the parameters below (#1 of this script) by placing your desired # inputs to the *right* of the arrows (<- x # i.e., user replaces "x"). # 3) Advanced users might consider changing parameters under section 1.5 # 4) Never remove any hash marks (#). Never change anything outside of sections 1 & 1.5 # 5) Run entire script, either in BATCH mode (e.g., R CMD BATCH GeneEvolve69.R on UNIX run # line) or by highlighting the entire script and running it in an R session. # 6) At end, you will have 5 data files (extended families of different twin types) plus a # PDF summary of each GeneEvolve run and several matrices remove(list=ls()) #1 BASIC USER INPUT PARAMETERS (31) ################################################################################################# #BASICS working.directory <- "H:\\GE" # working directory where all files saved make.graphics <- "yes" # do you want to create a full graphical report at end? #DEMOGRAPHIC DETAILS: number.generations <- 10 # number of generations to evolve before pedigree data created start.pop.size <- 10000 # breeding population size at start of simulation; should be > 100 pop.growth <- rep(1,number.generations) #vector of length number.generations that tells how big # each generation is relative to the one before it; for no growth: rep(1,number.generations); # for constant 5% growth: rep(1.05,number.generations) #MODEL DETAILS: thresholds <- FALSE # EITHER a vector of thresholds corresponding to location on # standard normal OR "FALSE", meaning continuous am.model <- "I" # <- "I" is "primary phenotypic assortment" # (corr b/w mates due to their choosing similar phenotypes) # <- "II" is "social homogamy" # (corr b/w mates due to correlated environmental factors) #DATASET PARAMETERS percent.mz <- .44 # % of twins who are MZ (in pedigree creation) range.dataset.age <- c(14,90) # The pedigree datasets includes twins plus twin offspring,sibs, # spouses, and parents. This is the range of ages in that dataset range.twin.age <- c(18,80) # Range of twin ages in final dataset at each timepoint. Is a # VECTOR twice as long as # of (repeated) measurements # Ex. 1, for data of adult twins of all ages measured once: <- c(18,100) # Ex. 2, for data of twins measured at exactly 14: <- c(14,14) # Ex. 3, for data of twins measured at 14-18 and again (i.e., REPEATED MEASURES) at # 18-25: <- c(14,18,18,25) #VARIANCES: These are variance terms of 1st gen (they may change thereafter). # Its nice, but unnecessary, for them to sum to 1 #Genetic Factors: A <- .4 # var acct for by A (additive genetic variance) AA <- .0 # var acct for by AxA (additive-by-additive) epistasis. D <- .1 # var acct for by D (dominance genetic variance) #Environmental Factors: U <- .3 # var acct for by U (unique variance). # Note: in non-twins, E (eg, the E from ACE models)=U+MZ+T MZ <- .0 # var acct for by special MZ twin env (this goes into E for non-mz individuals) TW <- .0 # var acct for by special twin env (this goes into E for non-twin individuals) F <- .0 # var acct for by F (familial environment). # This is passed from parents to offspring via vertical transmission S <- .2 # var acct for by S (sibling environment). #Covariates & Moderators AGE <- .0 # var acct for by age; This term can be negative; # sqrt(abs(AGE))= beta(age) = change in phenotype over 1 SD of age (15-45) SEX <- .0 # var acct for by sex (female=1,male=0); This term can be negative # sqrt(abs(SEX))= beta(sex) = change in phenotype over 1 SD of sex A.by.AGE <- .0 # var acct for by interaction bw A & Age; # Also, the var of slopes of A across age range specified in range.pop.age A.by.SEX <- .0 # var acct for by interaction bw A & Sex; # Also, the var of slopes of A across gender. A.by.S <- .0 # var acct for by interaction bw A & S (sib env). A.by.U <- .0 # var acct for by interaction bw A & U (unique env). #COVARIANCES: (Note: when there are covariances b/w components, total var != sum(variances)) latent.AM <- rep(.0,number.generations) #VECTOR of length = number.generations; # corr b/w spouses' latent mating phenotype each gen., as determined by am.model. # NOTE: the actual phenotypic correlation bw spouses is <= to this A.S.cor <- .0 # corr b/w A & S - caused by ACTIVE g-e covariance A.U.cor <- .0 # corr b/w A & U - caused by ACTIVE g-e covariance R.Alevel.Aslope.age <- .0 #corr bw intercept & slope. Should be b/w -1 & 1. R.Alevel.Aslope.sex <- .0 #corr bw intercept & slope. Should be b/w -1 & 1. R.Alevel.Aslope.S <- .0 #corr bw intercept & slope. Should be b/w -1 & 1. R.Alevel.Aslope.U <- .0 #corr bw intercept & slope. Should be b/w -1 & 1. # Note on R.Alevel.Aslope.X: these are the corr's b/w the intercepts and slopes of A; # e.g., do people with the highest A values also have the highest changes in A across X? # if this is 1 (-1), A.by.X becomes the scalar interaction coefficient # - i.e., the PURCEL model where h2 increases (decreases) as X increases # if this is 0, A.by.X becomes nonscalar # - i.e., h2 remains ~ constant but diff genes turn on at diff values of X # WARNING: for scalar interactions, including only interaction var with no main effect var of A # will not produce what is desired! ################################################################################################# #1.5 ADVANCED USER INPUT PARAMETERS (MULTIPLE RUNS, RANDOM PARAMETERS, RUNNING MX, ETC) (17) ############################################################################ number.runs <- 1 # multiple GE runs- useful for finding bias, var/covar of parameter estimates rand.parameters <- "no" # do you want GE to use random parameters? # Choices: "no","all","A","D","F","S","AM.mod". "all" randomly chooses A,D,F,S, & AM.mod save.gen.data <- "no" # do you want to write out the datasets for each generation? # Answer "no" unless you want a detailed ancestral record continuation <- "no" # is this a continuation of a job you are now restarting? # Assures the old Parameter.Comparison file is not overwritten save.rdata <- "no" # do you want to save the .RData file? save.objects <- "no" # do you want to save all the R objects created? # Generally, you should answer "no" unless you're debugging. Options: "yes" or "no". number.genes <- 7 # number of genes affecting phenotype; Must be > 2 but <= 50. 5+ is fine sibling.age.cor <- .5 # corr b/w sibs' ages within a breeding cohort. # Higher corr. imply less spacing in years b/w sibs. range.pop.age <- range.twin.age[1:2] # range of age in the population over which the beta terms # above explain the AGE and A.by.AGE variation; I recommend placing this # the same as the first two entries in range.twin.age above real.missing <- "no" #do you want missingness patterns to emulate the ozva60k? run.cascade.model <- "no" run.stealth.model <- "no" run.corr <- "no" #gets 88 relative covariances using Mx sib.cor <- "yes" #do you want GE to compute the correlations b/w sibs for each generation? gene.model <- "common" # this must be equal to "common" or "rare"; most people should choose rare! # If "common", total variance will not usually be sum of var components due # to cor of var components (e.g., A & D) number.alleles <- 2 # in the "common" model ONLY, how many alleles per locus? mx.location <- "/usr/local/bin/mxt" #full path of location of mx (unix server example) ############################################################################ ################################################################################################# #Start (NO NEED TO ALTER THE SCRIPT BELOW THIS LINE) ################################################################################################# #2 LOAD FUNCTIONS USED IN SCRIPT & CREATE LISTS OF PARAMETER VALUES ABOVE TO ENTER INTO FUNCTIONS ############################################################################ #2.0 LOAD FUNCTIONS #change working directory setwd(working.directory) #load all PE functions source("GE.Functions_Short.R") source("GE.Function_AM.R") source("GE.Function_Reproduce.R") source("GE.Function_MakeDataFile.R") source("GE.Function_MakePhenotype.R") source("GE.Function_Run.Gene.Evolve73.R") source("GE.Function_Graph.R") #make PAR and VAR lists; remove unnecessary variables vars <- c("A", "A.S.cor","A.U.cor","A.by.AGE","A.by.S","A.by.U","AA","AGE","D","F","MZ","S","TW", "U","SEX","A.by.SEX","R.Alevel.Aslope.S","R.Alevel.Aslope.U","R.Alevel.Aslope.age", "R.Alevel.Aslope.sex","am.model","latent.AM","make.graphics","mx.location", "number.generations","number.genes","percent.mz","pop.growth","range.dataset.age", "range.pop.age","range.twin.age","save.gen.data","save.objects","sib.cor", "sibling.age.cor","start.pop.size","working.directory","run.cascade.model", "run.stealth.model","gene.model","number.alleles","real.missing","save.rdata", "number.runs","continuation","run.corr") PAR1 <- list(working.directory=working.directory, save.gen.data=save.gen.data, save.objects=save.objects,sib.cor=sib.cor, mx.location=mx.location, make.graphics=make.graphics, number.generations=number.generations, start.pop.size=start.pop.size, pop.growth=pop.growth, number.genes=number.genes, am.model=am.model, percent.mz=percent.mz, range.dataset.age=range.dataset.age, sibling.age.cor=sibling.age.cor,range.twin.age=range.twin.age, range.pop.age=range.pop.age,run.cascade.model=run.cascade.model, run.stealth.model=run.stealth.model, gene.model=gene.model, number.alleles=number.alleles,real.missing=real.missing, save.rdata=save.rdata,number.runs=number.runs,continuation=continuation, run.corr=run.corr,rand.parameters=rand.parameters,thresholds=thresholds) VAR1 <- list(A=A, AA=AA, D=D,F=F, S=S, U=U, MZ=MZ, TW=TW, SEX=SEX, AGE=AGE, A.by.SEX=A.by.SEX, A.by.AGE=A.by.AGE, A.by.S=A.by.S, A.by.U=A.by.U,latent.AM=latent.AM, A.S.cor=A.S.cor, A.U.cor=A.U.cor, R.Alevel.Aslope.sex=R.Alevel.Aslope.sex, R.Alevel.Aslope.age=R.Alevel.Aslope.age,R.Alevel.Aslope.S=R.Alevel.Aslope.S, R.Alevel.Aslope.U=R.Alevel.Aslope.U) remove(list=vars) #record time PAR1$t1 <- Sys.time() ################################################################################################# ################################################################################################# #3 RUN GENEVOLVE, RUN MX (OPTIONAL) & CREATE GRAPHICS for (iteration in 1:PAR1$number.runs){ #reset parameters each run if rand.parameters above is not "no" BOTH <- alter.parameters(P=PAR1,V=VAR1,max=1) VAR1 <- BOTH$VAR PAR1 <- BOTH$PAR remove(BOTH) #Run GeneEvolve ALL <- run.gene.evolve(PAR=PAR1,VAR=VAR1,iter=iteration) gc() #Run Cascade model {if (PAR1$run.cascade.model=='yes'|PAR1$run.stealth.model=='yes'|PAR1$run.corr=='yes'){ source("GE.Function_RunCascade.R") MX <- run.cascade.mx(ALL.PARS=ALL$PAR, iter=iteration, PE.Var2=ALL$PE.Var2, PE.Var3=ALL$PE.Var3,rel.corr=ALL$rel.correlations,filenames=ALL$TEMP$name.date, data.info=ALL$TEMP$info.widedata.updated, cont=PAR1$continuation, run.twice=TRUE)} else MX <- NULL } gc() #Generate Graphics - runs function "Generate.Graphics" which is defined in script "GE.Graph.1.R" if (PAR1$make.graphics=="yes") { ge.graphics(date.names=ALL$TEMP$name.date, PARAMS=ALL$PAR, VARIANCES=VAR1, MX.info=MX, PE.Var3=ALL$PE.Var3, PE.Var2=ALL$PE.Var2, rel.correlations=ALL$rel.correlations, track.changes=ALL$track.changes, data.info=ALL$TEMP$info.widedata.updated)} } #Save *.RData file if save.rdata=="yes" - names it by date if (PAR1$save.rdata=="yes"){ save.image(file=paste(ALL$TEMP$name.date,".RData",sep="")) } #################################################################################################