###################################################################################### # GeneEvolve version 0.70 # # AGENT BASED MODEL FOR EVOLVING AND CREATING EXTENDED PEDIGREE & TWIN DATA # # by Matthew C Keller, updated 11/19/2007 # ###################################################################################### #TO RUN: 1) open the GeneEvolve.zip file in your working directory - all files must remain in that directory. # 2) make changes to the parameters below by placing your desired inputs to the *right* of the arrows (<-). # 3) advanced users might consider changing parameters under section 1.5 # 4) run entire script, either in BATCH mode (e.g., R CMD BATCH GeneEvolve69.R) or by highlighting the whole thing and running it in an R session. remove(list=ls()) #1 USER INPUT PARAMETERS - ##################################################################################################################################### working.directory <- "C:/temp/GeneEvl" #working directory where all files saved save.gen.data <- "no" #do you want to write out the datasets for each generation? Answer "no" unless you want a detailed ancestral record make.graphics <- "yes" #do you want to create a full graphical report at the end of each run? sib.cor <- "yes" #do you want R to compute the correlations b/w sibs for each generation? #DEMOGRAPHIC DETAILS: number.generations <- 20 #number of generations to evolve before population makes twin/spouse parents, twins, sibs, spouses, & twin children start.pop.size <- 500 #breeding population size at start of simulation; should be > 100 to insure stability; to mimic ozva60k = 70500 pop.growth <- rep(1,number.generations) #vector of length number.generations that tells how big each generation is relative to the one before it; #for no growth: rep(1,number.generations); for constant 5% growth: rep(1.05,number.generations) #MODEL DETAILS: number.genes <- 5 #number of genes affecting phenotype; Must be > 2 but <= 50 am.model <- "I" # "I" = "primary phenotypic assortment" - correlation b/w mates due to their choosing similar phenotypes # "II" = "social homogamy" - mating similarity comes through correlated environmental factors # "III" = "familial social homogamy" - mating similarity comes only through correlated "F" (familial) factor #DATASET PARAMETERS percent.mz <- .44 #pct of twins who are MZ (in the last generation); to mimic ozva60k = .44 sibling.age.cor <- .5 #correlation between siblings' ages within a breeding cohort. Higher correlations imply less spacing between sibs and/or twins. range.pop.age <- c(18,80) #range of age in the population over which the beta terms below explain the AGE and A.by.AGE variation; recommend placing this the same as the first two entries in range.age.twin range.dataset.age <- c(14,90) #The full datasets include not only twins, but also offspring,sibs,spouses, and parents. This is the range of ages in that full dataset range.twin.age <- c(18,80) #range of twin ages in final dataset at each timepoint; VECTOR twice as long as # of measurements #example 1, for data of twins measured at exactly 14 and 18: range.twin.age <- c(14,14,18,18) #example 2, for data of adult twins of all ages measured once: range.twin.age <- c(18,100) #VARIANCES: #var terms of 1st gen (they may change thereafter). Its nice, but unnecessary, for them to sum to 1 #Genetic Factors: A <- .3 #var acct for by A (additive genetic variance); also, variance of the intercepts (or levels) of A AA <- .05 #var acct for by AxA (additive-by-additive) epistasis. D <- .05 #var acct for by D (dominance genetic variance) #Environmental Factors: U <- .2 #var acct for by U (unique variance). Note: in non-twins, E (eg, the E from ACE models)=U+MZ+T MZ <- .0 #var acct for by special MZ twin env (this goes into E for non-mz individuals) TW <- .0 #var acct for by special twin env (this goes into E for non-twin individuals) F <- .1 #var acct for by F (familial environment). This is passed from parents to offspring via vertical transmission S <- .1 #var acct for by S (sibling environment). Note: in only children, this is part of E. #Covariates & Moderators AGE <- .0 #var acct for by age; This term can be negative; sqrt(abs(AGE))= beta(age) = change in phenotype over 1 SD of age (15-45); may be negative SEX <- .0 #var acct for by sex (female=1,male=0); This term can be negative; sqrt(abs(SEX))= beta(sex) = change in phenotype over 1 SD of sex; may be negative A.by.AGE <- .1 #var acct for by interaction bw A & Age; Also, the var of slopes of A across age range specified in range.pop.age A.by.SEX <- .1 #var acct for by interaction bw A & Sex; Also, the var of slopes of A across gender. A.by.S <- .0 #var acct for by interaction bw A & S (sib env). A.by.U <- .0 #var acct for by interaction bw A & U (unique env). #COVARIANCES: #Note: when there are covariances between components, phenotypic variance will probably not begin the simulation at 1 latent.AM <- rep(.3,number.generations) #VECTOR of length = number.generations; correlation bw spouses' latent mating phenotype each generation, as determined by am.model. #NOTE: the actual phenotypic correlation bw spouses is <= to this A.S.cor <- .0 #correlation between A & S - caused by ACTIVE g-e covariance, or people choosing their (sib) environments A.U.cor <- .0 #correlation between A & U - caused by ACTIVE g-e covariance, or people choosing their (unique) environments R.Alevel.Aslope.age <- .0 #corr bw intercept & slope. Should be between -1 & 1. Consider making = 1, -1, or 0 for cleaner interpretation. R.Alevel.Aslope.sex <- .0 #corr bw intercept & slope. Should be between -1 & 1. Consider making = 1, -1, or 0 for cleaner interpretation. R.Alevel.Aslope.S <- .0 #corr bw intercept & slope. Should be between -1 & 1. Consider making = 1, -1, or 0 for cleaner interpretation. R.Alevel.Aslope.U <- .0 #corr bw intercept & slope. Should be between -1 & 1. Consider making = 1, -1, or 0 for cleaner interpretation. #Note on R.Alevel.Aslope.X: these are the corr's between the intercepts and slopes of A; #e.g., do people with the highest A values also have the highest positive changes in A across X? #if this is 1 (-1), A.by.X becomes the scalar interaction coefficient - i.e., the PURCEL model where h2 increases (decreases) as X increases #if this is 0, A.by.X becomes nonscalar - i.e., h2 remains ~ constant but diff genes turn on at diff values of X #WARNING: for scalar interactions, including only interaction var with no main effect var of A will not produce what is desired! #################################################################################################################################### #################################################################################################### #Start (NO NEED TO ALTER THE SCRIPT BELOW THIS LINE) ##################################################################################################### #1.5 OPTIONS FOR ADVANCED USAGE (RUNNING MX, SAVING ALL OBJECTS FOR DEBUGGING, ETC) ############################################################################ #mx.location <- "/home/genetics/genetics/matthew/bin/mxt" #full path of location of mx (unix server example) run.cascade.model <- "no" run.stealth.model <- "no" run.corr <- "no" real.missing <- "no" #do you want missingness patterns to emulate the ozva60k? number.runs <- 1 #do multiple runs of GeneEvolve - useful if running MX to find power, bias, and var/covar of parameter estimates continuation <- "no" #is this a continuation of a job that stopped and that you are now restarting? Assures the old Parameter.Comparison file is not overwritten save.rdata <- "yes" #do you want to save the .RData file? save.objects <- "no" #do you want to save all the R objects created? Generally, you should answer "no" unless you're debugging. Options: "yes" or "no". gene.model <- "rare" #this must be equal to "common" or "rare"; most people should choose "rare"!!! If "common", total variance will not usually be sum of var components due to cor of var components (e.g., A & D) number.alleles <- 2 #in the "common" model ONLY, how many alleles per locus? #convergence <- rep(0,number.generations) #***NOT YET IMPLEMENTED***; a VECTOR of length = PAR$number.generations; corr bw spouses due to PAR$convergence after marriage #working.directory <- "/lisa_vg4_lv1/HOME/matthew/PedEvolve/P39" #working directory where all files saved #working.directory <- "C:/Matts Folder/RESEARCH/PedEvolve/PE47" #working directory where all files saved #working.directory <- "/temp" #working directory where all files saved #mx.location <- "C:/Program Files/MX/bin/MXE.EXE" #full path of location of mx (windows example) mx.location <- "/usr/local/bin/mxt" #full path of location of mx (unix server example) ############################################################################ #2 LOAD FUNCTIONS USED IN SCRIPT & CREATE LISTS OF PARAMETER VALUES ABOVE TO ENTER INTO FUNCTIONS ############################################################################ #2.0 LOAD FUNCTIONS #change working directory setwd(working.directory) #load all PE functions source("GE.Functions_Short.R") source("GE.Function_AM.R") source("GE.Function_Reproduce.R") source("GE.Function_MakeDataFile.R") source("GE.Function_MakePhenotype.R") source("GE.Function_Run.Gene.Evolve71.R") source("GE.Function_RunCascade.R") source("GE.Function_Graph.R") #make PAR and VAR lists; remove unnecessary variables vars <- c("A", "A.S.cor","A.U.cor","A.by.AGE","A.by.S","A.by.U","AA","AGE","D","F","MZ","S","TW","U","SEX","A.by.SEX","R.Alevel.Aslope.S", "R.Alevel.Aslope.U","R.Alevel.Aslope.age","R.Alevel.Aslope.sex","am.model","latent.AM","make.graphics","mx.location", "number.generations","number.genes","percent.mz","pop.growth","range.dataset.age","range.pop.age", "range.twin.age","save.gen.data","save.objects","sib.cor","sibling.age.cor","start.pop.size","working.directory", "run.cascade.model","run.stealth.model","gene.model","number.alleles","real.missing","save.rdata","number.runs","continuation","run.corr") PAR1 <- list(working.directory=working.directory, save.gen.data=save.gen.data, save.objects=save.objects, sib.cor=sib.cor, mx.location=mx.location,make.graphics=make.graphics, number.generations=number.generations, start.pop.size=start.pop.size, pop.growth=pop.growth, number.genes=number.genes,am.model=am.model, percent.mz=percent.mz, range.dataset.age=range.dataset.age, sibling.age.cor=sibling.age.cor,range.twin.age=range.twin.age, range.pop.age=range.pop.age, run.cascade.model=run.cascade.model, run.stealth.model=run.stealth.model, gene.model=gene.model, number.alleles=number.alleles, real.missing=real.missing, save.rdata=save.rdata,number.runs=number.runs,continuation=continuation,run.corr=run.corr) VAR1 <- list(A=A, AA=AA, D=D,F=F, S=S, U=U, MZ=MZ, TW=TW, SEX=SEX, AGE=AGE, A.by.SEX=A.by.SEX,A.by.AGE=A.by.AGE, A.by.S=A.by.S, A.by.U=A.by.U, latent.AM=latent.AM, A.S.cor=A.S.cor, A.U.cor=A.U.cor, R.Alevel.Aslope.sex=R.Alevel.Aslope.sex,R.Alevel.Aslope.age=R.Alevel.Aslope.age, R.Alevel.Aslope.S=R.Alevel.Aslope.S, R.Alevel.Aslope.U=R.Alevel.Aslope.U) remove(list=vars);remove(vars) #record time PAR1$t1 <- Sys.time() ############################################################################ ############################################################################ #3 RUN GENEVOLVE, RUN CASCADE & CREATE GRAPHICS for (iteration in 1:PAR1$number.runs){ #Run GeneEvolve ALL <- run.gene.evolve(PAR=PAR1,VAR=VAR1) gc() #Run Cascade model MX <- run.cascade.mx(ALL.PARS=ALL$PAR, iter=iteration, PE.Var2=ALL$PE.Var2,PE.Var3=ALL$PE.Var3,rel.corr=ALL$rel.correlations, filenames=ALL$TEMP$name.date, data.info=ALL$TEMP$info.widedata.updated, cont=PAR1$continuation, run.twice=TRUE) gc() #Generate Graphics - running function "Generate.Graphics" which is defined in script "GE.Graph.1.R" if (PAR1$make.graphics=="yes") { ge.graphics(date.names=ALL$TEMP$name.date, PARAMS=ALL$PAR, VARIANCES=VAR1, MX.info=MX, PE.Var3=ALL$PE.Var3, PE.Var2=ALL$PE.Var2, rel.correlations=ALL$rel.correlations, track.changes=ALL$track.changes, data.info=ALL$TEMP$info.widedata.updated)} } #Save *.RData file if save.rdata=="yes" - names it by date if (PAR1$save.rdata=="yes"){ save.image(file=paste(ALL$TEMP$name.date,".RData",sep="")) } ############################################################################ #Sample Times for how long script takes: #Oct 18 2007. MacBook Pro 2.4 GHz Core2 Duo 4GB 667 MHz DDR2 SDRAM, 800 FSB: 16.5 min for: # number.generations <- 50 # start.pop.size <- 30000 # number.genes <- 40