##################################################################################################
# GeneEvolve version 0.73                                                                        #
# SIMULATION PROGRAM FOR EVOLVING AND CREATING EXTENDED PEDIGREE & TWIN DATA                      #
# by Matthew C Keller, updated 8/7/2008                                                          #
##################################################################################################


#TO RUN: 1) Open the GeneEvolve.zip file in your "working directory" - 
#           all scripts must remain in that directory.
#        2) Make changes to the parameters below (#1 of this script) by placing your desired
#           inputs to the *right* of the arrows (<- x  # i.e., user replaces "x").
#        3) Advanced users might consider changing parameters under section 1.5
#        4) Never remove any hash marks (#). Never change anything outside of sections 1 & 1.5
#        5) Run entire script, either in BATCH mode (e.g., R CMD BATCH GeneEvolve69.R on UNIX run 
#           line) or by highlighting the entire script and running it in an R session.
#        6) At end, you will have 5 data files (extended families of different twin types) plus a 
#           PDF summary of each GeneEvolve run and several matrices


remove(list=ls())

#1 BASIC USER INPUT PARAMETERS (31)      
#################################################################################################

#BASICS
working.directory <- "H:\\GE"     # working directory where all files saved
make.graphics <- "yes"                # do you want to create a full graphical report at end?

#DEMOGRAPHIC DETAILS:
number.generations <- 10      # number of generations to evolve before pedigree data created
start.pop.size <- 10000       # breeding population size at start of simulation; should be > 100
pop.growth <- rep(1,number.generations)  #vector of length number.generations that tells how big
    # each generation is relative to the one before it; for no growth: rep(1,number.generations); 
    # for constant 5% growth: rep(1.05,number.generations)               

#MODEL DETAILS:
thresholds <- FALSE      # EITHER a vector of thresholds corresponding to location on 
                           # standard normal OR "FALSE", meaning continuous
am.model <- "I"            # <- "I" is "primary phenotypic assortment" 
                           # (corr b/w mates due to their choosing similar phenotypes)
                           # <- "II" is "social homogamy"
                           # (corr b/w mates due to correlated environmental factors)

#DATASET PARAMETERS
percent.mz <- .44              # % of twins who are MZ (in pedigree creation)
range.dataset.age <- c(14,90)  # The pedigree datasets includes twins plus twin offspring,sibs, 
                               # spouses, and parents. This is the range of ages in that dataset
range.twin.age <- c(18,80)     # Range of twin ages in final dataset at each timepoint. Is a 
          # VECTOR twice as long as # of (repeated) measurements
          # Ex. 1, for data of adult twins of all ages measured once: <- c(18,100)
          # Ex. 2, for data of twins measured at exactly 14: <- c(14,14) 
          # Ex. 3, for data of twins measured at 14-18 and again (i.e., REPEATED MEASURES) at 
          #        18-25: <- c(14,18,18,25) 


#VARIANCES: These are variance terms of 1st gen (they may change thereafter). 
# Its nice, but unnecessary, for them to sum to 1

#Genetic Factors:
A  <- .4       # var acct for by A (additive genetic variance)
AA <- .0       # var acct for by AxA (additive-by-additive) epistasis. 
D  <- .1      # var acct for by D (dominance genetic variance)

#Environmental Factors:
U <- .3        # var acct for by U (unique variance). 
               # Note: in non-twins, E (eg, the E from ACE models)=U+MZ+T
MZ <- .0       # var acct for by special MZ twin env (this goes into E for non-mz individuals)
TW <- .0       # var acct for by special twin env (this goes into E for non-twin individuals)
F <- .0       # var acct for by F (familial environment). 
               # This is passed from parents to offspring via vertical transmission
S <- .2       # var acct for by S (sibling environment). 

#Covariates & Moderators
AGE <- .0      # var acct for by age; This term can be negative; 
               # sqrt(abs(AGE))= beta(age) = change in phenotype over 1 SD of age (15-45)
SEX <- .0      # var acct for by sex (female=1,male=0); This term can be negative 
               # sqrt(abs(SEX))= beta(sex) = change in phenotype over 1 SD of sex
A.by.AGE <- .0 # var acct for by interaction bw A & Age; 
               # Also, the var of slopes of A across age range specified in range.pop.age
A.by.SEX <- .0 # var acct for by interaction bw A & Sex; 
               # Also, the var of slopes of A across gender. 
A.by.S <- .0   # var acct for by interaction bw A & S (sib env).
A.by.U <- .0   # var acct for by interaction bw A & U (unique env).

#COVARIANCES: (Note: when there are covariances b/w components, total var != sum(variances))
latent.AM <- rep(.0,number.generations) #VECTOR of length = number.generations; 
               # corr b/w spouses' latent mating phenotype each gen., as determined by am.model.
               # NOTE: the actual phenotypic correlation bw spouses is <= to this
A.S.cor  <- .0 # corr b/w A & S - caused by ACTIVE g-e covariance
A.U.cor <- .0  # corr b/w A & U - caused by ACTIVE g-e covariance
R.Alevel.Aslope.age <- .0 #corr bw intercept & slope. Should be b/w -1 & 1.
R.Alevel.Aslope.sex <- .0 #corr bw intercept & slope. Should be b/w -1 & 1.
R.Alevel.Aslope.S <- .0   #corr bw intercept & slope. Should be b/w -1 & 1.
R.Alevel.Aslope.U <- .0   #corr bw intercept & slope. Should be b/w -1 & 1. 
 # Note on R.Alevel.Aslope.X: these are the corr's b/w the intercepts and slopes of A;
 # e.g., do people with the highest A values also have the highest changes in A across X?
 # if this is 1 (-1), A.by.X becomes the scalar interaction coefficient 
 # - i.e., the PURCEL model where h2 increases (decreases) as X increases
 # if this is 0, A.by.X becomes nonscalar 
 # - i.e., h2 remains ~ constant but diff genes turn on at diff values of X
 # WARNING: for scalar interactions, including only interaction var with no main effect var of A
 # will not produce what is desired!
#################################################################################################


#1.5 ADVANCED USER INPUT PARAMETERS (MULTIPLE RUNS, RANDOM PARAMETERS, RUNNING MX, ETC) (17)
############################################################################

number.runs <- 1    # multiple GE runs- useful for finding bias, var/covar of parameter estimates
rand.parameters <- "no" # do you want GE to use random parameters? 
        # Choices: "no","all","A","D","F","S","AM.mod". "all" randomly chooses A,D,F,S, & AM.mod
save.gen.data <- "no"   # do you want to write out the datasets for each generation? 
        # Answer "no" unless you want a detailed ancestral record
continuation <- "no"    # is this a continuation of a job you are now restarting? 
        # Assures the old Parameter.Comparison file is not overwritten
save.rdata <-  "no"    # do you want to save the .RData file?
save.objects <- "no"    # do you want to save all the R objects created? 
        # Generally, you should answer "no" unless you're debugging. Options: "yes" or "no".


number.genes <- 7 # number of genes affecting phenotype; Must be > 2 but <= 50. 5+ is fine
sibling.age.cor <- .5 # corr b/w sibs' ages within a breeding cohort. 
                      # Higher corr. imply less spacing in years b/w sibs.
range.pop.age <- range.twin.age[1:2]  # range of age in the population over which the beta terms
                      # above explain the AGE and A.by.AGE variation; I recommend placing this 
                      # the same as the first two entries in range.twin.age above


real.missing <- "no"  #do you want missingness patterns to emulate the ozva60k?
run.cascade.model <- "no"
run.stealth.model <- "no"
run.corr <- "no"      #gets 88 relative covariances using Mx
sib.cor <- "yes"      #do you want GE to compute the correlations b/w sibs for each generation?

gene.model <- "common"  # this must be equal to "common" or "rare"; most people should choose rare!
                      # If "common", total variance will not usually be sum of var components due
                      # to cor of var components (e.g., A & D)
number.alleles <- 2   # in the "common" model ONLY, how many alleles per locus? 

mx.location <-  "/usr/local/bin/mxt"  #full path of location of mx (unix server example)
############################################################################


#################################################################################################
#Start (NO NEED TO ALTER THE SCRIPT BELOW THIS LINE)
#################################################################################################


#2 LOAD FUNCTIONS USED IN SCRIPT & CREATE LISTS OF PARAMETER VALUES ABOVE TO ENTER INTO FUNCTIONS

############################################################################
#2.0 LOAD FUNCTIONS
#change working directory
setwd(working.directory)

#load all PE functions
source("GE.Functions_Short.R")
source("GE.Function_AM.R")
source("GE.Function_Reproduce.R")
source("GE.Function_MakeDataFile.R")
source("GE.Function_MakePhenotype.R")
source("GE.Function_Run.Gene.Evolve73.R")
source("GE.Function_Graph.R")

 
#make PAR and VAR lists; remove unnecessary variables
vars <- c("A", "A.S.cor","A.U.cor","A.by.AGE","A.by.S","A.by.U","AA","AGE","D","F","MZ","S","TW",
          "U","SEX","A.by.SEX","R.Alevel.Aslope.S","R.Alevel.Aslope.U","R.Alevel.Aslope.age",
          "R.Alevel.Aslope.sex","am.model","latent.AM","make.graphics","mx.location",
          "number.generations","number.genes","percent.mz","pop.growth","range.dataset.age",
          "range.pop.age","range.twin.age","save.gen.data","save.objects","sib.cor",
          "sibling.age.cor","start.pop.size","working.directory","run.cascade.model",
          "run.stealth.model","gene.model","number.alleles","real.missing","save.rdata",
          "number.runs","continuation","run.corr")

PAR1 <- list(working.directory=working.directory, save.gen.data=save.gen.data,
             save.objects=save.objects,sib.cor=sib.cor, mx.location=mx.location,
             make.graphics=make.graphics, number.generations=number.generations,
             start.pop.size=start.pop.size, pop.growth=pop.growth, number.genes=number.genes,
             am.model=am.model, percent.mz=percent.mz, range.dataset.age=range.dataset.age,
             sibling.age.cor=sibling.age.cor,range.twin.age=range.twin.age,
             range.pop.age=range.pop.age,run.cascade.model=run.cascade.model,
             run.stealth.model=run.stealth.model, gene.model=gene.model,
             number.alleles=number.alleles,real.missing=real.missing,
             save.rdata=save.rdata,number.runs=number.runs,continuation=continuation,
             run.corr=run.corr,rand.parameters=rand.parameters,thresholds=thresholds)
          
VAR1 <- list(A=A, AA=AA, D=D,F=F, S=S, U=U, MZ=MZ, TW=TW, SEX=SEX, AGE=AGE, A.by.SEX=A.by.SEX,
             A.by.AGE=A.by.AGE, A.by.S=A.by.S, A.by.U=A.by.U,latent.AM=latent.AM, A.S.cor=A.S.cor,
             A.U.cor=A.U.cor, R.Alevel.Aslope.sex=R.Alevel.Aslope.sex,
             R.Alevel.Aslope.age=R.Alevel.Aslope.age,R.Alevel.Aslope.S=R.Alevel.Aslope.S,
             R.Alevel.Aslope.U=R.Alevel.Aslope.U)

remove(list=vars)

#record time
PAR1$t1 <- Sys.time()

#################################################################################################


#################################################################################################
#3 RUN GENEVOLVE, RUN MX (OPTIONAL) & CREATE GRAPHICS

for (iteration in 1:PAR1$number.runs){

 #reset parameters each run if rand.parameters above is not "no"
  BOTH   <- alter.parameters(P=PAR1,V=VAR1,max=1)  
  VAR1 <- BOTH$VAR
  PAR1 <- BOTH$PAR
  remove(BOTH)
  
 #Run GeneEvolve
  ALL <- run.gene.evolve(PAR=PAR1,VAR=VAR1,iter=iteration)

 gc()
   
 #Run Cascade model
 {if (PAR1$run.cascade.model=='yes'|PAR1$run.stealth.model=='yes'|PAR1$run.corr=='yes'){
   source("GE.Function_RunCascade.R")
   MX <- run.cascade.mx(ALL.PARS=ALL$PAR, iter=iteration, PE.Var2=ALL$PE.Var2, 
          PE.Var3=ALL$PE.Var3,rel.corr=ALL$rel.correlations,filenames=ALL$TEMP$name.date, 
          data.info=ALL$TEMP$info.widedata.updated, cont=PAR1$continuation, run.twice=TRUE)}
  else MX <- NULL
 }

 gc()
 
 #Generate Graphics - runs function "Generate.Graphics" which is defined in script "GE.Graph.1.R"
 if (PAR1$make.graphics=="yes") {
  ge.graphics(date.names=ALL$TEMP$name.date, PARAMS=ALL$PAR, VARIANCES=VAR1, MX.info=MX, 
              PE.Var3=ALL$PE.Var3, PE.Var2=ALL$PE.Var2, rel.correlations=ALL$rel.correlations, 
              track.changes=ALL$track.changes, data.info=ALL$TEMP$info.widedata.updated)}

}
    

#Save *.RData file if save.rdata=="yes" - names it by date
if (PAR1$save.rdata=="yes"){ save.image(file=paste(ALL$TEMP$name.date,".RData",sep="")) }

#################################################################################################