#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#			Simulation program for twin pairs in R				#
#				Benjamin Neale							#
#				March 2006								#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

# '#' denotes a comment

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#													#
#				Parameters								#
#													#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

ad2<-0.5		#Additive genetic variance component (a squared)
co2<-0.3		#Common environment variance component (c squared)
en2<-0.2		#Specific environment variance component (e squared)

MZN<-5000		#Number of MZ twin pairs
DZN<-5000		#Number of DZ twin pairs

outfile<-"sim.fun"	#output file for the simulation

#Assignment in R uses a<-b or b->a in both cases b is stored in a

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#													#
#				Data generation							#
#													#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

vals<-matrix(rnorm((MZN*4+DZN*6)),,10);	#4 random normals per twin pair for latent factors

one<-matrix(1,MZN,1);			#Zygosity for MZs

two<-matrix(2,DZN,1);			#Zygosity for DZs

ad<-sqrt(ad2);				#Unsquared additive genetic component

co<-sqrt(co2);				#Unsquared common environment component

en<-sqrt(en2);				#Unsquared specific environment component

#MZ data generation follows: 

MZ<-matrix(cbind(one,ad*vals[,1]+co*vals[,2]+en*vals[,3],ad*vals[,1]+co*vals[,2]+en*vals[,4]),,3);

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#ad*vals[,1] is the additive genetic component times a random normal			#
#co*vals[,2] is the common environment component times a random normal			#
#en*vals[,3] is the specific environment component times a random normal		#
#As MZs correlate completely we use 1 random variable for A and C				#
#while MZ do not correlate on specific environment (en*vals[,3] and en*vals[,4])	#
#so we have to use separate random variables for each individual twin			#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

#square root of .5

srf<-sqrt(.5);

#DZ data generation follows:

DZ<-matrix(cbind(two,srf*ad*vals[,5]+srf*ad*vals[,6]+co*vals[,7]+en*vals[,8],srf*ad*vals[,5]+srf*ad*vals[,9]+co*vals[,7]+en*vals[,10]),,3);

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#As DZs correlate only 50% on A we use 3 random variables for the A			#
#1 for the shared piece and then 2 for the specific pieces					#
#Common environment and specific environment work in the same manner as the MZs	#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

write.table(rbind(MZ,DZ),file=outfile,row.names=FALSE,col.names=FALSE)