/******************************************************************* %ghimport 0.11 Copyright (c) 2000 & 2001 Jeff Lessem, Institute for Behavioral Genetics, University of Colorado; and Stacey Cherny, Wellcome Trust Centre for Human Genetics, University of Oxford See http://ibgwww.Colorado.EDU/~lessem/software/qms2.html#license for the terms under which this software may be used. Import a GENEHUNTER ibd file into a SAS Software dataset. Usage: ibdfile= ibd file to analyze. This is the file that is output by "pre-sas.sh". REQUIRED phenfile= The file used to read phenotype scores from. REQUIRED markers= The number of loci at which you have markers. This is needed so that when reading in the phenotype file the macro knows how many columns to skip before reading the phenotype values. REQUIRED missing= The number used to represent missing phenotype values. This will be converted to the SAS Software standard "." notation. This will default to "." if not specified. OPTIONAL, DEFAULT="." asis= Set to 1 to use the data exactly as read in, no selection on the affection status or double entry is performed. This assumes that the first sib is the proband. This may produce a datafile which is statistically inapropriate for some analysis. 0 selects probands based on the affection status code and performs double entry when there are multiple probands in a family. The default of reading the data in with selection and double entry is that the QMS2 macro was primarily written to perform the DF model, so it is setup by default to be correct for a DF analysis. OPTIONAL, DEFAULT=0 outdata= The dataset to save the imported data into. This defaults to "data.ibdphen" (the temporary dataset named "ibdphen"). OPTIONAL, DEFAULT="ibdphen" For example to call this from within a SAS Software script the following commands would work: %ghimport(ibdfile=dump-sas.ibd, phenfile=pheno.dat, markers=10, missing=99.000, asis=0, outdata=mydata.wave1); or %ghimport(ibdfile=dump-sas.ibd, phenfile=pheno.dat, markers=10); *********************************************************************/ %macro ghimport( ibdfile=macro-error, phenfile=macro-error, markers=macro-error, missing=., asis=0, outdata=ibdphen); data ibd; /* the dataset ibd will contain the ibd probabilities */ infile "&ibdfile"; /* The columns are assigned to the following variables, where pos is the position on the chromosome, pedigree is the ID number of the family, indnum1 is the ID number of the first family member of the pairing and indnum2 is the ID number of the second member of the pairing, ibd0 ibd1 ibd2 represent the probability of the pairing having ibd 0, .5 or 1 at the pos(ition). This routine could be modified to import ibd files from other programs than GENEHUNTER by changing the order of the variables read and adding or removing variables as necessary. */ input pos pedigree indnum1 indnum2 prior0 prior1 prior2 ibd0 ibd1 ibd2 ; run; data phen; /* the dataset phen will contain the phenotype scores */ infile "&phenfile"; /* The phenotypes are read from a GENEHUNTER input file. The variable "proband" designates the affection status with 0=unknown 1=unaffected 2=affected. For the purposes of this macro 0,1=not a proband; 2=proband */ input pedigree indnum1 mothnum fathnum sex1 proband1 x1-x%eval(&markers*2) phen1; drop x1-x%eval(&markers*2) mothnum fathnum; if phen1 eq &missing then phen1=.; run; /* sort the datasets in preparation to merge them */ proc sort data=ibd; by pedigree indnum1; proc sort data=phen; by pedigree indnum1; /* merge the phenotype of the first individual into the ibd file */ data ibdphen; merge ibd phen ; by pedigree indnum1; run; /* rename the variables in the pheno dataset to be set for individual 2 */ proc datasets library=work; modify phen; rename sex1=sex2 phen1=phen2 indnum1=indnum2 proband1=proband2; run; /* sort ibdphen by indnum2 for next merge */ proc sort data=ibdphen; by pedigree indnum2; data ibdphen; merge ibdphen phen; by pedigree indnum2; /* drop pairings with missing phenotypes */ if phen1 ne . and phen2 ne .; /* drop the parents and lines with missing individuals */ /* no assumption of the parents id number, use the prior probability score */ if indnum1 ne . and indnum2 ne . and (prior0 eq .25 and prior0 ne .) and (prior1 eq .5 and prior1 ne .) and (prior2 eq .25 and prior2 ne .); df=1; /* all forward pairings are an independent observation */ dbl=0; /* is this the original or the double entered version? */ drop prior0 prior1 prior2; run; /* Double enter the data */ data backward; set ibdphen; /* save into dummy variables */ indnumx=indnum1; sexx=sex1; phenx=phen1; probandx=proband1; /* move 1 to 2 */ indnum1=indnum2; sex1=sex2; phen1=phen2; proband1=proband2; /* move dummies back to 1 */ indnum2=indnumx; sex2=sexx; phen2=phenx; proband2=probandx; /* drop dummy variables */ drop indnumx sexx phenx probandx; /* double entered cases do not add new information */ df=0; /* set double entered to true */ dbl=1; run; data ibdphen; /* Combine the single and the double entered datasets */ set ibdphen backward; /* compute pihat from the ibd probabilities */ pihat = .5 * ibd1 + ibd2 ; run; proc sort data=ibdphen; by pos; /* This part handles selection */ %if &asis=0 %then %do; /* do selection */ data &outdata; set ibdphen; /* This removes non-proband predictor phenotypes */ if proband1 eq 2; /* If the first instance of a pairing has been dropped then add back a degree of freedom */ if proband1 eq 2 and (proband2 eq 0 or proband2 eq 1) and df eq 0 then df=1; run; %end; /* selection */ %else %do; /* No selection */ data &outdata; set ibdphen; if dbl ne 1; run; %end; /* No selection */ %mend ghimport; /* ghimport */