Computerintensive Methoden - Coalescent Theory - Project B
The following data were taken from the segregating sites in a sequence of nucleotides from the Y chromosome of 131 Northern Africans. Thirteen segregating sites and 7 distinct alleles were found. At each site, 0 represents the ancestral variant (the one observed in the majority of a reasonably large sample of chimpanzees). The observed alleles and their frequencies are given below.
Engine.php: unknown value "HTML" for attribute "output"in
# Allele-by-site indicator matrix: one row per observed allele
# (A, E, F, G, J, K, R) and one column per segregating site (1-13).
# A 0 marks the ancestral variant at that site, a 1 the derived variant.
alleles <- matrix(
  c(
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
    1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1,
    1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0
  ),
  ncol = 13,
  byrow = TRUE,  # TRUE rather than T: T is an ordinary, reassignable variable
  dimnames = list(c("A", "E", "F", "G", "J", "K", "R"), 1:13)
)
# Number of sampled sequences carrying each allele, in the same order as the
# rows of `alleles`; the counts sum to the sample size of 131.
freq <- c(10, 70, 1, 1, 35, 6, 8)
library(xtable)
# Emit the allele matrix as an HTML table captioned "Alleles";
# digits = 0 asks xtable for no decimal places in the cells.
print(
  xtable(alleles, caption = "Alleles", digits = 0),
  type = "html"
)
Task 1
Calculate the matrix of Hamming distances between each pair of alleles.
REngine.php: > rpdf<-'/var/www/localhost/htdocs/StatWiki/Rfiles/R/f33d66e7d5ca93ab7d7541b36e02a025aee53c72_%i.pdf' > rpdfno<-0 > rhtml<-'' > rfiles<-'/var/www/localhost/htdocs/StatWiki/Rfiles/R/' > source('/var/www/localhost/htdocs/StatWiki/Rfiles/R/@.R') > rout<-'text' > cat('<!--- Start of program --->\n') <!--- Start of program ---> > d=dist(alleles,method="manhattan") Error in as.matrix(x) : object 'alleles' not found Calls: dist -> as.matrix Execution haltedin
# Pairwise Hamming distances between the alleles (rows of `alleles`).
# For 0/1 site vectors the Manhattan distance equals the number of sites at
# which two alleles differ, i.e. the Hamming distance.
d <- dist(alleles, method = "manhattan")
# Print the distance matrix (its own statement: two expressions on one line
# without a separator do not parse in R).
d
Task 2
Calculate the nucleotide diversity.
REngine.php: > rpdf<-'/var/www/localhost/htdocs/StatWiki/Rfiles/R/75ddcc19af3fc18606822b7c21de96e02de6c2af_%i.pdf' > rpdfno<-0 > rhtml<-'' > rfiles<-'/var/www/localhost/htdocs/StatWiki/Rfiles/R/' > source('/var/www/localhost/htdocs/StatWiki/Rfiles/R/@.R') > rout<-'text' > cat('<!--- Start of program --->\n') <!--- Start of program ---> > theta.pi=sum(as.dist(as.matrix(d) * (freq %o% freq)))*(2/(sum(freq)*(sum(freq)-1))) Error in as.matrix(d) : object 'd' not found Calls: as.dist -> as.matrix Execution haltedin
# Nucleotide diversity (theta.pi): the average number of site differences over
# all pairs of sampled sequences.
# - freq %o% freq gives freq[i] * freq[j], the number of sequence pairs that
#   carry alleles i and j; multiplying by d[i, j] weights each allele pair.
# - as.dist() keeps only the lower triangle, so each unordered allele pair is
#   counted once (pairs of identical alleles have distance 0 anyway).
# - 2 / (n * (n - 1)) with n = sum(freq) divides by the number of sequence
#   pairs, choose(n, 2).
theta.pi <- sum(as.dist(as.matrix(d) * (freq %o% freq))) *
  (2 / (sum(freq) * (sum(freq) - 1)))
# Print the estimate (its own statement so that the line parses).
theta.pi
Task 3
Carry out the Tajima test to verify the Wright-Fisher model.
REngine.php: > rpdf<-'/var/www/localhost/htdocs/StatWiki/Rfiles/R/eec288b6020f1e64579fbff733dfcaa82660a9f4_%i.pdf' > rpdfno<-0 > rhtml<-'' > rfiles<-'/var/www/localhost/htdocs/StatWiki/Rfiles/R/' > source('/var/www/localhost/htdocs/StatWiki/Rfiles/R/@.R') > rout<-'text' > cat('<!--- Start of program --->\n') <!--- Start of program ---> > S=dim(alleles)[2] Error: object 'alleles' not found Execution haltedin
# Tajima's D: standardized difference between the nucleotide diversity
# `theta.pi` and the estimate of theta based on the number of segregating
# sites. Each assignment is on its own line; fused on one line they do not
# parse.
S <- dim(alleles)[2]  # number of segregating sites (columns of `alleles`)
n <- sum(freq)        # total number of sampled sequences

# Segregating-sites estimate of theta: S divided by sum_{i=1}^{n-1} 1/i.
theta.l <- S / sum(1 / seq_len(n - 1))

# Harmonic sums used by the variance terms:
# an = sum_{i=1}^{n-1} 1/i, bn = sum_{i=1}^{n-1} 1/i^2.
an <- sum(1 / seq_len(n - 1))
bn <- sum(1 / seq_len(n - 1)^2)

# Coefficients of the variance of (theta.pi - theta.l), matching the
# constants e1 and e2 in Tajima (1989).
e1 <- (n + 1) / (3 * an * (n - 1)) - 1 / an^2
e2 <- 1 / (an^2 + bn) *
  ((2 * (n^2 + n + 3)) / (9 * n * (n - 1)) - (n + 2) / (n * an) + bn / an^2)

# Estimated variance of the difference, then the test statistic itself.
var.theta <- e1 * S + e2 * S * (S - 1)

D <- (theta.pi - theta.l) / sqrt(var.theta)
REngine.php: <!--- Start of program ---> Error in names(D) = "D" : names() applied to a non-vector Execution haltedin
# Package the result as an object of class "htest" so that printing it uses
# R's standard hypothesis-test layout. Each statement is on its own line;
# fused on one line they do not parse.
names(D) <- "D"  # label shown next to the statistic when printed

est <- c(theta.l, theta.pi)
names(est) <- c("Theta L", "Theta Pi")

ret <- list(
  statistic = D,
  method = "Tajima Test",
  estimate = est,
  # two-sided p-value, treating D as approximately standard normal
  p.value = 2 * (1 - pnorm(abs(D)))
)
class(ret) <- "htest"
# Print the test result.
ret