data(iris) plot(iris[,1:4], pch=20, col=c("red","blue","orange")[as.numeric(iris$Species)]) # Rapport variance inter/variance intra var.names=c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") nb=length(var.names) for(k in 1:nb) { print(var.names[k]) print(anova(aov(iris[,k]~iris$Species))) } # les variables de pétales sont plus discriminantes ! # Densité marginales univariées ISE=which(iris$Species == "setosa") IVI=which(iris$Species == "virginica") IVE=which(iris$Species == "versicolor") br=seq(min(iris[,3]),max(iris[,3]),length.out=30) hist(iris[ISE,3],breaks=br, col="red", prob=TRUE,ylim=c(0,2.5)) hist(iris[IVE,3],breaks=br, col="blue", prob=TRUE,add=TRUE) hist(iris[IVI,3],breaks=br, col="orange", prob=TRUE,add=TRUE) lines(density(iris[ISE,3]), lwd=1) lines(density(iris[IVE,3]), lwd=1) lines(density(iris[IVI,3]), lwd=1) abs=seq(min(iris[,3]),max(iris[,3]),0.1) lines(abs, dnorm(abs,mean(iris[ISE,3]), sd(iris[ISE,3])), lwd=2, lty=2) lines(abs, dnorm(abs,mean(iris[IVE,3]), sd(iris[IVE,3])), lwd=2, lty=2) lines(abs, dnorm(abs,mean(iris[IVI,3]), sd(iris[IVI,3])), lwd=2, lty=2) # test de Fisher d'égalité des variances pour Virginica et Versicolor var(iris$Petal.Length[IVE])/var(iris$Petal.Length[IVI]) qf(0.025,length(IVE), length(IVI)) qf(0.975,length(IVE), length(IVI)) var(iris$Petal.Width[IVE])/var(iris$Petal.Width[IVI]) cov(iris$Petal.Width[IVE],iris$Petal.Length[IVE])/var(iris$Petal.Width[IVI],iris$Petal.Length[IVI]) # Analyse discriminante avec un Modele homoscédastique Gaussien bool=(iris$Species=="virginica" | iris$Species=="versicolor") sum(bool) muVE=array(data=c(mean(iris$Petal.Length[IVE]),mean(iris$Petal.Width[IVE])),dim=c(2,1)) muVI=array(data=c(mean(iris$Petal.Length[IVI]),mean(iris$Petal.Width[IVI])),dim=c(2,1)) Sigma=array(data=c(var(iris$Petal.Length[bool]), cov(iris$Petal.Width[bool],iris$Petal.Length[bool]), cov(iris$Petal.Width[bool],iris$Petal.Length[bool]), var(iris$Petal.Width[bool])), dim=c(2,2)) Sigma.inv=solve(Sigma) e=array(data=c(1), dim=c(nrow(x),1)) x=as.matrix(iris[bool,3:4]) affectation=(2*(x-e%*%(t(muVE+muVI)/2)) %*% Sigma.inv %*%(muVE-muVI)>0 ) table(affectation, iris$Species[bool]) plot(iris[bool,3:4], pch=20, col=c("blue","orange")[as.numeric(iris$Species)]) points(iris[bool,3:4], pch=c(1,2)[affectation+1], col=c("blue","orange")[affectation+1], cex=3) # Densités bivariées library(KernSmooth) kde.VE=bkde2D(iris[IVE,3:4], bandwidth=c(0.2,2)) image(kde.VE$x1,kde.VE$x2, kde.VE$fhat) contour(kde.VE$x1,kde.VE$x2, kde.VE$fhat, add=TRUE) points(iris[IVE,3:4]) kde.VI=bkde2D(iris[IVI,3:4], bandwidth=c(0.2,2)) image(kde.VI$x1,kde.VI$x2, kde.VI$fhat) contour(kde.VI$x1,kde.VI$x2, kde.VI$fhat, add=TRUE) points(iris[IVI,3:4]) # Méthode des plus proches voisins KernelEst=function(points,h,x) { n=nrow(x) m=nrow(points) yy1=array(data=c(points[,1]), dim=c(m,n)) xx1=t(array(data=c(x[,1]), dim=c(n,m))) z1=(xx1-yy1)/h yy2=array(data=c(points[,2]), dim=c(m,n)) xx2=t(array(data=c(x[,2]), dim=c(n,m))) z2=(xx2-yy2)/h return((1/(n*h^2))*as.vector(rowSums(dnorm(z1,0,sd=1)*dnorm(z2,0,sd=1)))) } h=0.6 tirage<-sample.int(sum(bool),70, replace=FALSE) tirage=sort(tirage) ech.appr=(iris[bool,])[tirage,] nrow(ech.appr) ech.class=(iris[bool,])[-tirage,] nrow(ech.class) fVE=KernelEst(ech.class[,3:4],h,ech.appr[(ech.appr$Species=="versicolor"),3:4]) fVI=KernelEst(ech.class[,3:4],h,ech.appr[(ech.appr$Species=="virginica"),3:4]) affectation2=(fVE