Upload
aletheiaaiehtela
View
90
Download
0
Tags:
Embed Size (px)
Citation preview
Análisis multivariante en R: aplicación en ecología
Rosana Ferrero
5 de febrero de 2014
Índice
1. Análisis descriptivo de datos multivariantes con R . . . . . . . . . . . . . . . . . . . . . . . . 2
   1.1. EJEMPLO EN R . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2
1
1. Análisis descriptivo de datos multivariantes con R
1.1. EJEMPLO EN R.
Este famoso conjunto de datos iris (de Fisher o Anderson) contiene las medidas, en centímetros, de la longitud y la anchura del sépalo y de la longitud y la anchura del pétalo, respectivamente, de 50 flores de cada una de 3 especies de iris. Las especies son Iris setosa, versicolor y virginica.
iris es un data frame (tabla de datos) con 150 casos (filas) y 5 variables (columnas) llamadas Sepal.Length, Sepal.Width, Petal.Length, Petal.Width y Species.
# Load the built-in iris data set into the workspace.
data(iris)
# NOTE: header = TRUE (first row holds the variable names) and dec = ","
# (comma as decimal mark) are options for reading a text file, e.g. with
# read.table(); they are not needed for data(iris).
# Show the first six rows.
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species## 1 5.1 3.5 1.4 0.2 setosa## 2 4.9 3.0 1.4 0.2 setosa## 3 4.7 3.2 1.3 0.2 setosa## 4 4.6 3.1 1.5 0.2 setosa## 5 5.0 3.6 1.4 0.2 setosa## 6 5.4 3.9 1.7 0.4 setosa
# Show the last six rows of the data set.
tail(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species## 145 6.7 3.3 5.7 2.5 virginica## 146 6.7 3.0 5.2 2.3 virginica## 147 6.3 2.5 5.0 1.9 virginica## 148 6.5 3.0 5.2 2.0 virginica## 149 6.2 3.4 5.4 2.3 virginica## 150 5.9 3.0 5.1 1.8 virginica
names(iris) # names of the variables (columns)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"## [5] "Species"
# Compact overview of the structure: dimensions plus type and first values
# of every column.
str(iris)
## 'data.frame': 150 obs. of 5 variables:## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# I. Explore each variable on its own ----
# 1) Quantitative variables: min, quartiles, median and mean per numeric
#    column (the factor column Species is reported as counts per level).
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
2
## Min. :4.30 Min. :2.00 Min. :1.00 Min. :0.1## 1st Qu.:5.10 1st Qu.:2.80 1st Qu.:1.60 1st Qu.:0.3## Median :5.80 Median :3.00 Median :4.35 Median :1.3## Mean :5.84 Mean :3.06 Mean :3.76 Mean :1.2## 3rd Qu.:6.40 3rd Qu.:3.30 3rd Qu.:5.10 3rd Qu.:1.8## Max. :7.90 Max. :4.40 Max. :6.90 Max. :2.5## Species## setosa :50## versicolor:50## virginica :50######
# Sample variance of sepal length.
var(iris$Sepal.Length)
## [1] 0.6857
# Histogram of sepal length.
hist(iris$Sepal.Length)
# Kernel density estimate of the same variable (default settings).
plot(density(iris$Sepal.Length))
Histogram of iris$Sepal.Length
iris$Sepal.Length
Fre
quen
cy
4 5 6 7 8
05
1015
2025
30
3
4 5 6 7 8
0.0
0.1
0.2
0.3
0.4
density.default(x = iris$Sepal.Length)
N = 150 Bandwidth = 0.2736
Den
sity
# 2) Qualitative variables: frequency table of the species factor.
table(iris$Species)
#### setosa versicolor virginica## 50 50 50
# Pie chart and bar chart of the number of flowers per species.
# The frequency table is computed once and reused for both plots.
species_counts <- table(iris$Species)
pie(species_counts)
barplot(species_counts)
4
setosa versicolor virginica
010
2030
4050
# II. Explore the variables jointly ----
# Covariance matrix of the four numeric measurements (columns 1 to 4).
cov(iris[, 1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width## Sepal.Length 0.68569 -0.04243 1.2743 0.5163## Sepal.Width -0.04243 0.18998 -0.3297 -0.1216## Petal.Length 1.27432 -0.32966 3.1163 1.2956## Petal.Width 0.51627 -0.12164 1.2956 0.5810
# Correlation matrix of the four numeric measurements (columns 1 to 4).
cor(iris[, 1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width## Sepal.Length 1.0000 -0.1176 0.8718 0.8179## Sepal.Width -0.1176 1.0000 -0.4284 -0.3661## Petal.Length 0.8718 -0.4284 1.0000 0.9629## Petal.Width 0.8179 -0.3661 0.9629 1.0000
# Summary statistics of sepal length computed separately for each species.
aggregate(Sepal.Length ~ Species, summary, data = iris)
## Species Sepal.Length.Min. Sepal.Length.1st Qu.## 1 setosa 4.30 4.80## 2 versicolor 4.90 5.60## 3 virginica 4.90 6.22## Sepal.Length.Median Sepal.Length.Mean Sepal.Length.3rd Qu.## 1 5.00 5.01 5.20## 2 5.90 5.94 6.30
6
## 3 6.50 6.59 6.90## Sepal.Length.Max.## 1 5.80## 2 7.00## 3 7.90
# Box plot of sepal length by species.
boxplot(Sepal.Length ~ Species, data = iris)

# Sepal length against sepal width; colour and plotting symbol both encode
# the species (factor levels are used as colour / pch indices).
with(
  iris,
  plot(Sepal.Length, Sepal.Width, col = Species, pch = as.numeric(Species))
)

# One fill colour per flower, looked up by the integer code of its species.
species_bg <- c("red", "green3", "blue")[unclass(iris$Species)]

# Petal length against petal width, filled circles coloured by species.
plot(
  iris$Petal.Length, iris$Petal.Width,
  pch = 21, bg = species_bg, main = "Iris Data"
)

# Scatter-plot matrix of the four measurements, same colour coding.
pairs(iris[1:4], main = "Iris Data", pch = 21, bg = species_bg)
setosa versicolor virginica
4.5
5.0
5.5
6.0
6.5
7.0
7.5
8.0
7
Sepal.Length
2.0 2.5 3.0 3.5 4.0 0.5 1.0 1.5 2.0 2.5
4.5
6.0
7.5
2.0
3.0
4.0
Sepal.Width
Petal.Length
13
57
4.5 5.5 6.5 7.5
0.5
1.5
2.5
1 2 3 4 5 6 7
Petal.Width
Iris Data
#' Panel function for pairs(): print the absolute correlation of x and y.
#'
#' The label is drawn in the centre of the panel and its size is scaled by
#' the absolute correlation, so stronger correlations appear larger.
#'
#' @param x,y Numeric vectors supplied by pairs() for the current panel.
#' @param digits Number of significant digits used to format the label.
#' @param prefix Text prepended to the formatted correlation.
#' @param cex.cor Base character expansion; if missing it is derived from
#'   the width of the label so that the text fits the panel.
#' @param ... Accepted for compatibility with pairs(); not used.
#' @return Called for its side effect of drawing text on the current panel.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  # Work in unit coordinates so the label can be centred at (0.5, 0.5).
  # The previous coordinate system is restored on exit; it must be passed
  # as a named argument (usr = ...) for par() to actually set it.
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(0, 1, 0, 1))

  r <- abs(cor(x, y))
  # r is formatted together with a fixed reference number and only the first
  # element is kept, so the label gets a consistent number of decimals.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }
  text(0.5, 0.5, txt, cex = cex.cor * r)
}

# Scatter-plot matrix with the correlations shown in the upper triangle.
pairs(
  iris[1:4],
  main = "Iris Data", pch = 21,
  bg = c("red", "green3", "blue")[unclass(iris$Species)],
  upper.panel = panel.cor
)
10
Sepal.Length
2.0 2.5 3.0 3.5 4.0
0.12 0.87
0.5 1.0 1.5 2.0 2.5
4.5
6.0
7.5
0.82
2.0
3.0
4.0
Sepal.Width 0.43 0.37
Petal.Length
13
57
0.96
4.5 5.5 6.5 7.5
0.5
1.5
2.5
1 2 3 4 5 6 7
Petal.Width
Iris Data
# 3D graphics ----
library(scatterplot3d)
# Static 3D scatter plot: petal width, sepal length and sepal width.
scatterplot3d(iris$Petal.Width, iris$Sepal.Length, iris$Sepal.Width)
# Alternative with the rgl package (left disabled, as in the original):
# library(rgl)
# plot3d(iris$Petal.Width, iris$Sepal.Length, iris$Sepal.Width)
11
0.0 0.5 1.0 1.5 2.0 2.5
2.0
2.5
3.0
3.5
4.0
4.5
45
67
8
iris$Petal.Width
iris$
Sep
al.L
engt
h
iris$
Sep
al.W
idth
# More elaborate graphics: ggplot2 ----
library(ggplot2)
# NOTE(review): qplot() and the way it forwards method = "lm" to the smooth
# layer depend on the installed ggplot2 version — confirm before relying on
# these calls.

# Sepal width against sepal length, one facet row per species.
qplot(Sepal.Length, Sepal.Width, data = iris, facets = Species ~ .)

# Points plus a smoother; colour encodes species, size encodes petal width.
qplot(
  x = Sepal.Width, y = Sepal.Length, data = iris,
  geom = c("point", "smooth"),
  color = Species, size = Petal.Width, method = "lm"
)

# Same plot, one facet per species (wrapped).
qplot(
  x = Sepal.Width, y = Sepal.Length, data = iris,
  geom = c("point", "smooth"),
  color = Species, size = Petal.Width, method = "lm",
  facets = ~Species
)

# Same plot, one facet row per species.
qplot(
  x = Sepal.Width, y = Sepal.Length, data = iris,
  geom = c("point", "smooth"),
  color = Species, size = Petal.Width, method = "lm",
  facets = Species ~ .
)
12
2.0
2.5
3.0
3.5
4.0
4.5
2.0
2.5
3.0
3.5
4.0
4.5
2.0
2.5
3.0
3.5
4.0
4.5
setosaversicolor
virginica
5 6 7 8Sepal.Length
Sep
al.W
idth
13
4
5
6
7
8
2.0 2.5 3.0 3.5 4.0 4.5Sepal.Width
Sep
al.L
engt
h
Species
setosa
versicolor
virginica
Petal.Width
0.5
1.0
1.5
2.0
2.5
14
setosa versicolor virginica
4
5
6
7
8
2.0 2.5 3.0 3.5 4.0 4.52.0 2.5 3.0 3.5 4.0 4.52.0 2.5 3.0 3.5 4.0 4.5Sepal.Width
Sep
al.L
engt
h
Species
setosa
versicolor
virginica
Petal.Width
0.5
1.0
1.5
2.0
2.5
15
4
5
6
7
8
4
5
6
7
8
4
5
6
7
8
setosaversicolor
virginica
2.0 2.5 3.0 3.5 4.0 4.5Sepal.Width
Sep
al.L
engt
h
Species
setosa
versicolor
virginica
Petal.Width
0.5
1.0
1.5
2.0
2.5
# Reshaping the data with reshape2 (input for the ggplot2 figures) ----
library(reshape2)
# Give every flower an explicit identifier (its row name) so the individual
# measurements can be matched up again after reshaping.
iris$flower_id <- rownames(iris)

# Long format: one row per flower and measured variable. The non-numeric
# columns (Species, flower_id) are picked up as id variables.
iris_melted <- melt(iris)
## Using Species, flower_id as id variables
# Show the first rows of the long-format data.
head(iris_melted)
## Species flower_id variable value## 1 setosa 1 Sepal.Length 5.1## 2 setosa 2 Sepal.Length 4.9## 3 setosa 3 Sepal.Length 4.7## 4 setosa 4 Sepal.Length 4.6## 5 setosa 5 Sepal.Length 5.0## 6 setosa 6 Sepal.Length 5.4
# Split names such as "Sepal.Length" at the dot into the flower part
# ("Sepal") and the measurement type ("Length").
split_variable <- strsplit(as.character(iris_melted$variable), split = "\\.")
# vapply() guarantees a character vector with one value per row.
iris_melted$flower_part <- vapply(split_variable, "[", character(1), 1)
iris_melted$measurement_type <- vapply(split_variable, "[", character(1), 2)
# The combined column is no longer needed.
iris_melted$variable <- NULL
head(iris_melted)
## Species flower_id value flower_part measurement_type## 1 setosa 1 5.1 Sepal Length
16
## 2 setosa 2 4.9 Sepal Length## 3 setosa 3 4.7 Sepal Length## 4 setosa 4 4.6 Sepal Length## 5 setosa 5 5.0 Sepal Length## 6 setosa 6 5.4 Sepal Length
# Back to a wider layout: one row per flower and flower part, with the
# measurement types (Length, Width) spread into separate columns.
iris_cast <- dcast(
  iris_melted,
  formula = flower_id + Species + flower_part ~ measurement_type
)

# Length against width with a smoother, in a grid of flower part (rows)
# by species (columns).
qplot(
  x = Width,
  y = Length,
  data = iris_cast,
  geom = c("point", "smooth"),
  color = Species,
  method = "lm",
  facets = flower_part ~ Species
)

# Show the first rows of the reshaped data.
head(iris_cast)
## flower_id Species flower_part Length Width## 1 1 setosa Petal 1.4 0.2## 2 1 setosa Sepal 5.1 3.5## 3 10 setosa Petal 1.5 0.1## 4 10 setosa Sepal 4.9 3.1## 5 100 versicolor Petal 4.1 1.3## 6 100 versicolor Sepal 5.7 2.8
setosa versicolor virginica
2
4
6
8
2
4
6
8
Petal
Sepal
0 1 2 3 4 0 1 2 3 4 0 1 2 3 4Width
Leng
th
Species
setosa
versicolor
virginica
# Length against width in a grid of species (rows) by flower part (columns).
# Each facet gets its own axis ranges; the argument is spelled out in full
# (scales) instead of relying on partial argument matching.
ggplot(data = iris_cast, aes(x = Width, y = Length)) +
  geom_point() +
  facet_grid(Species ~ flower_part, scales = "free") +
  geom_smooth(method = "lm") +
  theme_bw(base_size = 24)
17
Petal Sepal
123456
34567
5678
setosaversicolor
virginica
0.0 0.5 1.0 1.5 2.0 2.52.0 2.5 3.0 3.5 4.0 4.5Width
Leng
th
# Store the plot in an object so it can be re-themed later: flower parts are
# told apart by shape and colour, with one facet per species.
my_plot <- ggplot(
  data = iris_cast,
  aes(x = Width, y = Length, shape = flower_part, color = flower_part)
) +
  geom_point() +
  facet_grid(~Species) +
  geom_smooth(method = "lm")

# Printing the object draws the plot.
my_plot
18
setosa versicolor virginica
2
4
6
8
0 1 2 3 4 0 1 2 3 4 0 1 2 3 4Width
Leng
th
flower_part
Petal
Sepal
# Apply ready-made themes from ggthemes to the stored plot.
library(ggthemes)
my_plot + theme_excel(base_size = 24)
my_plot + theme_wsj(base_size = 18)
19
setosa versicolor virginica
2
4
6
8
0 1 2 3 4 0 1 2 3 4 0 1 2 3 4Width
Leng
th
flower_part
Petal
Sepal
20