library(outlierensembles)
library(ggplot2)
library(outlierensembles)
Let us add some anomalies inside a ring.
# Simulate 803 points scattered around a ring, then move the last three
# into the middle of the ring so that they sit apart from the rest.
set.seed(1)  # fixed seed so the simulated data are reproducible
r1 <- runif(803)
r2 <- rnorm(803, mean = 5)
theta <- 2 * pi * r1  # angle: uniform draw scaled to [0, 2 * pi]
R1 <- 2  # not used in the visible part of this script
R2 <- 2
dist <- r2 + R2  # radius: normal draw (mean 5) shifted by R2
x <- dist * cos(theta)
y <- dist * sin(theta)

X <- data.frame(
  x1 = x,
  x2 = y
)

# 0 for the first 800 rows, 1 for the three rows that are replaced below.
labs <- c(rep(0, 800), rep(1, 3))
nn <- dim(X)[1]
knn_auc <- lof_auc <- cof_auc <- rep(0, 10)

# Overwrite rows 801-803 with points drawn close to the origin (sd = 0.2).
mu <- 0
z <- cbind(rnorm(3, mu, sd = 0.2), rnorm(3, 0, sd = 0.2))
X[801:803, 1:2] <- z

ggplot(X, aes(x1, x2)) + geom_point()
Let us find outliers using the DDoutlier R package and use the IRT ensemble to construct an ensemble score.
# Score every row of X with seven DDoutlier detectors; each call yields one
# score per observation.
y1 <- DDoutlier::KNN_AGG(X, k_min = 10, k_max = 20)
y2 <- DDoutlier::LOF(X, k = 10)
y3 <- DDoutlier::COF(X, k = 10)
y4 <- DDoutlier::INFLO(X, k = 10)
y5 <- DDoutlier::KDEOS(X, k_min = 10, k_max = 20)
y6 <- DDoutlier::LDF(X, k = 10)
y7 <- DDoutlier::LDOF(X, k = 10)

# One column per detector; this is the input to the ensemble functions.
Y <- cbind.data.frame(y1, y2, y3, y4, y5, y6, y7)

# IRT ensemble: the combined score is read from the `scores` element.
ens1 <- irt_ensemble(Y)
df <- cbind.data.frame(X, ens1$scores)
colnames(df)[3] <- "IRT"

ggplot(df, aes(x1, x2)) +
  geom_point(aes(color = IRT)) +
  scale_color_gradient(low = "yellow", high = "red")
Then we do the greedy ensemble.
# Greedy ensemble: the combined score is read from the `scores` element.
ens2 <- greedy_ensemble(Y)
df <- cbind.data.frame(X, ens2$scores)
colnames(df)[3] <- "Greedy"

ggplot(df, aes(x1, x2)) +
  geom_point(aes(color = Greedy)) +
  scale_color_gradient(low = "yellow", high = "red")
We do the ICWA ensemble next.
# ICWA ensemble: the returned value is bound to X directly as the third
# column.
ens3 <- icwa_ensemble(Y)
df <- cbind.data.frame(X, ens3)
colnames(df)[3] <- "ICWA"

ggplot(df, aes(x1, x2)) +
  geom_point(aes(color = ICWA)) +
  scale_color_gradient(low = "yellow", high = "red")
Next, we use the maximum scores to build the ensemble.
# Maximum-score ensemble: the returned value is bound to X directly as the
# third column.
ens4 <- max_ensemble(Y)
df <- cbind.data.frame(X, ens4)
colnames(df)[3] <- "Max"

ggplot(df, aes(x1, x2)) +
  geom_point(aes(color = Max)) +
  scale_color_gradient(low = "yellow", high = "red")
Then, we use a threshold sum to construct the ensemble.
# Threshold-sum ensemble: the returned value is bound to X directly as the
# third column.
ens5 <- threshold_ensemble(Y)
df <- cbind.data.frame(X, ens5)
colnames(df)[3] <- "Threshold"

ggplot(df, aes(x1, x2)) +
  geom_point(aes(color = Threshold)) +
  scale_color_gradient(low = "yellow", high = "red")
Finally, we use the mean values as the ensemble score.
# Average ensemble: the returned value is bound to X directly as the third
# column.
ens6 <- average_ensemble(Y)
df <- cbind.data.frame(X, ens6)
colnames(df)[3] <- "Average"

ggplot(df, aes(x1, x2)) +
  geom_point(aes(color = Average)) +
  scale_color_gradient(low = "yellow", high = "red")