Skip to content

Commit 7283e22

Browse files
author
OVVO-Financial
committed
NNS 11.1 Beta
1 parent 584196e commit 7283e22

10 files changed

+205
-5
lines changed

DESCRIPTION

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: NNS
22
Type: Package
33
Title: Nonlinear Nonparametric Statistics
4-
Version: 11.0
5-
Date: 2025-01-09
4+
Version: 11.1
5+
Date: 2025-02-13
66
Authors@R: c(
77
person("Fred", "Viole", role=c("aut","cre"), email="[email protected]"),
88
person("Roberto", "Spadim", role=c("ctb"))

NAMESPACE

+3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export(NNS.CDF)
1414
export(NNS.FSD)
1515
export(NNS.FSD.uni)
1616
export(NNS.MC)
17+
export(NNS.SD.cluster)
1718
export(NNS.SD.efficient.set)
1819
export(NNS.SSD)
1920
export(NNS.SSD.uni)
@@ -79,6 +80,7 @@ importFrom(stats,.preformat.ts)
7980
importFrom(stats,acf)
8081
importFrom(stats,aggregate)
8182
importFrom(stats,approx)
83+
importFrom(stats,as.dist)
8284
importFrom(stats,coef)
8385
importFrom(stats,complete.cases)
8486
importFrom(stats,cor)
@@ -93,6 +95,7 @@ importFrom(stats,embed)
9395
importFrom(stats,fivenum)
9496
importFrom(stats,frequency)
9597
importFrom(stats,hat)
98+
importFrom(stats,hclust)
9699
importFrom(stats,is.ts)
97100
importFrom(stats,lm)
98101
importFrom(stats,median)

NNS_11.0.tar.gz

-1.18 MB
Binary file not shown.

NNS_11.1.tar.gz

1.18 MB
Binary file not shown.

NNS_11.0.zip NNS_11.1.zip

852 KB
Binary file not shown.

R/SD_Cluster.R

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#' NNS SD-based Clustering
2+
#'
3+
#' Clusters a set of variables by iteratively extracting Stochastic Dominance (SD)-efficient sets,
4+
#' subject to a minimum cluster size.
5+
#'
6+
#' @param data A numeric matrix or data frame of variables to be clustered.
7+
#' @param degree Numeric options: (1, 2, 3). Degree of stochastic dominance test.
8+
#' @param type Character, either \code{"continuous"} (default) or \code{"discrete"}; specifies the type of CDF.
9+
#' @param min_cluster Integer. The minimum number of elements required for a valid cluster.
10+
#' @param dendrogram Logical; \code{FALSE} (default). If \code{TRUE}, a dendrogram is produced based on a simple "distance" measure between clusters.
11+
#'
12+
#' @return
13+
#' A list with the following components:
14+
#' \itemize{
15+
#' \item \code{Clusters}: A named list of cluster memberships where each element is the set of variable names belonging to that cluster.
16+
#' \item \code{Dendrogram} (optional): If \code{dendrogram = TRUE}, an \code{hclust} object is also returned.
17+
#' }
18+
#'
19+
#' @details
20+
#' The function applies \code{\link{NNS.SD.efficient.set}} iteratively, peeling off the SD-efficient set at each step
21+
#' if it meets or exceeds \code{min_cluster} in size, until no more subsets can be extracted or all variables are exhausted.
22+
#' Variables in each SD-efficient set form a cluster, with any remaining variables aggregated into the final cluster if it meets
23+
#' the \code{min_cluster} threshold.
24+
#'
25+
#' @author Fred Viole, OVVO Financial Systems
26+
#'
27+
#' @references Viole, F. and Nawrocki, D. (2016) "LPM Density Functions for the Computation of the SD Efficient Set." Journal of Mathematical Finance, 6, 105-126. \doi{10.4236/jmf.2016.61012}.
28+
#'
29+
#' Viole, F. (2017) "A Note on Stochastic Dominance." \doi{10.2139/ssrn.3002675}
30+
#'
31+
#' @examples
32+
#' \dontrun{
33+
#' set.seed(123)
34+
#' x <- rnorm(100)
35+
#' y <- rnorm(100)
36+
#' z <- rnorm(100)
37+
#' A <- cbind(x, y, z)
38+
#'
39+
#' # Perform SD-based clustering (degree 1), requiring at least 2 elements per cluster
40+
#' results <- NNS.SD.cluster(data = A, degree = 1, min_cluster = 2)
41+
#' print(results$Clusters)
42+
#'
43+
#' # Produce a dendrogram as well
44+
#' results_with_dendro <- NNS.SD.cluster(data = A, degree = 1, min_cluster = 2, dendrogram = TRUE)
45+
#' plot(results_with_dendro$Dendrogram)
46+
#' }
47+
#'
48+
#' @export
49+
50+
51+
NNS.SD.cluster <- function(data, degree = 1, type = "continuous", min_cluster = 1, dendrogram = FALSE) {
52+
clusters <- list()
53+
iteration <- 1
54+
55+
# Ensure the input data is a matrix
56+
remaining_data <- as.matrix(data)
57+
58+
# Continue clustering until the number of remaining columns is less than or equal to min_cluster
59+
while (ncol(remaining_data) > min_cluster) {
60+
# Use the original NNS.SD.efficient.set call as provided
61+
SD_set <- NNS.SD.efficient.set(remaining_data, degree = degree, type = type, status = FALSE)
62+
63+
if (length(SD_set) == 0) {
64+
break
65+
}
66+
67+
# Store the SD-efficient set as a cluster
68+
clusters[[paste0("Cluster_", iteration)]] <- SD_set
69+
70+
# Remove the identified SD set from remaining_data
71+
remaining_data <- remaining_data[, !(colnames(remaining_data) %in% SD_set), drop = FALSE]
72+
73+
# Ensure remaining_data remains a matrix
74+
remaining_data <- as.matrix(remaining_data)
75+
76+
iteration <- iteration + 1
77+
78+
# If the number of remaining columns is now less than or equal to min_cluster, add them as the final cluster
79+
if (ncol(remaining_data) <= min_cluster) {
80+
clusters[[paste0("Cluster_", iteration)]] <- colnames(remaining_data)
81+
break
82+
}
83+
}
84+
85+
# If there are still variables left (and not already added), add them as the final cluster
86+
if (ncol(remaining_data) > min_cluster && !paste0("Cluster_", iteration) %in% names(clusters)) {
87+
clusters[[paste0("Cluster_", iteration)]] <- colnames(remaining_data)
88+
}
89+
90+
# Check if the final cluster has fewer elements than min_cluster; if so, merge it with the previous cluster (if one exists)
91+
final_cluster_name <- paste0("Cluster_", length(clusters))
92+
if (length(clusters[[final_cluster_name]]) < min_cluster && length(clusters) > 1) {
93+
previous_cluster_name <- paste0("Cluster_", length(clusters) - 1)
94+
clusters[[previous_cluster_name]] <- c(clusters[[previous_cluster_name]], clusters[[final_cluster_name]])
95+
clusters[[final_cluster_name]] <- NULL
96+
}
97+
98+
# Flatten the clusters into a single vector and generate cluster labels
99+
all_vars <- unlist(clusters)
100+
cluster_labels <- unlist(lapply(seq_along(clusters), function(i) rep(i, length(clusters[[i]]))))
101+
102+
103+
if(dendrogram){
104+
# Ensure there are at least two variables for hierarchical clustering
105+
if (length(all_vars) < 2) {
106+
warning("Not enough variables for hierarchical clustering. Returning clusters only.")
107+
return(list("Clusters" = clusters, "Dendrogram" = NULL))
108+
}
109+
110+
# Create a distance matrix based on cluster labels
111+
dist_matrix <- as.dist(outer(cluster_labels, cluster_labels, function(a, b) abs(a - b)))
112+
# For a "dist" object, assign labels using the Labels attribute instead of rownames.
113+
attr(dist_matrix, "Labels") <- all_vars
114+
115+
# Perform hierarchical clustering
116+
hc <- hclust(dist_matrix, method = "complete")
117+
118+
plot(hc,
119+
main = paste0("Hierarchical Clustering of Stochastic Dominance Sets \nSD Degree: ", degree),
120+
xlab = "Variables",
121+
ylab = "SD Distance",
122+
sub = ""
123+
)
124+
125+
126+
return(list("Clusters" = clusters, "Dendrogram" = hc))
127+
} else return(list("Clusters" = clusters))
128+
}
129+

R/gvload.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#' @importFrom graphics abline boxplot legend lines par plot points segments text matplot title axis mtext barplot hist strwidth polygon
55
#' @importFrom quantmod getSymbols
66
#' @importFrom Rfast colmeans rowmeans rowsums comb_n
7-
#' @importFrom stats coef cor cov lm na.omit sd median complete.cases resid uniroot aggregate density hat qnorm model.matrix fivenum acf qt ecdf time approx embed frequency is.ts runif start ts optim quantile optimize dnorm dlnorm dexp dt t.test wilcox.test .preformat.ts var poly
7+
#' @importFrom stats coef cor cov lm na.omit sd median complete.cases resid uniroot aggregate density hat qnorm model.matrix fivenum acf qt ecdf time approx embed frequency is.ts runif start ts optim quantile optimize dnorm dlnorm dexp dt t.test wilcox.test .preformat.ts var poly hclust as.dist
88
#' @importFrom utils globalVariables head tail combn flush.console
99
#' @importFrom xts to.monthly
1010
#' @importFrom zoo as.yearmon index

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44

55

6-
[![packageversion](https://img.shields.io/badge/NNS%20version-11.0-blue.svg?style=flat-square)](https://github.com/OVVO-Financial/NNS/commits/NNS-Beta-Version) [![Licence](https://img.shields.io/badge/licence-GPL--3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0.en.html)
6+
[![packageversion](https://img.shields.io/badge/NNS%20version-11.1-blue.svg?style=flat-square)](https://github.com/OVVO-Financial/NNS/commits/NNS-Beta-Version) [![Licence](https://img.shields.io/badge/licence-GPL--3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0.en.html)
77

88
<h2 style="margin: 0; padding: 0; border: none; height: 40px;"></h2>
99

@@ -56,7 +56,7 @@ Please see https://github.com/OVVO-Financial/NNS/blob/NNS-Beta-Version/examples/
5656
title = {NNS: Nonlinear Nonparametric Statistics},
5757
author = {Fred Viole},
5858
year = {2016},
59-
note = {R package version 11.0},
59+
note = {R package version 11.1},
6060
url = {https://CRAN.R-project.org/package=NNS},
6161
}
6262
```

man/NNS.SD.cluster.Rd

+68
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/NNS.dll

0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)