-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathREADME.Rmd
130 lines (115 loc) · 5.04 KB
/
README.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
---
title: "bipartite README"
output:
  html_document:
    keep_md: true
---
```{r, echo = FALSE}
# Attach knitr so that opts_chunk, knit_hooks and hook_pngquant are in scope.
library(knitr)

# Default chunk options applied to every chunk of this README.
readme_chunk_defaults <- list(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  fig.align = "center",
  fig.retina = 2,
  out.width = "75%",
  dpi = 96
)
do.call(opts_chunk$set, readme_chunk_defaults)

# Register knitr's hook_pngquant under the `pngquant` chunk option name.
knit_hooks$set(pngquant = hook_pngquant)
```
# finbipartite
This repo contains numerical implementations of algorithms to estimate weighted, undirected (possibly k-component) bipartite graphs.
## Installation
**finbipartite** depends on the development version of **spectralGraphTopology**.
You can install the development version from GitHub:
```{r, eval = FALSE}
# Install the spectralGraphTopology dependency first, then finbipartite itself.
# (No leading console prompt, so the lines can be copied and run as-is.)
devtools::install_github("convexfi/spectralGraphTopology")
devtools::install_github("convexfi/finbipartite")
```
#### Microsoft Windows
On MS Windows environments, make sure to install the most recent version of ``Rtools``.
## Usage
### Clustering a network of S&P500 stocks
```{r plot_sp500_stocks_network, message=FALSE}
library(fitHeavyTail)
library(xts)
library(quantmod)
library(igraph)
library(finbipartite)
library(readr)
# Seed the random number generator used by the steps below.
set.seed(42)

# Price table (xts) read from the bundled example data.
stock_prices <- readRDS("examples/stocks/sp500-data-2016-2021.rds")

# Display names of the sector nodes.
selected_sectors <- c(
  "Consumer Staples", "Energy", "Financials",
  "Health Care", "Industrials", "Materials",
  "Real Estate", "Utilities"
)

# Graph dimensions: q sector nodes, r stock nodes, p nodes in total.
# The first r columns of stock_prices are treated as stocks and the
# remaining q columns as sectors.
q <- 8
n_series <- ncol(stock_prices)
r <- n_series - q
p <- r + q

# Show which series fall on each side of the split.
series_names <- colnames(stock_prices)
series_names[1:r]
series_names[(r + 1):p]

# Log-returns: first difference of the log prices, without a padded first row.
log_returns <- diff(log(stock_prices), na.pad = FALSE)
# Fit a univariate Student-t distribution to the log-returns of the market
# index (^GSPC) to obtain an estimate of the degrees of freedom (nu).

# Request the index over the same date span as the stock price table.
sample_dates <- index(stock_prices)
gspc <- getSymbols(
  "^GSPC",
  from = sample_dates[1],
  to = sample_dates[nrow(stock_prices)],
  auto.assign = FALSE,
  verbose = FALSE
)

# Keep the adjusted series and turn it into log-returns.
mkt_index <- Ad(gspc)
mkt_index_log_returns <- diff(log(mkt_index), na.pad = FALSE)

# Only the fitted degrees of freedom are kept.
market_fit <- fit_mvt(mkt_index_log_returns, nu = "MLE-diag-resampled")
nu <- market_fit$nu
# Learn a bipartite graph with k = 8 components from the standardized
# log-returns, using the degrees of freedom estimated from the market index.
standardized_returns <- scale(log_returns)
graph_mrf <- learn_heavy_tail_kcomp_bipartite_graph(
  standardized_returns,
  r = r,
  q = q,
  k = 8,
  nu = nu,
  learning_rate = 1,
  verbose = FALSE
)
# Predicted label of each stock: the column index of the largest entry in its
# row of graph_mrf$B. vapply() allocates the result once (no vector growth
# inside a loop), seq_len() is safe when r is 0, and integer(1) makes a row
# without a well-defined maximum fail loudly instead of silently shortening
# the result.
labels_pred <- vapply(
  seq_len(r),
  function(i) which.max(graph_mrf$B[i, ]),
  integer(1)
)
# Build the network and attach the GICS sector information to it.
SP500 <- read_csv("examples/stocks/SP500-sectors.csv")

# Look up each stock's sector in the order of the price-table columns.
# match() keeps that order; filtering the CSV with %in% would return the
# sectors in CSV row order and could silently misalign them with the nodes.
stock_symbols <- colnames(stock_prices)[seq_len(r)]
sector_rows <- match(stock_symbols, SP500$Symbol)
if (anyNA(sector_rows)) {
  stop(
    "No sector entry found for: ",
    paste(stock_symbols[is.na(sector_rows)], collapse = ", "),
    call. = FALSE
  )
}
stock_sectors <- SP500$GICS.Sector[sector_rows]

# Integer sector codes follow the sorted factor levels of the sector names.
# NOTE(review): this assumes the sorted level order coincides with the order
# of the sector nodes (and of the colour palette) -- confirm against the data.
stock_sectors_index <- as.numeric(as.factor(stock_sectors))

net <- graph_from_adjacency_matrix(
  graph_mrf$adjacency,
  mode = "undirected",
  weighted = TRUE
)

# One colour per sector; stock nodes come first, followed by the q sector nodes.
colors <- c(
  "#55efc4", "#ff7675", "#0984e3", "#a29bfe",
  "#B33771", "#48dbfb", "#FDA7DF", "#C4E538"
)
V(net)$color <- c(colors[stock_sectors_index], colors)
V(net)$type <- c(rep(FALSE, r), rep(TRUE, q))
V(net)$cluster <- c(stock_sectors_index, seq_len(q))

# Colour an edge with its sector colour when both endpoints belong to the same
# cluster, and grey otherwise. Numeric vertex ids (names = FALSE) keep the
# indexing valid even when the graph carries vertex names.
edge_ends <- as_edgelist(net, names = FALSE)
from_cluster <- V(net)$cluster[edge_ends[, 1]]
to_cluster <- V(net)$cluster[edge_ends[, 2]]
E(net)$color <- ifelse(from_cluster == to_cluster, colors[from_cluster], "grey")
# Label only the stocks whose predicted cluster differs from their GICS
# sector code; every other stock gets an NA (i.e. empty) label.
agrees_with_gics <- labels_pred == stock_sectors_index
mask <- !agrees_with_gics
stock_names <- colnames(stock_prices)[1:r]
node_labels <- replace(stock_names, agrees_with_gics, NA)
# Plot the network: stocks are small circles, sectors are larger squares, and
# edge widths are proportional to the learned edge weights.
vertex_sizes <- rep(c(3, 5), times = c(r, q))
vertex_shapes <- rep(c("circle", "square"), times = c(r, q))
vertex_borders <- c(colors[stock_sectors_index], colors)
vertex_labels <- c(node_labels, selected_sectors)

plot(
  net,
  layout = layout_nicely(net),
  vertex.size = vertex_sizes,
  vertex.shape = vertex_shapes,
  vertex.frame.color = vertex_borders,
  vertex.label = vertex_labels,
  vertex.label.cex = 0.7,
  vertex.label.dist = 1.0,
  vertex.label.family = "Helvetica",
  vertex.label.color = "black",
  edge.width = 4 * E(net)$weight
)
```
## Citation
If you make use of this software, please consider citing:
- [J. V. de M. Cardoso](https://mirca.github.io), [J. Ying](https://github.com/jxying),
[D. P. Palomar](https://www.danielppalomar.com) (2022).
[Learning Bipartite Graphs: Heavy Tails and Multiple Components](https://papers.nips.cc/paper_files/paper/2022/hash/5adff4d5402703418f7210a4004e1314-Abstract-Conference.html).
[Advances in Neural Information Processing Systems](https://neurips.cc/Conferences/2022) (NeurIPS’22).
## Links
- [RFinance'23 Slides](https://github.com/mirca/rfinance-talk/blob/main/rfinance.pdf)
- [NeurIPS’22 Slides](https://palomar.home.ece.ust.hk/papers/2022/CardosoYingPalomar-NeurIPS2022-slides.pdf)
- [NeurIPS'22 Poster](https://palomar.home.ece.ust.hk/papers/2022/CardosoYingPalomar-NeurIPS2022-poster.pdf)
- [NeurIPS'22 Supplementary Material](https://papers.nips.cc/paper_files/paper/2022/file/5adff4d5402703418f7210a4004e1314-Supplemental-Conference.pdf)
- [CRAN Package](https://cran.r-project.org/package=finbipartite)