Skip to content

Commit dc29edf

Browse files
Lucas-PratesGui-FernandesBR
authored andcommitted
ENH: implementing a draft version of the Multivarite Rejectio Sampler (MRS).
1 parent 72d9310 commit dc29edf

File tree

1 file changed

+220
-0
lines changed

1 file changed

+220
-0
lines changed
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
"""
2+
Multivariate Rejection Sampling Module for RocketPy
3+
4+
Notes
5+
-----
6+
This module is still under active development, and some features or attributes may
7+
change in future versions. Users are encouraged to check for updates and read the
8+
latest documentation.
9+
"""
10+
11+
import json
12+
from random import random
13+
14+
from rocketpy._encoders import RocketPyEncoder
15+
16+
17+
class MultivariateRejectionSampler:
18+
"""Class that performs Multivariate Rejection Sampling (MRS) from MonteCarlo
19+
results.
20+
"""
21+
22+
def __init__(
23+
self,
24+
montecarlo_filepath,
25+
mrs_filepath,
26+
distribution_dict,
27+
):
28+
"""Initializes Multivariate Rejection Sampler (MRS) class
29+
30+
Parameters
31+
----------
32+
montecarlo_filepath : str
33+
Filepath prefixes to the files created from a MonteCarlo simulation
34+
results.
35+
mrs_filepath : str
36+
Filepath prefix to MRS obtained samples. The files created follow the same
37+
structure as those created by the MonteCarlo class.
38+
distribution : dict
39+
Dictionary whose keys contain the name whose distribution changed. The values
40+
are tuples or lists with two entries. The first entry is a probability
41+
density (mass) function for the old distribution, while the second entry
42+
is the probability density function for the new distribution.
43+
44+
Returns
45+
-------
46+
None
47+
"""
48+
self.montecarlo_filepath = montecarlo_filepath
49+
self.mrs_filepath = mrs_filepath
50+
self.distribution_dict = distribution_dict
51+
self.original_sample_size = 0
52+
self.sup_ratio = 1
53+
self.expected_sample_size = None
54+
self.final_sample_size = None
55+
# TODO: is there a better way to construct input/output_list?
56+
# Iterating and appending over lists is costly. However, the
57+
# alternative, reading the file twice to get the number of lines,
58+
# also does not seem to be a good option.
59+
self.output_list = []
60+
self.input_list = []
61+
self.__setup_input()
62+
self.__load_output()
63+
64+
def __setup_input(self):
65+
"""Loads, validate and compute information from monte carlo
66+
input with a single read from the file.
67+
68+
This function does three things:
69+
1) Load: Loads the input data from MonteCarlo into python
70+
objects so the sampling process does not require reading from
71+
disk;
72+
2) Validate: Validates that the keys in 'distribution_dict' exist in
73+
the input json created by the monte carlo;
74+
3) Compute: Computes the supremum of the probability ratios, used in the
75+
sample function.
76+
77+
While these three tasks could be disentangled to get clearer
78+
code, the implementation as done here only requires a single
79+
read from disk.
80+
"""
81+
input_filename = f"{self.montecarlo_filepath}.inputs.txt"
82+
83+
try:
84+
input_file = open(input_filename, "r+", encoding="utf-8")
85+
except FileNotFoundError as e:
86+
raise FileNotFoundError(
87+
f"Input file from monte carlo {input_filename} " "not found!"
88+
) from e
89+
90+
for line in input_file.readlines():
91+
try:
92+
# loads data
93+
line_json = json.loads(line)
94+
self.input_list.append(line_json)
95+
self.original_sample_size += 1
96+
97+
prob_ratio = 1
98+
for parameter in self.distribution_dict.keys():
99+
# checks dictionary keys
100+
if parameter not in line_json.keys():
101+
raise ValueError(
102+
f"Parameter key {parameter} from 'distribution_dict' "
103+
"not found in input file!"
104+
)
105+
parameter_value = line_json[parameter]
106+
107+
prob_ratio *= self.__compute_probability_ratio(
108+
parameter, parameter_value
109+
)
110+
# updates the supremum of the ratio
111+
self.sup_ratio = max(self.sup_ratio, prob_ratio)
112+
except Exception as e:
113+
raise ValueError(
114+
"An error occurred while reading "
115+
f"the monte carlo input file {input_filename}!"
116+
) from e
117+
118+
self.expected_sample_size = self.original_sample_size // self.sup_ratio
119+
input_file.close()
120+
121+
def __load_output(self):
122+
"""Load data from monte carlo outputs."""
123+
output_filename = f"{self.montecarlo_filepath}.outputs.txt"
124+
sample_size_output = 0 # sanity check
125+
126+
try:
127+
output_file = open(output_filename, "r+", encoding="utf-8")
128+
except FileNotFoundError as e:
129+
raise FileNotFoundError(
130+
f"Output file from monte carlo {output_filename} " "not found!"
131+
) from e
132+
133+
for line in output_file.readlines():
134+
try:
135+
line_json = json.loads(line)
136+
self.output_list.append(line_json)
137+
sample_size_output += 1
138+
except Exception as e:
139+
raise ValueError(
140+
"An error occurred while reading "
141+
f"the monte carlo output file {output_filename}!"
142+
) from e
143+
144+
if self.original_sample_size != sample_size_output:
145+
raise ValueError(
146+
"Monte carlo input and output files have a different "
147+
"number of samples!"
148+
)
149+
150+
output_file.close()
151+
152+
def sample(self):
153+
"""Performs rejection sampling and saves data
154+
155+
Returns
156+
-------
157+
None
158+
"""
159+
160+
mrs_input_file = open(f"{self.mrs_filepath}.inputs.txt", "w+", encoding="utf-8")
161+
mrs_output_file = open(
162+
f"{self.mrs_filepath}.outputs.txt", "w+", encoding="utf-8"
163+
)
164+
mrs_error_file = open(f"{self.mrs_filepath}.errors.txt", "w+", encoding="utf-8")
165+
166+
# compute sup ratio
167+
for line_input_json, line_output_json in zip(self.input_list, self.output_list):
168+
acceptance_prob = 1 / self.sup_ratio # probability the sample is accepted
169+
for parameter in self.distribution_dict.keys():
170+
parameter_value = line_input_json[parameter]
171+
acceptance_prob *= self.__compute_probability_ratio(
172+
parameter,
173+
parameter_value,
174+
)
175+
# sample is accepted, write output
176+
if random() < acceptance_prob:
177+
mrs_input_file.write(
178+
json.dumps(line_input_json, cls=RocketPyEncoder) + "\n"
179+
)
180+
mrs_output_file.write(
181+
json.dumps(line_output_json, cls=RocketPyEncoder) + "\n"
182+
)
183+
184+
mrs_input_file.close()
185+
mrs_output_file.close()
186+
mrs_error_file.close()
187+
188+
def __compute_probability_ratio(self, parameter, parameter_value):
189+
"""Computes the ratio of the new probability to the old probability
190+
191+
Parameters
192+
----------
193+
parameter : str
194+
Name of the parameter to evaluate the probability.
195+
parameter_value : any
196+
Value of the parameter to be passed to the density functions.
197+
198+
Returns
199+
-------
200+
float
201+
The ratio of the new probability density function (numerator)
202+
to the old one (denominator).
203+
204+
Raises
205+
------
206+
ValueError
207+
Raises exception if an error occurs when computing the ratio.
208+
"""
209+
try:
210+
old_pdf = self.distribution_dict[parameter][0]
211+
new_pdf = self.distribution_dict[parameter][1]
212+
probability_ratio = new_pdf(parameter_value) / old_pdf(parameter_value)
213+
except Exception as e:
214+
raise ValueError(
215+
"An error occurred while evaluating the "
216+
"ratio for 'distribution_dict' probability "
217+
f"parameter key {parameter}!"
218+
) from e
219+
220+
return probability_ratio

0 commit comments

Comments
 (0)