Skip to content

Commit 892c4a8

Browse files
authored
fix: Drop unused TextPoolGenerator (#141)
Closes #139
2 parents 08e8b86 + f7dca93 commit 892c4a8

1 file changed

Lines changed: 1 addition & 202 deletions

File tree

tpchgen/src/text.rs

Lines changed: 1 addition & 202 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,7 @@
55
//!
66
//! <https://github.com/trinodb/tpch/blob/master/src/main/java/io/trino/tpch/TextPool.java>
77
8-
use crate::{
9-
distribution::{Distribution, Distributions},
10-
random::RowRandomInt,
11-
};
8+
use crate::{distribution::Distributions, random::RowRandomInt};
129
use std::sync::OnceLock;
1310

1411
/// Pool of random text that follows TPC-H grammar.
@@ -157,201 +154,3 @@ impl TextPool {
157154
}
158155
}
159156
}
160-
161-
#[derive(Debug)]
162-
pub struct TextPoolGenerator {
163-
size: usize,
164-
165-
grammars: ParsedDistribution,
166-
noun_phrases: ParsedDistribution,
167-
verb_phrases: ParsedDistribution,
168-
prepositions: IndexedDistribution,
169-
terminators: IndexedDistribution,
170-
adverbs: IndexedDistribution,
171-
verbs: IndexedDistribution,
172-
auxiliaries: IndexedDistribution,
173-
articles: IndexedDistribution,
174-
adjectives: IndexedDistribution,
175-
nouns: IndexedDistribution,
176-
}
177-
178-
impl TextPoolGenerator {
179-
const MAX_SENTENCE_LENGTH: usize = 256;
180-
181-
pub fn new(size: usize, distributions: &Distributions) -> Self {
182-
TextPoolGenerator {
183-
size,
184-
grammars: ParsedDistribution::new(distributions.grammar()),
185-
noun_phrases: ParsedDistribution::new(distributions.noun_phrase()),
186-
verb_phrases: ParsedDistribution::new(distributions.verb_phrase()),
187-
prepositions: IndexedDistribution::new(distributions.prepositions()),
188-
terminators: IndexedDistribution::new(distributions.terminators()),
189-
adverbs: IndexedDistribution::new(distributions.adverbs()),
190-
verbs: IndexedDistribution::new(distributions.verbs()),
191-
auxiliaries: IndexedDistribution::new(distributions.auxiliaries()),
192-
articles: IndexedDistribution::new(distributions.articles()),
193-
adjectives: IndexedDistribution::new(distributions.adjectives()),
194-
nouns: IndexedDistribution::new(distributions.nouns()),
195-
}
196-
}
197-
198-
pub fn generate(&mut self) -> String {
199-
let mut output = String::with_capacity(self.size + Self::MAX_SENTENCE_LENGTH);
200-
let mut random_int = RowRandomInt::new(933588178, i32::MAX);
201-
202-
while output.len() < self.size {
203-
self.generate_sentence(&mut output, &mut random_int);
204-
}
205-
output.truncate(self.size);
206-
output
207-
}
208-
209-
fn generate_sentence(&self, builder: &mut String, random: &mut RowRandomInt) {
210-
let index = self.grammars.get_random_index(random);
211-
for token in self.grammars.get_tokens(index) {
212-
match token {
213-
'V' => self.generate_verb_phrase(builder, random),
214-
'N' => self.generate_noun_phrase(builder, random),
215-
'P' => {
216-
let preposition = self.prepositions.random_value(random);
217-
builder.push_str(preposition);
218-
builder.push_str(" the ");
219-
self.generate_noun_phrase(builder, random);
220-
}
221-
'T' => {
222-
// trim trailing space
223-
// terminators should abut previous word
224-
builder.pop();
225-
let terminator = self.terminators.random_value(random);
226-
builder.push_str(terminator);
227-
}
228-
_ => panic!("Unknown token '{}'", token),
229-
}
230-
231-
if !builder.ends_with(' ') {
232-
builder.push(' ');
233-
}
234-
}
235-
}
236-
237-
fn generate_verb_phrase(&self, builder: &mut String, random: &mut RowRandomInt) {
238-
let index = self.verb_phrases.get_random_index(random);
239-
for token in self.verb_phrases.get_tokens(index) {
240-
match token {
241-
'D' => builder.push_str(self.adverbs.random_value(random)),
242-
'V' => builder.push_str(self.verbs.random_value(random)),
243-
'X' => builder.push_str(self.auxiliaries.random_value(random)),
244-
_ => panic!("Unknown token '{}'", token),
245-
}
246-
247-
// string may end with a comma or such
248-
builder.push_str(self.verb_phrases.get_bonus_text(index));
249-
250-
// add a space
251-
builder.push(' ');
252-
}
253-
}
254-
255-
fn generate_noun_phrase(&self, builder: &mut String, random: &mut RowRandomInt) {
256-
let index = self.noun_phrases.get_random_index(random);
257-
for token in self.noun_phrases.get_tokens(index) {
258-
match token {
259-
'A' => builder.push_str(self.articles.random_value(random)),
260-
'J' => builder.push_str(self.adjectives.random_value(random)),
261-
'D' => builder.push_str(self.adverbs.random_value(random)),
262-
'N' => builder.push_str(self.nouns.random_value(random)),
263-
_ => panic!("Unknown token '{}'", token),
264-
}
265-
266-
// string may end with a comma or such
267-
builder.push_str(self.noun_phrases.get_bonus_text(index));
268-
269-
// add a space
270-
builder.push(' ');
271-
}
272-
}
273-
}
274-
275-
#[derive(Debug)]
276-
struct IndexedDistribution {
277-
random_table: Vec<String>,
278-
}
279-
280-
impl IndexedDistribution {
281-
fn new(distribution: &Distribution) -> Self {
282-
let max_weight = distribution.get_weight(distribution.size() - 1);
283-
let mut random_table = vec![String::new(); max_weight as usize];
284-
285-
let mut value_index = 0;
286-
for (i, item) in random_table.iter_mut().enumerate() {
287-
if i >= distribution.get_weight(value_index) as usize {
288-
value_index += 1;
289-
}
290-
*item = distribution.get_value(value_index).to_string();
291-
}
292-
293-
IndexedDistribution { random_table }
294-
}
295-
296-
fn random_value(&self, random: &mut RowRandomInt) -> &str {
297-
let random_index = random.next_int(0, self.random_table.len() as i32 - 1) as usize;
298-
&self.random_table[random_index]
299-
}
300-
}
301-
302-
#[derive(Debug)]
303-
struct ParsedDistribution {
304-
parsed_distribution: Vec<Vec<char>>,
305-
bonus_text: Vec<String>,
306-
random_table: Vec<usize>,
307-
}
308-
309-
impl ParsedDistribution {
310-
fn new(distribution: &Distribution) -> Self {
311-
let size = distribution.size();
312-
let mut parsed_distribution = Vec::with_capacity(size);
313-
let mut bonus_text = Vec::with_capacity(size);
314-
315-
for i in 0..size {
316-
let value = distribution.get_value(i);
317-
let tokens: Vec<&str> = value.split_whitespace().collect();
318-
319-
let mut chars = Vec::with_capacity(tokens.len());
320-
for token in &tokens {
321-
chars.push(token.chars().next().unwrap());
322-
bonus_text.push(token[1..].to_string());
323-
}
324-
parsed_distribution.push(chars);
325-
}
326-
327-
let max_weight = distribution.get_weight(size - 1);
328-
let mut random_table = vec![0; max_weight as usize];
329-
330-
let mut value_index = 0;
331-
for (i, item) in random_table.iter_mut().enumerate() {
332-
if i >= distribution.get_weight(value_index) as usize {
333-
value_index += 1;
334-
}
335-
*item = value_index;
336-
}
337-
338-
ParsedDistribution {
339-
parsed_distribution,
340-
bonus_text,
341-
random_table,
342-
}
343-
}
344-
345-
fn get_random_index(&self, random: &mut RowRandomInt) -> usize {
346-
let random_index = random.next_int(0, self.random_table.len() as i32 - 1) as usize;
347-
self.random_table[random_index]
348-
}
349-
350-
fn get_tokens(&self, index: usize) -> &[char] {
351-
&self.parsed_distribution[index]
352-
}
353-
354-
fn get_bonus_text(&self, index: usize) -> &str {
355-
&self.bonus_text[index]
356-
}
357-
}

0 commit comments

Comments
 (0)