Skip to content

Commit 577f26e

Browse files
authored
Merge pull request #154 from openvenues/setup_datadir_functions
Setup datadir functions
2 parents a2b84a0 + bbc9172 commit 577f26e

12 files changed

+124
-30
lines changed

configure.ac

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- Autoconf -*-
22
# Process this file with autoconf to produce a configure script.
33

4-
AC_INIT([libpostal], [0.3])
4+
AC_INIT([libpostal], [0.3.3])
55

66
AM_INIT_AUTOMAKE([foreign subdir-objects])
77
AC_CONFIG_SRCDIR([src])

src/address_dictionary.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121

2222
#define ALL_LANGUAGES "all"
2323

24-
#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_expansions" PATH_SEPARATOR "address_dictionary.dat"
24+
#define ADDRESS_DICTIONARY_DATA_FILE "address_dictionary.dat"
25+
#define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR PATH_SEPARATOR ADDRESS_DICTIONARY_DATA_FILE
2526

2627
#define NULL_CANONICAL_INDEX -1
2728

src/features.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ void feature_array_add(cstring_array *features, size_t count, ...) {
1212
cstring_array_start_token(features);
1313

1414
bool strip_separator = true;
15-
char_array_append_vjoined(features->str, FEATURE_SEPARATOR_CHAR, strip_separator, count, args);
15+
char_array_add_vjoined(features->str, FEATURE_SEPARATOR_CHAR, strip_separator, count, args);
1616
va_end(args);
1717
}
1818

src/file_utils.c

+15
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,21 @@ bool is_relative_path(struct dirent *ent) {
3636
return strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0;
3737
}
3838

39+
char *path_vjoin(int n, va_list args) {
40+
char_array *path = char_array_new();
41+
if (path == NULL) return NULL;
42+
char_array_add_vjoined(path, PATH_SEPARATOR, true, n, args);
43+
return char_array_to_string(path);
44+
}
45+
46+
/*
 * Variadic front end for path_vjoin: joins the n path components passed
 * after n and returns path_vjoin's result unchanged (NULL included).
 */
char *path_join(int n, ...) {
    char *joined;
    va_list components;

    va_start(components, n);
    joined = path_vjoin(n, components);
    va_end(components);

    return joined;
}
53+
3954
inline uint64_t file_deserialize_uint64(unsigned char *buf) {
4055
return ((uint64_t)buf[0] << 56) |
4156
((uint64_t)buf[1] << 48) |

src/file_utils.h

+4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <sys/types.h>
1010

1111
#include "libpostal_config.h"
12+
#include "string_utils.h"
1213

1314
#ifdef HAVE_DIRENT_H
1415
#include <dirent.h>
@@ -55,6 +56,9 @@ char *file_getline(FILE * f);
5556

5657
bool is_relative_path(struct dirent *ent);
5758

59+
char *path_join(int n, ...);
60+
char *path_vjoin(int n, va_list args);
61+
5862
uint64_t file_deserialize_uint64(unsigned char *buf);
5963
bool file_read_uint64(FILE *file, uint64_t *value);
6064
bool file_write_uint64(FILE *file, uint64_t value);

src/libpostal.c

+76-15
Original file line numberDiff line numberDiff line change
@@ -1054,47 +1054,108 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
10541054
return parsed;
10551055
}
10561056

1057-
bool libpostal_setup(void) {
1058-
if (!transliteration_module_setup(NULL)) {
1059-
log_error("Error loading transliteration module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
1057+
bool libpostal_setup_datadir(char *datadir) {
1058+
char *transliteration_path = NULL;
1059+
char *numex_path = NULL;
1060+
char *address_dictionary_path = NULL;
1061+
1062+
if (datadir != NULL) {
1063+
transliteration_path = path_join(3, datadir, LIBPOSTAL_TRANSLITERATION_SUBDIR, TRANSLITERATION_DATA_FILE);
1064+
numex_path = path_join(3, datadir, LIBPOSTAL_NUMEX_SUBDIR, NUMEX_DATA_FILE);
1065+
address_dictionary_path = path_join(3, datadir, LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR, ADDRESS_DICTIONARY_DATA_FILE);
1066+
}
1067+
1068+
if (!transliteration_module_setup(transliteration_path)) {
1069+
log_error("Error loading transliteration module, dir=%s\n", transliteration_path);
10601070
return false;
10611071
}
10621072

1063-
if (!numex_module_setup(NULL)) {
1064-
log_error("Error loading numex module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
1073+
if (!numex_module_setup(numex_path)) {
1074+
log_error("Error loading numex module, dir=%s\n", numex_path);
10651075
return false;
10661076
}
10671077

1068-
if (!address_dictionary_module_setup(NULL)) {
1069-
log_error("Error loading dictionary module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
1078+
if (!address_dictionary_module_setup(address_dictionary_path)) {
1079+
log_error("Error loading dictionary module, dir=%s\n", address_dictionary_path);
10701080
return false;
10711081
}
10721082

1083+
if (transliteration_path != NULL) {
1084+
free(transliteration_path);
1085+
}
1086+
1087+
if (numex_path != NULL) {
1088+
free(numex_path);
1089+
}
1090+
1091+
if (address_dictionary_path != NULL) {
1092+
free(address_dictionary_path);
1093+
}
1094+
10731095
return true;
10741096
}
10751097

1076-
bool libpostal_setup_language_classifier(void) {
1077-
if (!language_classifier_module_setup(NULL)) {
1078-
log_error("Error loading language classifier, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
1098+
/*
 * Convenience entry point: runs libpostal_setup_datadir with a NULL data
 * directory and returns its result.
 */
bool libpostal_setup(void) {
    char *no_datadir = NULL;
    return libpostal_setup_datadir(no_datadir);
}
1101+
1102+
bool libpostal_setup_language_classifier_datadir(char *datadir) {
1103+
char *language_classifier_dir = NULL;
1104+
1105+
if (datadir != NULL) {
1106+
language_classifier_dir = path_join(2, datadir, LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR);
1107+
}
1108+
1109+
if (!language_classifier_module_setup(language_classifier_dir)) {
1110+
log_error("Error loading language classifier, dir=%s\n", language_classifier_dir);
10791111
return false;
10801112
}
1113+
1114+
if (language_classifier_dir != NULL) {
1115+
free(language_classifier_dir);
1116+
}
1117+
10811118
return true;
10821119
}
10831120

1084-
bool libpostal_setup_parser(void) {
1085-
if (!geodb_module_setup(NULL)) {
1086-
log_error("Error loading geodb module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
1121+
/*
 * Convenience entry point: runs libpostal_setup_language_classifier_datadir
 * with a NULL data directory and returns its result.
 */
bool libpostal_setup_language_classifier(void) {
    char *no_datadir = NULL;
    return libpostal_setup_language_classifier_datadir(no_datadir);
}
1124+
1125+
bool libpostal_setup_parser_datadir(char *datadir) {
1126+
char *parser_dir = NULL;
1127+
char *geodb_dir = NULL;
1128+
1129+
if (datadir != NULL) {
1130+
parser_dir = path_join(2, datadir, LIBPOSTAL_ADDRESS_PARSER_SUBDIR);
1131+
geodb_dir = path_join(2, datadir, LIBPOSTAL_GEODB_SUBDIR);
1132+
}
1133+
1134+
if (!geodb_module_setup(geodb_dir)) {
1135+
log_error("Error loading geodb module, dir=%s\n", geodb_dir);
10871136
return false;
10881137
}
10891138

1090-
if (!address_parser_module_setup(NULL)) {
1091-
log_error("Error loading address parser module, LIBPOSTAL_DATA_DIR=%s\n", LIBPOSTAL_DATA_DIR);
1139+
if (!address_parser_module_setup(parser_dir)) {
1140+
log_error("Error loading address parser module, dir=%s\n", parser_dir);
10921141
return false;
10931142
}
10941143

1144+
if (parser_dir != NULL) {
1145+
free(parser_dir);
1146+
}
1147+
1148+
if (geodb_dir != NULL) {
1149+
free(geodb_dir);
1150+
}
1151+
10951152
return true;
10961153
}
10971154

1155+
/*
 * Convenience entry point: runs libpostal_setup_parser_datadir with a NULL
 * data directory and returns its result.
 */
bool libpostal_setup_parser(void) {
    char *no_datadir = NULL;
    return libpostal_setup_parser_datadir(no_datadir);
}
1158+
10981159
void libpostal_teardown(void) {
10991160
transliteration_module_teardown();
11001161

src/libpostal.h

+3
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,15 @@ address_parser_response_t *parse_address(char *address, address_parser_options_t
9292
// Setup/teardown methods
9393

9494
bool libpostal_setup(void);
95+
bool libpostal_setup_datadir(char *datadir);
9596
void libpostal_teardown(void);
9697

9798
bool libpostal_setup_parser(void);
99+
bool libpostal_setup_parser_datadir(char *datadir);
98100
void libpostal_teardown_parser(void);
99101

100102
bool libpostal_setup_language_classifier(void);
103+
bool libpostal_setup_language_classifier_datadir(char *datadir);
101104
void libpostal_teardown_language_classifier(void);
102105

103106
#ifdef __cplusplus

src/libpostal_config.h

+14-6
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,20 @@
1212
#error LIBPOSTAL_DATA_DIR not defined!
1313
#endif
1414

15-
#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_parser"
16-
#define LIBPOSTAL_DICTIONARIES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "dictionaries"
17-
#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geonames"
18-
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "geodb"
19-
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "language_classifier"
20-
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR "transliteration"
15+
#define LIBPOSTAL_ADDRESS_PARSER_SUBDIR "address_parser"
16+
#define LIBPOSTAL_ADDRESS_PARSER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_PARSER_SUBDIR
17+
#define LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR "address_expansions"
18+
#define LIBPOSTAL_ADDRESS_EXPANSIONS_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR
19+
#define LIBPOSTAL_GEONAMES_SUBDIR "geonames"
20+
#define LIBPOSTAL_GEONAMES_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEONAMES_SUBDIR
21+
#define LIBPOSTAL_GEODB_SUBDIR "geodb"
22+
#define LIBPOSTAL_GEODB_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_GEODB_SUBDIR
23+
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR "language_classifier"
24+
#define LIBPOSTAL_LANGUAGE_CLASSIFIER_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_LANGUAGE_CLASSIFIER_SUBDIR
25+
#define LIBPOSTAL_NUMEX_SUBDIR "numex"
26+
#define LIBPOSTAL_NUMEX_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_NUMEX_SUBDIR
27+
#define LIBPOSTAL_TRANSLITERATION_SUBDIR "transliteration"
28+
#define LIBPOSTAL_TRANSLITERATION_DIR LIBPOSTAL_DATA_DIR PATH_SEPARATOR LIBPOSTAL_TRANSLITERATION_SUBDIR
2129

2230
#define GEODB_BLOOM_FILTER_SIZE 100000000
2331
#define GEODB_BLOOM_FILTER_ERROR 0.001

src/numex.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
#include "trie.h"
2121
#include "trie_search.h"
2222

23-
#define DEFAULT_NUMEX_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "numex" PATH_SEPARATOR "numex.dat"
23+
// File name of the numex data file inside the numex data directory.
#define NUMEX_DATA_FILE "numex.dat"
// Default numex data path, built from LIBPOSTAL_NUMEX_DIR so the "numex"
// subdirectory name is spelled in exactly one place.
#define DEFAULT_NUMEX_PATH LIBPOSTAL_NUMEX_DIR PATH_SEPARATOR NUMEX_DATA_FILE
2425

2526
#define LATIN_LANGUAGE_CODE "la"
2627

src/string_utils.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ inline void char_array_add_len(char_array *array, char *str, size_t len) {
595595
}
596596

597597

598-
void char_array_append_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args) {
598+
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args) {
599599
if (count <= 0) {
600600
return;
601601
}
@@ -625,15 +625,15 @@ void char_array_append_vjoined(char_array *array, char *separator, bool strip_se
625625
/*
 * Variadic front end for char_array_add_vjoined: forwards the count
 * arguments that follow count, together with separator and
 * strip_separator, to be joined into array.
 */
inline void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
    va_list parts;

    va_start(parts, count);
    char_array_add_vjoined(array, separator, strip_separator, count, parts);
    va_end(parts);
}
631631

632632
/*
 * Same as char_array_add_joined, except that char_array_strip_nul_byte is
 * called on array first, before the joined arguments are added via
 * char_array_add_vjoined.
 */
inline void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...) {
    va_list parts;

    char_array_strip_nul_byte(array);

    va_start(parts, count);
    char_array_add_vjoined(array, separator, strip_separator, count, parts);
    va_end(parts);
}
639639

src/string_utils.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ void char_array_cat_vprintf(char_array *array, char *format, va_list args);
136136
void char_array_cat_printf(char_array *array, char *format, ...);
137137

138138
// Mainly for paths or delimited strings
139-
void char_array_append_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
139+
void char_array_add_vjoined(char_array *array, char *separator, bool strip_separator, int count, va_list args);
140140
void char_array_add_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
141141
void char_array_cat_joined(char_array *array, char *separator, bool strip_separator, int count, ...);
142142

src/transliterate.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515

1616
#define LATIN_ASCII "latin-ascii"
1717

18-
#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR "transliteration.dat"
18+
#define TRANSLITERATION_DATA_FILE "transliteration.dat"
19+
#define DEFAULT_TRANSLITERATION_PATH LIBPOSTAL_TRANSLITERATION_DIR PATH_SEPARATOR TRANSLITERATION_DATA_FILE
1920

2021
#define MAX_TRANS_NAME_LEN 100
2122

0 commit comments

Comments
 (0)