@@ -260,6 +260,7 @@ json yaml_base_emitter::base_emitter_node(std::string layout,
260
260
node[" hyde" ][" owner" ] = default_tag_value;
261
261
node[" hyde" ][" tags" ].emplace_back (std::move (tag));
262
262
node[" hyde" ][" brief" ] = default_tag_value;
263
+ node[" hyde" ][" version" ] = hyde_version ();
263
264
264
265
return node;
265
266
}
@@ -392,6 +393,7 @@ void yaml_base_emitter::check_notify(const std::string& filepath,
392
393
std::cerr << filepath << " @" << escaped_nodepath << " ['" << escaped_key
393
394
<< " ']: " << validate_message << " \n " ;
394
395
} break ;
396
+ case yaml_mode::transcribe:
395
397
case yaml_mode::update: {
396
398
std::cout << filepath << " @" << escaped_nodepath << " ['" << escaped_key
397
399
<< " ']: " << update_message << " \n " ;
@@ -980,6 +982,78 @@ bool yaml_base_emitter::check_object_array(const std::string& filepath,
980
982
981
983
/* *************************************************************************************************/
982
984
985
+ std::vector<std::string> object_keys (const json& j) {
986
+ std::vector<std::string> result;
987
+
988
+ for (auto iter{j.begin ()}, last{j.end ()}; iter != last; ++iter) {
989
+ result.push_back (static_cast <const std::string&>(iter.key ()));
990
+ }
991
+
992
+ return result;
993
+ }
994
+
995
// Appends every element of `src` to the end of `dst`, moving rather than copying them.
//
// `T` is deduced from both parameters, so `src` must be an rvalue of the same container type
// as `dst`. After the call `src` still holds its (moved-from) elements.
template <class T>
inline void move_append(T& dst, T&& src) {
    auto moved_first = std::make_move_iterator(src.begin());
    auto moved_last = std::make_move_iterator(src.end());

    dst.insert(dst.end(), moved_first, moved_last);
}
999
+
1000
// One key mapping produced for transcription: documentation stored under the key `src`
// is to be carried over to the key `dst`.
struct transcribe_pair {
    // key name on the source side
    std::string src;
    // key name on the destination side
    std::string dst;
};

// An ordered collection of key mappings.
using transcribe_pairs = std::vector<transcribe_pair>;
1006
+
1007
+ // This is O(N^2), where N is the size of both `src` and `dst`. Therefore transcription
1008
+ // should only be run when it is shown to be necessary. At the same time, if your code base
1009
+ // has enough overrides to really slow this algorithm down, hyde's performance is the least
1010
+ // of your concerns.
1011
+ transcribe_pairs derive_transcribe_pairs (const json& src, const json& dst) {
1012
+ std::vector<std::string> src_keys = object_keys (src);
1013
+ std::vector<std::string> dst_keys = object_keys (dst);
1014
+
1015
+ if (src_keys.size () != dst_keys.size ()) {
1016
+ std::cerr << " WARNING: transcription key count mismatch\n " ;
1017
+ }
1018
+
1019
+ transcribe_pairs result;
1020
+
1021
+ while (!src_keys.empty ()) {
1022
+ transcribe_pair cur_pair;
1023
+
1024
+ // pop a key off the old name set
1025
+ cur_pair.src = std::move (src_keys.back ());
1026
+ src_keys.pop_back ();
1027
+
1028
+ // find the best match of the dst keys to the src key
1029
+ std::size_t best_match = std::numeric_limits<std::size_t >::max ();
1030
+ std::size_t best_index = 0 ;
1031
+ for (std::size_t i = 0 ; i < dst_keys.size (); ++i) {
1032
+ // generate the diff score of the src key and the candidate dst
1033
+ std::size_t cur_match = diff_score (cur_pair.src , dst_keys[i]);
1034
+
1035
+ if (cur_match > best_match) {
1036
+ continue ;
1037
+ }
1038
+
1039
+ // if this dst candidate is better than what we've seen, remember that.
1040
+ best_match = cur_match;
1041
+ best_index = i;
1042
+ }
1043
+
1044
+ // pair the best match dst and src keys and remove dst
1045
+ cur_pair.dst = std::move (dst_keys[best_index]);
1046
+ dst_keys.erase (dst_keys.begin () + best_index);
1047
+
1048
+ // save off the pair and repeat
1049
+ result.emplace_back (std::move (cur_pair));
1050
+ }
1051
+
1052
+ return result;
1053
+ }
1054
+
1055
+ /* *************************************************************************************************/
1056
+
983
1057
bool yaml_base_emitter::check_map (const std::string& filepath,
984
1058
const json& have_node,
985
1059
const json& expected_node,
@@ -1013,38 +1087,68 @@ bool yaml_base_emitter::check_map(const std::string& filepath,
1013
1087
}
1014
1088
1015
1089
const json& have = have_node[key];
1090
+ bool failure{false };
1091
+ json result_map;
1016
1092
1017
- std::vector<std::string> keys;
1018
-
1019
- for (auto iter{have.begin ()}, last{have.end ()}; iter != last; ++iter) {
1020
- keys.push_back (static_cast <const std::string&>(iter.key ()));
1021
- }
1022
- for (auto iter{expected.begin ()}, last{expected.end ()}; iter != last; ++iter) {
1023
- keys.push_back (static_cast <const std::string&>(iter.key ()));
1024
- }
1093
+ if (key == " overloads" && _mode == yaml_mode::transcribe) {
1094
+ /*
1095
+ It is common during the upgrade from one version of hyde to another that the underlying
1096
+ clang tooling will output different symbol names for a given symbol (e.g., a namespace
1097
+ may get removed or added.) Although the symbol is unchanged, because its `expected` name
1098
+ differs from the `have` name, hyde will consider the symbols different, remove the old name
1099
+ and insert the new one. This wipes out any previous documentation under the old name that
1100
+ should have been migrated to the new name.
1101
+
1102
+ The solution here is very specialized. For the "overloads" key only, we gather the name
1103
+ of each overload in both the `have` and `expected` set. We then pair them up according
1104
+ to how well they match to one another (using the Myers string diff algorithm; two strings
1105
+ with less "patchwork" between them are considered a better match). Ideally this results in
1106
+ key pairs that represent the same symbol, just with different names. Then we call the
1107
+ `proc` with `have[old_name]` and `expected[new_name]` which will migrate any documentation
1108
+ from the old name to the new.
1109
+
1110
+ This capability assumes the overload count of both `have` and `expected` is the same.
1111
+ If any new functions are created or removed between upgrades in the clang driver (e.g.,
1112
+ a new compiler-generated routine is created and documented) that will have to be managed
1113
+ manually. Assuming the count is the same, it also assumes there is a 1:1 mapping from the
1114
+ set of old names to the set of new names. This implies the transcription mode should be
1115
+ done as a separate step from an update. In other words, a transcription assumes the
1116
+ documentation is actually the same between the `have` and `expected` sets, it is _just the
1117
+ overload names_ that have changed, so map the old-named documentation to the new-named
1118
+ documentation as reasonably as possible.
1119
+ */
1120
+ for (const auto & pair : derive_transcribe_pairs (have, expected)) {
1121
+ const std::string curnodepath = nodepath + " ['" + pair.dst + " ']" ;
1122
+ failure |= proc (filepath, have[pair.src ], expected[pair.dst ], curnodepath,
1123
+ result_map[pair.dst ]);
1124
+ }
1125
+ } else {
1126
+ std::vector<std::string> keys;
1025
1127
1026
- std::sort (keys. begin (), keys. end ( ));
1027
- keys. erase ( std::unique ( keys. begin (), keys. end ()), keys. end ( ));
1128
+ move_append (keys, object_keys (have ));
1129
+ move_append ( keys, object_keys (expected ));
1028
1130
1029
- bool failure{false };
1131
+ std::sort (keys.begin (), keys.end ());
1132
+ keys.erase (std::unique (keys.begin (), keys.end ()), keys.end ());
1030
1133
1031
- json result_map;
1032
- for (const auto & subkey : keys) {
1033
- std::string curnodepath = nodepath + " ['" + subkey + " ']" ;
1134
+ for (const auto & subkey : keys) {
1135
+ const std::string curnodepath = nodepath + " ['" + subkey + " ']" ;
1034
1136
1035
- if (!expected.count (subkey)) {
1036
- // Issue #75: only remove non-root keys to allow non-hyde YAML into the file.
1037
- if (!at_root) {
1038
- notify (" extraneous map key: `" + subkey + " `" , " map key removed: `" + subkey + " `" );
1137
+ if (!expected.count (subkey)) {
1138
+ // Issue #75: only remove non-root keys to allow non-hyde YAML into the file.
1139
+ if (!at_root) {
1140
+ notify (" extraneous map key: `" + subkey + " `" ,
1141
+ " map key removed: `" + subkey + " `" );
1142
+ failure = true ;
1143
+ }
1144
+ } else if (!have.count (subkey)) {
1145
+ notify (" map key missing: `" + subkey + " `" , " map key inserted: `" + subkey + " `" );
1146
+ result_map[subkey] = expected[subkey];
1039
1147
failure = true ;
1148
+ } else {
1149
+ failure |=
1150
+ proc (filepath, have[subkey], expected[subkey], curnodepath, result_map[subkey]);
1040
1151
}
1041
- } else if (!have.count (subkey)) {
1042
- notify (" map key missing: `" + subkey + " `" , " map key inserted: `" + subkey + " `" );
1043
- result_map[subkey] = expected[subkey];
1044
- failure = true ;
1045
- } else {
1046
- failure |=
1047
- proc (filepath, have[subkey], expected[subkey], curnodepath, result_map[subkey]);
1048
1152
}
1049
1153
}
1050
1154
@@ -1103,6 +1207,24 @@ std::pair<bool, json> yaml_base_emitter::merge(const std::string& filepath,
1103
1207
check_editable_scalar (filepath, have_hyde, expected_hyde, " " , merged_hyde, " brief" );
1104
1208
failure |= check_scalar_array (filepath, have_hyde, expected_hyde, " " , merged_hyde, " tags" );
1105
1209
1210
+ // We don't want to use `check_scalar` on the version key. If the versions mismatch it's not
1211
+ // necessarily a validation error (as the docs may match OK), but something we want to warn
1212
+ // about. Then in transcription/update we want to hard-set the value to the version of this
1213
+ // tool.
1214
+
1215
+ switch (_mode) {
1216
+ case yaml_mode::validate: {
1217
+ if (!have_hyde.count (" version" ) ||
1218
+ static_cast <const std::string&>(have_hyde[" version" ]) != hyde_version ()) {
1219
+ std::cerr << " INFO: Validation phase with a mismatched version of hyde. Consider updating then/or transcribing.\n " ;
1220
+ }
1221
+ } break ;
1222
+ case yaml_mode::update:
1223
+ case yaml_mode::transcribe: {
1224
+ merged_hyde[" version" ] = hyde_version ();
1225
+ } break ;
1226
+ }
1227
+
1106
1228
failure |= do_merge (filepath, have_hyde, expected_hyde, merged_hyde);
1107
1229
}
1108
1230
@@ -1264,7 +1386,7 @@ documentation parse_documentation(const std::filesystem::path& path, bool fixup_
1264
1386
const auto front_matter_end = contents_end + front_matter_end_k.size ();
1265
1387
std::string yaml_src = have_contents.substr (0 , front_matter_end);
1266
1388
have_contents.erase (0 , front_matter_end);
1267
-
1389
+
1268
1390
result._remainder = std::move (have_contents);
1269
1391
result._json = yaml_to_json (load_yaml (path));
1270
1392
@@ -1342,6 +1464,7 @@ bool yaml_base_emitter::reconcile(json expected,
1342
1464
case hyde::yaml_mode::validate: {
1343
1465
// do nothing
1344
1466
} break ;
1467
+ case hyde::yaml_mode::transcribe:
1345
1468
case hyde::yaml_mode::update: {
1346
1469
failure = write_documentation ({std::move (merged), std::move (remainder )}, path);
1347
1470
} break ;
@@ -1354,6 +1477,7 @@ bool yaml_base_emitter::reconcile(json expected,
1354
1477
std::cerr << relative_path << " : required file does not exist\n " ;
1355
1478
failure = true ;
1356
1479
} break ;
1480
+ case hyde::yaml_mode::transcribe:
1357
1481
case hyde::yaml_mode::update: {
1358
1482
// Add update. No remainder yet, as above.
1359
1483
// REVISIT: Refactor all this into a call to write_documentation,
0 commit comments