@@ -260,6 +260,7 @@ json yaml_base_emitter::base_emitter_node(std::string layout,
260260 node[" hyde" ][" owner" ] = default_tag_value;
261261 node[" hyde" ][" tags" ].emplace_back (std::move (tag));
262262 node[" hyde" ][" brief" ] = default_tag_value;
263+ node[" hyde" ][" version" ] = hyde_version ();
263264
264265 return node;
265266}
@@ -392,6 +393,7 @@ void yaml_base_emitter::check_notify(const std::string& filepath,
392393 std::cerr << filepath << " @" << escaped_nodepath << " ['" << escaped_key
393394 << " ']: " << validate_message << " \n " ;
394395 } break ;
396+ case yaml_mode::transcribe:
395397 case yaml_mode::update: {
396398 std::cout << filepath << " @" << escaped_nodepath << " ['" << escaped_key
397399 << " ']: " << update_message << " \n " ;
@@ -980,6 +982,78 @@ bool yaml_base_emitter::check_object_array(const std::string& filepath,
980982
981983/* *************************************************************************************************/
982984
985+ std::vector<std::string> object_keys (const json& j) {
986+ std::vector<std::string> result;
987+
988+ for (auto iter{j.begin ()}, last{j.end ()}; iter != last; ++iter) {
989+ result.push_back (static_cast <const std::string&>(iter.key ()));
990+ }
991+
992+ return result;
993+ }
994+
// Appends every element of `src` to the end of `dst`, moving each element rather than
// copying it. `src` must be an rvalue of the same container type as `dst`; its elements are
// left in a moved-from state afterwards.
template <class T>
inline void move_append(T& dst, T&& src) {
    auto first = std::make_move_iterator(src.begin());
    auto last = std::make_move_iterator(src.end());

    dst.insert(dst.end(), first, last);
}
999+
// A single pairing of a key from the source (old) key set with the key from the destination
// (new) key set it was matched against.
struct transcribe_pair {
    std::string src; // key drawn from the source key set
    std::string dst; // key drawn from the destination key set
};

// The list of pairings derived for one map.
using transcribe_pairs = std::vector<transcribe_pair>;
1006+
1007+ // This is O(N^2), where N is the size of both `src` and `dst`. Therefore transcription
1008+ // should only be run when it is shown to be necessary. At the same time, if your code base
1009+ // has enough overrides to really slow this algorithm down, hyde's performance is the least
1010+ // of your concerns.
1011+ transcribe_pairs derive_transcribe_pairs (const json& src, const json& dst) {
1012+ std::vector<std::string> src_keys = object_keys (src);
1013+ std::vector<std::string> dst_keys = object_keys (dst);
1014+
1015+ if (src_keys.size () != dst_keys.size ()) {
1016+ std::cerr << " WARNING: transcription key count mismatch\n " ;
1017+ }
1018+
1019+ transcribe_pairs result;
1020+
1021+ while (!src_keys.empty ()) {
1022+ transcribe_pair cur_pair;
1023+
1024+ // pop a key off the old name set
1025+ cur_pair.src = std::move (src_keys.back ());
1026+ src_keys.pop_back ();
1027+
1028+ // find the best match of the dst keys to the src key
1029+ std::size_t best_match = std::numeric_limits<std::size_t >::max ();
1030+ std::size_t best_index = 0 ;
1031+ for (std::size_t i = 0 ; i < dst_keys.size (); ++i) {
1032+ // generate the diff score of the src key and the candidate dst
1033+ std::size_t cur_match = diff_score (cur_pair.src , dst_keys[i]);
1034+
1035+ if (cur_match > best_match) {
1036+ continue ;
1037+ }
1038+
1039+ // if this dst candidate is better than what we've seen, remember that.
1040+ best_match = cur_match;
1041+ best_index = i;
1042+ }
1043+
1044+ // pair the best match dst and src keys and remove dst
1045+ cur_pair.dst = std::move (dst_keys[best_index]);
1046+ dst_keys.erase (dst_keys.begin () + best_index);
1047+
1048+ // save off the pair and repeat
1049+ result.emplace_back (std::move (cur_pair));
1050+ }
1051+
1052+ return result;
1053+ }
1054+
1055+ /* *************************************************************************************************/
1056+
9831057bool yaml_base_emitter::check_map (const std::string& filepath,
9841058 const json& have_node,
9851059 const json& expected_node,
@@ -1013,38 +1087,68 @@ bool yaml_base_emitter::check_map(const std::string& filepath,
10131087 }
10141088
10151089 const json& have = have_node[key];
1090+ bool failure{false };
1091+ json result_map;
10161092
1017- std::vector<std::string> keys;
1018-
1019- for (auto iter{have.begin ()}, last{have.end ()}; iter != last; ++iter) {
1020- keys.push_back (static_cast <const std::string&>(iter.key ()));
1021- }
1022- for (auto iter{expected.begin ()}, last{expected.end ()}; iter != last; ++iter) {
1023- keys.push_back (static_cast <const std::string&>(iter.key ()));
1024- }
1093+ if (key == " overloads" && _mode == yaml_mode::transcribe) {
1094+ /*
1095+ It is common during the upgrade from one version of hyde to another that the underlying
1096+ clang tooling will output different symbol names for a given symbol (e.g., a namespace
1097+ may get removed or added.) Although the symbol is unchanged, because its `expected` name
1098+ differs from the `have` name, hyde will consider the symbols different, remove the old name
1099+ and insert the new one. This wipes out any previous documentation under the old name that
1100+ should have been migrated to the new name.
1101+
1102+ The solution here is very specialized. For the "overloads" key only, we gather the name
1103+ of each overload in both the `have` and `expected` set. We then pair them up according
1104+ to how well they match to one another (using the Myers string diff algorithm; two strings
1105+ with less "patchwork" between them are considered a better match). Ideally this results in
1106+ key pairs that represent the same symbol, just with different names. Then we call the
1107+ `proc` with `have[old_name]` and `expected[new_name]` which will migrate any documentation
1108+ from the old name to the new.
1109+
1110+ This capability assumes the overload count of both `have` and `expected` are the same.
1111+ If any new functions are created or removed between upgrades in the clang driver (e.g.,
1112+ a new compiler-generated routine is created and documented) that will have to be managed
1113+ manually. Assuming the count is the same, it also assumes there is a 1:1 mapping from the
1114+ set of old names to the set of new names. This implies the transcription mode should be
1115+ done as a separate step from an update. In other words, a transcription assumes the
1116+ documentation is actually the same between the `have` and `expected` sets, it is _just the
1117+ overload names_ that have changed, so map the old-named documentation to the new-named
1118+ documentation as reasonably as possible.
1119+ */
1120+ for (const auto & pair : derive_transcribe_pairs (have, expected)) {
1121+ const std::string curnodepath = nodepath + " ['" + pair.dst + " ']" ;
1122+ failure |= proc (filepath, have[pair.src ], expected[pair.dst ], curnodepath,
1123+ result_map[pair.dst ]);
1124+ }
1125+ } else {
1126+ std::vector<std::string> keys;
10251127
1026- std::sort (keys. begin (), keys. end ( ));
1027- keys. erase ( std::unique ( keys. begin (), keys. end ()), keys. end ( ));
1128+ move_append (keys, object_keys (have ));
1129+ move_append ( keys, object_keys (expected ));
10281130
1029- bool failure{false };
1131+ std::sort (keys.begin (), keys.end ());
1132+ keys.erase (std::unique (keys.begin (), keys.end ()), keys.end ());
10301133
1031- json result_map;
1032- for (const auto & subkey : keys) {
1033- std::string curnodepath = nodepath + " ['" + subkey + " ']" ;
1134+ for (const auto & subkey : keys) {
1135+ const std::string curnodepath = nodepath + " ['" + subkey + " ']" ;
10341136
1035- if (!expected.count (subkey)) {
1036- // Issue #75: only remove non-root keys to allow non-hyde YAML into the file.
1037- if (!at_root) {
1038- notify (" extraneous map key: `" + subkey + " `" , " map key removed: `" + subkey + " `" );
1137+ if (!expected.count (subkey)) {
1138+ // Issue #75: only remove non-root keys to allow non-hyde YAML into the file.
1139+ if (!at_root) {
1140+ notify (" extraneous map key: `" + subkey + " `" ,
1141+ " map key removed: `" + subkey + " `" );
1142+ failure = true ;
1143+ }
1144+ } else if (!have.count (subkey)) {
1145+ notify (" map key missing: `" + subkey + " `" , " map key inserted: `" + subkey + " `" );
1146+ result_map[subkey] = expected[subkey];
10391147 failure = true ;
1148+ } else {
1149+ failure |=
1150+ proc (filepath, have[subkey], expected[subkey], curnodepath, result_map[subkey]);
10401151 }
1041- } else if (!have.count (subkey)) {
1042- notify (" map key missing: `" + subkey + " `" , " map key inserted: `" + subkey + " `" );
1043- result_map[subkey] = expected[subkey];
1044- failure = true ;
1045- } else {
1046- failure |=
1047- proc (filepath, have[subkey], expected[subkey], curnodepath, result_map[subkey]);
10481152 }
10491153 }
10501154
@@ -1103,6 +1207,24 @@ std::pair<bool, json> yaml_base_emitter::merge(const std::string& filepath,
11031207 check_editable_scalar (filepath, have_hyde, expected_hyde, " " , merged_hyde, " brief" );
11041208 failure |= check_scalar_array (filepath, have_hyde, expected_hyde, " " , merged_hyde, " tags" );
11051209
1210+ // We don't want to use `check_scalar` on the version key. If the versions mismatch it's not
1211+ // necessarily a validation error (as the docs may match OK), but something we want to warn
1212+ // about. Then in transcription/update we want to hard-set the value to the version of this
1213+ // tool.
1214+
1215+ switch (_mode) {
1216+ case yaml_mode::validate: {
1217+ if (!have_hyde.count (" version" ) ||
1218+ static_cast <const std::string&>(have_hyde[" version" ]) != hyde_version ()) {
1219+ std::cerr << " INFO: Validation phase with a mismatched version of hyde. Consider updating then/or transcribing.\n " ;
1220+ }
1221+ } break ;
1222+ case yaml_mode::update:
1223+ case yaml_mode::transcribe: {
1224+ merged_hyde[" version" ] = hyde_version ();
1225+ } break ;
1226+ }
1227+
11061228 failure |= do_merge (filepath, have_hyde, expected_hyde, merged_hyde);
11071229 }
11081230
@@ -1264,7 +1386,7 @@ documentation parse_documentation(const std::filesystem::path& path, bool fixup_
12641386 const auto front_matter_end = contents_end + front_matter_end_k.size ();
12651387 std::string yaml_src = have_contents.substr (0 , front_matter_end);
12661388 have_contents.erase (0 , front_matter_end);
1267-
1389+
12681390 result._remainder = std::move (have_contents);
12691391 result._json = yaml_to_json (load_yaml (path));
12701392
@@ -1342,6 +1464,7 @@ bool yaml_base_emitter::reconcile(json expected,
13421464 case hyde::yaml_mode::validate: {
13431465 // do nothing
13441466 } break ;
1467+ case hyde::yaml_mode::transcribe:
13451468 case hyde::yaml_mode::update: {
13461469 failure = write_documentation ({std::move (merged), std::move (remainder)}, path);
13471470 } break ;
@@ -1354,6 +1477,7 @@ bool yaml_base_emitter::reconcile(json expected,
13541477 std::cerr << relative_path << " : required file does not exist\n " ;
13551478 failure = true ;
13561479 } break ;
1480+ case hyde::yaml_mode::transcribe:
13571481 case hyde::yaml_mode::update: {
13581482 // Add update. No remainder yet, as above.
13591483 // REVISIT: Refactor all this into a call to write_documentation,
0 commit comments