[GOFF] Refactor writing GOFF records #93855

redstar · 2024-05-30T18:07:15Z

A GOFF file is basically a sequence of records. Change the YAML mapping for GOFF files to reflect this. As result, creating files for testing, e.g. without a module header, becomes possible.

llvmbot · 2024-05-30T18:07:45Z

@llvm/pr-subscribers-objectyaml

Author: Kai Nacke (redstar)

Changes

A GOFF file is basically a sequence of records. Change the YAML mapping for GOFF files to reflect this. As result, creating files for testing, e.g. without a module header, becomes possible.

Patch is 29.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93855.diff

11 Files Affected:

(modified) llvm/include/llvm/ObjectYAML/GOFFYAML.h (+74-12)
(modified) llvm/lib/ObjectYAML/GOFFEmitter.cpp (+113-154)
(modified) llvm/lib/ObjectYAML/GOFFYAML.cpp (+76-18)
(removed) llvm/test/tools/yaml2obj/GOFF/GOFF-header-settings.yaml (-26)
(removed) llvm/test/tools/yaml2obj/GOFF/GOFF-no-header.yaml (-7)
(added) llvm/test/tools/yaml2obj/GOFF/end-amode-const.yaml (+15)
(added) llvm/test/tools/yaml2obj/GOFF/end-long-name.yaml (+70)
(added) llvm/test/tools/yaml2obj/GOFF/end-only.yaml (+22)
(renamed) llvm/test/tools/yaml2obj/GOFF/header-end.yaml (+8-6)
(added) llvm/test/tools/yaml2obj/GOFF/header-only.yaml (+18)
(added) llvm/test/tools/yaml2obj/GOFF/invalid-record.yaml (+7)

diff --git a/llvm/include/llvm/ObjectYAML/GOFFYAML.h b/llvm/include/llvm/ObjectYAML/GOFFYAML.h
index f9bf45e95bd3a..16219058e969b 100644
--- a/llvm/include/llvm/ObjectYAML/GOFFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/GOFFYAML.h
@@ -25,25 +25,87 @@ namespace llvm {
 // to use yaml::IO, we use these structures which are closer to the source.
 namespace GOFFYAML {
 
-struct FileHeader {
-  uint32_t TargetEnvironment = 0;
-  uint32_t TargetOperatingSystem = 0;
-  uint16_t CCSID = 0;
-  StringRef CharacterSetName;
-  StringRef LanguageProductIdentifier;
-  uint32_t ArchitectureLevel = 0;
-  std::optional<uint16_t> InternalCCSID;
-  std::optional<uint8_t> TargetSoftwareEnvironment;
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, GOFF_AMODE)
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, GOFF_ENDFLAGS)
+
+// The GOFF format uses different kinds of logical records. The format imposes
+// some rules on those records (e.g. the module header must come first, no
+// forward references to records, etc.). However, to be able to specify invalid
+// GOFF files, we treat all records the same way.
+struct RecordBase {
+  enum RecordBaseKind {
+    RBK_ModuleHeader,
+    RBK_RelocationDirectory,
+    RBK_Symbol,
+    RBK_Text,
+    RBK_DeferredLength,
+    RBK_EndOfModule
+  };
+
+private:
+  const RecordBaseKind Kind;
+
+protected:
+  RecordBase(RecordBaseKind Kind) : Kind(Kind) {}
+
+public:
+  RecordBaseKind getKind() const { return Kind; }
+};
+using RecordPtr = std::unique_ptr<RecordBase>;
+
+struct ModuleHeader : public RecordBase {
+  ModuleHeader() : RecordBase(RBK_ModuleHeader) {}
+
+  uint32_t ArchitectureLevel;
+  uint16_t PropertiesLength;
+  std::optional<yaml::BinaryRef> Properties;
+
+  static bool classof(const RecordBase *S) {
+    return S->getKind() == RBK_ModuleHeader;
+  }
+};
+
+struct EndOfModule : public RecordBase {
+  EndOfModule() : RecordBase(RBK_EndOfModule) {}
+
+  GOFF_ENDFLAGS Flags;
+  GOFF_AMODE AMODE;
+  uint32_t RecordCount;
+  uint32_t ESDID;
+  uint32_t Offset;
+  uint16_t NameLength;
+  StringRef EntryName;
+
+  static bool classof(const RecordBase *S) {
+    return S->getKind() == RBK_EndOfModule;
+  }
 };
 
 struct Object {
-  FileHeader Header;
-  Object();
+  // A GOFF file is a sequence of records.
+  std::vector<RecordPtr> Records;
 };
 } // end namespace GOFFYAML
 } // end namespace llvm
 
-LLVM_YAML_DECLARE_MAPPING_TRAITS(GOFFYAML::FileHeader)
+LLVM_YAML_DECLARE_ENUM_TRAITS(GOFFYAML::GOFF_AMODE)
+LLVM_YAML_DECLARE_ENUM_TRAITS(GOFFYAML::GOFF_ENDFLAGS)
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(GOFFYAML::RecordPtr)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(GOFFYAML::RecordPtr)
+
+LLVM_YAML_DECLARE_MAPPING_TRAITS(GOFFYAML::ModuleHeader)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(GOFFYAML::EndOfModule)
 LLVM_YAML_DECLARE_MAPPING_TRAITS(GOFFYAML::Object)
 
+namespace llvm {
+namespace yaml {
+
+template <> struct CustomMappingTraits<GOFFYAML::RecordPtr> {
+  static void inputOne(IO &IO, StringRef Key, GOFFYAML::RecordPtr &Elem);
+  static void output(IO &IO, GOFFYAML::RecordPtr &Elem);
+};
+
+} // namespace yaml
+} // namespace llvm
 #endif // LLVM_OBJECTYAML_GOFFYAML_H
diff --git a/llvm/lib/ObjectYAML/GOFFEmitter.cpp b/llvm/lib/ObjectYAML/GOFFEmitter.cpp
index 345904407e1d2..a702aefd5076d 100644
--- a/llvm/lib/ObjectYAML/GOFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/GOFFEmitter.cpp
@@ -11,11 +11,12 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ObjectYAML/ObjectYAML.h"
+#include "llvm/BinaryFormat/GOFF.h"
+#include "llvm/ObjectYAML/GOFFYAML.h"
 #include "llvm/ObjectYAML/yaml2obj.h"
 #include "llvm/Support/ConvertEBCDIC.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -25,10 +26,10 @@ namespace {
 // Common flag values on records.
 enum {
   // Flag: This record is continued.
-  Rec_Continued = 1,
+  Rec_Continued = 1 << 0,
 
   // Flag: This record is a continuation.
-  Rec_Continuation = 1 << (8 - 6 - 1),
+  Rec_Continuation = 1 << 1,
 };
 
 template <typename ValueType> struct BinaryBeImpl {
@@ -62,119 +63,90 @@ ZerosImpl zeros(const size_t NumBytes) { return ZerosImpl{NumBytes}; }
 
 // The GOFFOstream is responsible to write the data into the fixed physical
 // records of the format. A user of this class announces the start of a new
-// logical record and the size of its payload. While writing the payload, the
-// physical records are created for the data. Possible fill bytes at the end of
-// a physical record are written automatically.
-class GOFFOstream : public raw_ostream {
+// logical record, and writes the full logical block. The physical records are
+// created while the content is written to the underlying stream. Possible fill
+// bytes at the end of a physical record are written automatically.
+// The implementation aims at simplicity, not speed.
+class GOFFOStream {
 public:
-  explicit GOFFOstream(raw_ostream &OS)
-      : OS(OS), LogicalRecords(0), RemainingSize(0), NewLogicalRecord(false) {
-    SetBufferSize(GOFF::PayloadLength);
-  }
-
-  ~GOFFOstream() { finalize(); }
+  explicit GOFFOStream(raw_ostream &OS)
+      : OS(OS), CurrentType(GOFF::RecordType(-1)) {}
 
-  void makeNewRecord(GOFF::RecordType Type, size_t Size) {
-    fillRecord();
-    CurrentType = Type;
-    RemainingSize = Size;
-    if (size_t Gap = (RemainingSize % GOFF::PayloadLength))
-      RemainingSize += GOFF::PayloadLength - Gap;
-    NewLogicalRecord = true;
-    ++LogicalRecords;
+  GOFFOStream &operator<<(StringRef Str) {
+    write(Str);
+    return *this;
   }
 
-  void finalize() { fillRecord(); }
-
-  uint32_t logicalRecords() { return LogicalRecords; }
+  void newRecord(GOFF::RecordType Type) { CurrentType = Type; }
 
 private:
   // The underlying raw_ostream.
   raw_ostream &OS;
 
-  // The number of logical records emitted so far.
-  uint32_t LogicalRecords;
-
-  // The remaining size of this logical record, including fill bytes.
-  size_t RemainingSize;
-
   // The type of the current (logical) record.
   GOFF::RecordType CurrentType;
 
-  // Signals start of new record.
-  bool NewLogicalRecord;
-
-  // Return the number of bytes left to write until next physical record.
-  // Please note that we maintain the total number of bytes left, not the
-  // written size.
-  size_t bytesToNextPhysicalRecord() {
-    size_t Bytes = RemainingSize % GOFF::PayloadLength;
-    return Bytes ? Bytes : GOFF::PayloadLength;
-  }
-
   // Write the record prefix of a physical record, using the current record
   // type.
-  static void writeRecordPrefix(raw_ostream &OS, GOFF::RecordType Type,
-                                size_t RemainingSize,
-                                uint8_t Flags = Rec_Continuation) {
-    uint8_t TypeAndFlags = Flags | (Type << 4);
-    if (RemainingSize > GOFF::RecordLength)
-      TypeAndFlags |= Rec_Continued;
-    OS << binaryBe(static_cast<unsigned char>(GOFF::PTVPrefix))
-       << binaryBe(static_cast<unsigned char>(TypeAndFlags))
-       << binaryBe(static_cast<unsigned char>(0));
-  }
+  void writeRecordPrefix(uint8_t Flags);
 
-  // Fill the last physical record of a logical record with zero bytes.
-  void fillRecord() {
-    assert((GetNumBytesInBuffer() <= RemainingSize) &&
-           "More bytes in buffer than expected");
-    size_t Remains = RemainingSize - GetNumBytesInBuffer();
-    if (Remains) {
-      assert((Remains < GOFF::RecordLength) &&
-             "Attempting to fill more than one physical record");
-      raw_ostream::write_zeros(Remains);
-    }
-    flush();
-    assert(RemainingSize == 0 && "Not fully flushed");
-    assert(GetNumBytesInBuffer() == 0 && "Buffer not fully empty");
-  }
+  // Write a logical record.
+  void write(StringRef Str);
+};
 
-  // See raw_ostream::write_impl.
-  void write_impl(const char *Ptr, size_t Size) override {
-    assert((RemainingSize >= Size) && "Attempt to write too much data");
-    assert(RemainingSize && "Logical record overflow");
-    if (!(RemainingSize % GOFF::PayloadLength)) {
-      writeRecordPrefix(OS, CurrentType, RemainingSize,
-                        NewLogicalRecord ? 0 : Rec_Continuation);
-      NewLogicalRecord = false;
-    }
-    assert(!NewLogicalRecord &&
-           "New logical record not on physical record boundary");
-
-    size_t Idx = 0;
-    while (Size > 0) {
-      size_t BytesToWrite = bytesToNextPhysicalRecord();
-      if (BytesToWrite > Size)
-        BytesToWrite = Size;
-      OS.write(Ptr + Idx, BytesToWrite);
-      Idx += BytesToWrite;
-      Size -= BytesToWrite;
-      RemainingSize -= BytesToWrite;
-      if (Size) {
-        writeRecordPrefix(OS, CurrentType, RemainingSize);
-      }
-    }
+void GOFFOStream::writeRecordPrefix(uint8_t Flags) {
+  uint8_t TypeAndFlags = Flags | (CurrentType << 4);
+  OS << binaryBe(static_cast<unsigned char>(GOFF::PTVPrefix))
+     << binaryBe(static_cast<unsigned char>(TypeAndFlags))
+     << binaryBe(static_cast<unsigned char>(0));
+}
+
+void GOFFOStream::write(StringRef Str) {
+  // The flags are determined by the flags of the prvious record, and by the
+  // remaining size of data.
+  uint8_t Flags = 0;
+  size_t Ptr = 0;
+  size_t Size = Str.size();
+  while (Size >= GOFF::RecordContentLength) {
+    if (Flags) {
+      Flags |= Rec_Continuation;
+      if (Size == GOFF::RecordContentLength)
+        Flags &= ~Rec_Continued;
+    } else
+      Flags |= (Size == GOFF::RecordContentLength) ? 0 : Rec_Continued;
+    writeRecordPrefix(Flags);
+    OS.write(&Str.data()[Ptr], GOFF::RecordContentLength);
+    Size -= GOFF::RecordContentLength;
+    Ptr += GOFF::RecordContentLength;
   }
+  if (Size) {
+    Flags &= ~Rec_Continued;
+    writeRecordPrefix(Flags);
+    OS.write(&Str.data()[Ptr], Size);
+    OS.write_zeros(GOFF::RecordContentLength - Size);
+  }
+}
+
+// A LogicalRecord buffers the data of a record.
+class LogicalRecord : public raw_svector_ostream {
+  GOFFOStream &OS;
+  SmallVector<char, 0> Buffer;
+
+  void anchor() override {};
 
-  // Return the current position within the stream, not counting the bytes
-  // currently in the buffer.
-  uint64_t current_pos() const override { return OS.tell(); }
+public:
+  LogicalRecord(GOFFOStream &OS) : raw_svector_ostream(Buffer), OS(OS) {}
+  ~LogicalRecord() override { OS << str(); }
+
+  LogicalRecord &operator<<(yaml::BinaryRef B) {
+    B.writeAsBinary(*this);
+    return *this;
+  }
 };
 
 class GOFFState {
-  void writeHeader(GOFFYAML::FileHeader &FileHdr);
-  void writeEnd();
+  void writeHeader(GOFFYAML::ModuleHeader &ModHdr);
+  void writeEnd(GOFFYAML::EndOfModule &EndMod);
 
   void reportError(const Twine &Msg) {
     ErrHandler(Msg);
@@ -185,8 +157,6 @@ class GOFFState {
             yaml::ErrorHandler ErrHandler)
       : GW(OS), Doc(Doc), ErrHandler(ErrHandler), HasError(false) {}
 
-  ~GOFFState() { GW.finalize(); }
-
   bool writeObject();
 
 public:
@@ -194,72 +164,61 @@ class GOFFState {
                         yaml::ErrorHandler ErrHandler);
 
 private:
-  GOFFOstream GW;
+  GOFFOStream GW;
   GOFFYAML::Object &Doc;
   yaml::ErrorHandler ErrHandler;
   bool HasError;
 };
 
-void GOFFState::writeHeader(GOFFYAML::FileHeader &FileHdr) {
-  SmallString<16> CCSIDName;
-  if (std::error_code EC =
-          ConverterEBCDIC::convertToEBCDIC(FileHdr.CharacterSetName, CCSIDName))
-    reportError("Conversion error on " + FileHdr.CharacterSetName);
-  if (CCSIDName.size() > 16) {
-    reportError("CharacterSetName too long");
-    CCSIDName.resize(16);
-  }
-  SmallString<16> LangProd;
-  if (std::error_code EC = ConverterEBCDIC::convertToEBCDIC(
-          FileHdr.LanguageProductIdentifier, LangProd))
-    reportError("Conversion error on " + FileHdr.LanguageProductIdentifier);
-  if (LangProd.size() > 16) {
-    reportError("LanguageProductIdentifier too long");
-    LangProd.resize(16);
-  }
-
-  GW.makeNewRecord(GOFF::RT_HDR, GOFF::PayloadLength);
-  GW << binaryBe(FileHdr.TargetEnvironment)     // TargetEnvironment
-     << binaryBe(FileHdr.TargetOperatingSystem) // TargetOperatingSystem
-     << zeros(2)                                // Reserved
-     << binaryBe(FileHdr.CCSID)                 // CCSID
-     << CCSIDName                               // CharacterSetName
-     << zeros(16 - CCSIDName.size())            // Fill bytes
-     << LangProd                                // LanguageProductIdentifier
-     << zeros(16 - LangProd.size())             // Fill bytes
-     << binaryBe(FileHdr.ArchitectureLevel);    // ArchitectureLevel
-  // The module propties are optional. Figure out if we need to write them.
-  uint16_t ModPropLen = 0;
-  if (FileHdr.TargetSoftwareEnvironment)
-    ModPropLen = 3;
-  else if (FileHdr.InternalCCSID)
-    ModPropLen = 2;
-  if (ModPropLen) {
-    GW << binaryBe(ModPropLen) << zeros(6);
-    if (ModPropLen >= 2)
-      GW << binaryBe(FileHdr.InternalCCSID ? *FileHdr.InternalCCSID : 0);
-    if (ModPropLen >= 3)
-      GW << binaryBe(FileHdr.TargetSoftwareEnvironment
-                         ? *FileHdr.TargetSoftwareEnvironment
-                         : 0);
-  }
+void GOFFState::writeHeader(GOFFYAML::ModuleHeader &ModHdr) {
+  GW.newRecord(GOFF::RT_HDR);
+  LogicalRecord LR(GW);
+  LR << zeros(45)                          // Reserved.
+     << binaryBe(ModHdr.ArchitectureLevel) // The architecture level.
+     << binaryBe(ModHdr.PropertiesLength)  // Length of module properties.
+     << zeros(6);                          // Reserved.
+  if (ModHdr.Properties)
+    LR << *ModHdr.Properties; // Module properties.
 }
 
-void GOFFState::writeEnd() {
-  GW.makeNewRecord(GOFF::RT_END, GOFF::PayloadLength);
-  GW << binaryBe(uint8_t(0)) // No entry point
-     << binaryBe(uint8_t(0)) // No AMODE
-     << zeros(3)             // Reserved
-     << binaryBe(GW.logicalRecords());
-  // No entry point yet. Automatically fill remaining space with zero bytes.
-  GW.finalize();
+void GOFFState::writeEnd(GOFFYAML::EndOfModule &EndMod) {
+  SmallString<16> EntryName;
+  if (std::error_code EC =
+          ConverterEBCDIC::convertToEBCDIC(EndMod.EntryName, EntryName))
+    reportError("Conversion error on " + EndMod.EntryName);
+
+  GW.newRecord(GOFF::RT_END);
+  LogicalRecord LR(GW);
+  LR << binaryBe(uint8_t(EndMod.Flags)) // The flags.
+     << binaryBe(uint8_t(EndMod.AMODE)) // The addressing mode.
+     << zeros(3)                        // Reserved.
+     << binaryBe(EndMod.RecordCount)    // The record count.
+     << binaryBe(EndMod.ESDID)          // ESDID of the entry point.
+     << zeros(4)                        // Reserved.
+     << binaryBe(EndMod.Offset)         // Offset of entry point.
+     << binaryBe(EndMod.NameLength)     // Length of external name.
+     << EntryName;                      // Name of the entry point.
 }
 
 bool GOFFState::writeObject() {
-  writeHeader(Doc.Header);
-  if (HasError)
-    return false;
-  writeEnd();
+  for (auto &RecPtr : Doc.Records) {
+    auto *Rec = RecPtr.get();
+    switch (Rec->getKind()) {
+    case GOFFYAML::RecordBase::RBK_ModuleHeader:
+      writeHeader(*static_cast<GOFFYAML::ModuleHeader *>(Rec));
+      break;
+    case GOFFYAML::RecordBase::RBK_EndOfModule:
+      writeEnd(*static_cast<GOFFYAML::EndOfModule *>(Rec));
+      break;
+    case GOFFYAML::RecordBase::RBK_RelocationDirectory:
+    case GOFFYAML::RecordBase::RBK_Symbol:
+    case GOFFYAML::RecordBase::RBK_Text:
+    case GOFFYAML::RecordBase::RBK_DeferredLength:
+      llvm_unreachable(("Not yet implemented"));
+    }
+    if (HasError)
+      return false;
+  }
   return true;
 }
 
diff --git a/llvm/lib/ObjectYAML/GOFFYAML.cpp b/llvm/lib/ObjectYAML/GOFFYAML.cpp
index ae857980a521b..cad61b759e01e 100644
--- a/llvm/lib/ObjectYAML/GOFFYAML.cpp
+++ b/llvm/lib/ObjectYAML/GOFFYAML.cpp
@@ -12,34 +12,92 @@
 
 #include "llvm/ObjectYAML/GOFFYAML.h"
 #include "llvm/BinaryFormat/GOFF.h"
-#include <string.h>
 
 namespace llvm {
-namespace GOFFYAML {
 
-Object::Object() {}
+namespace yaml {
 
-} // namespace GOFFYAML
+void ScalarEnumerationTraits<GOFFYAML::GOFF_AMODE>::enumeration(
+    IO &IO, GOFFYAML::GOFF_AMODE &Value) {
+#define ECase(X) IO.enumCase(Value, #X, GOFF::ESD_##X)
+  ECase(AMODE_None);
+  ECase(AMODE_24);
+  ECase(AMODE_31);
+  ECase(AMODE_ANY);
+  ECase(AMODE_64);
+  ECase(AMODE_MIN);
+#undef ECase
+  IO.enumFallback<Hex8>(Value);
+}
 
-namespace yaml {
+void ScalarEnumerationTraits<GOFFYAML::GOFF_ENDFLAGS>::enumeration(
+    IO &IO, GOFFYAML::GOFF_ENDFLAGS &Value) {
+#define ECase(X) IO.enumCase(Value, #X, unsigned(GOFF::END_##X) << 6)
+  ECase(EPR_None);
+  ECase(EPR_EsdidOffset);
+  ECase(EPR_ExternalName);
+  ECase(EPR_Reserved);
+#undef ECase
+  IO.enumFallback<Hex8>(Value);
+}
+
+void MappingTraits<GOFFYAML::ModuleHeader>::mapping(
+    IO &IO, GOFFYAML::ModuleHeader &ModHdr) {
+  IO.mapOptional("ArchitectureLevel", ModHdr.ArchitectureLevel, 0);
+  IO.mapOptional("PropertiesLength", ModHdr.PropertiesLength, 0);
+  IO.mapOptional("Properties", ModHdr.Properties);
+}
+
+void MappingTraits<GOFFYAML::EndOfModule>::mapping(IO &IO,
+                                                   GOFFYAML::EndOfModule &End) {
+  IO.mapOptional("Flags", End.Flags, 0);
+  IO.mapOptional("AMODE", End.AMODE, 0);
+  IO.mapOptional("RecordCount", End.RecordCount, 0);
+  IO.mapOptional("ESDID", End.ESDID, 0);
+  IO.mapOptional("Offset", End.Offset, 0);
+  IO.mapOptional("NameLength", End.NameLength, 0);
+  IO.mapOptional("EntryName", End.EntryName);
+}
+
+void CustomMappingTraits<GOFFYAML::RecordPtr>::inputOne(
+    IO &IO, StringRef Key, GOFFYAML::RecordPtr &Elem) {
+  if (Key == "ModuleHeader") {
+    GOFFYAML::ModuleHeader ModHdr;
+    IO.mapRequired("ModuleHeader", ModHdr);
+    Elem = std::make_unique<GOFFYAML::ModuleHeader>(std::move(ModHdr));
+  } else if (Key == "End") {
+    GOFFYAML::EndOfModule End;
+    IO.mapRequired("End", End);
+    Elem = std::make_unique<GOFFYAML::EndOfModule>(std::move(End));
+  } else if (Key == "RelocationDirectory" || Key == "Symbol" || Key == "Text" ||
+             Key == "Length")
+    IO.setError(Twine("Not yet implemented ").concat(Key));
+  else
+    IO.setError(Twine("Unknown record type name ").concat(Key));
+}
 
-void MappingTraits<GOFFYAML::FileHeader>::mapping(
-    IO &IO, GOFFYAML::FileHeader &FileHdr) {
-  IO.mapOptional("TargetEnvironment", FileHdr.TargetEnvironment, 0);
-  IO.mapOptional("TargetOperatingSystem", FileHdr.TargetOperatingSystem, 0);
-  IO.mapOptional("CCSID", FileHdr.CCSID, 0);
-  IO.mapOptional("CharacterSetName", FileHdr.CharacterSetName, "");
-  IO.mapOptional("LanguageProductIdentifier", FileHdr.LanguageProductIdentifier,
-                 "");
-  IO.mapOptional("ArchitectureLevel", FileHdr.ArchitectureLevel, 1);
-  IO.mapOptional("InternalCCSID", FileHdr.InternalCCSID);
-  IO.mapOptional("TargetSoftwareEnvironment",
-                 FileHdr.TargetSoftwareEnvironment);
+void CustomMappingTraits<GOFFYAML::RecordPtr>::output(
+    IO &IO, GOFFYAML::RecordPtr &Elem) {
+  switch (Elem->getKind()) {
+  case GOFFYAML::RecordBase::RBK_ModuleHeader:
+    IO.mapRequired("ModuleHeader",
+                   *static_cast<GOFFYAML::ModuleHeader *>(Elem.get()));
+    break;
+  case GOFFYAML::RecordBase::RBK_EndOfModule:
+    IO.mapRequired("End", *static_cast<GOFFYAML::EndOfModule *>(Elem.get()));
+    break;
+  case GOFFYAML::RecordBase::RBK_RelocationDirectory:
+  case GOFFYAML::RecordBase::RBK_Symbol:
+  case GOFFYAML::RecordBase::RBK_Text:
+  case GOFFYAML::RecordBase::RBK_DeferredLength:
+    llvm_unreachable(("Not yet implemented"));
+  }
 }
 
 void MappingTraits<GOFFYAML::Object>::mapping(IO &IO, GOFFYAML::Object &Obj) {
   IO.mapTag("!GOFF", true);
-  IO.mapRequired("FileHeader", Obj.Header);
+  EmptyContext Context;
+  yamlize(IO, Obj.Records, false, Context);
 }
 
 } // namespace yaml
diff --git a/llvm/test/tools/yaml2obj/GOFF/GOFF-header-settings.yaml b/llvm/test/tools/yaml2obj/GOFF/GOFF-header-settings.yaml
deleted file mode 100644
index 1971c407199fb..0000000000000
--- a/llvm/test/tools/yaml2obj/GOFF/GOFF-header-settings.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-# RUN: yaml2obj %s | od -v -An -tx1 | FileCheck --ignore-case %s
-
-## Verify that GOFF Header is correct.
-# CHECK:      03 f0 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-# CHECK-NEXT: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-# CHECK-NEXT: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-# CHECK-NEXT: 00 00 01 00 03 00 00 00 00 0...
[truncated]

jh7370

I've reviewed the code. Will check the tests another time.

llvm/include/llvm/ObjectYAML/GOFFYAML.h

llvm/lib/ObjectYAML/GOFFEmitter.cpp

Everybody0523 · 2024-06-08T06:27:24Z

llvm/test/tools/yaml2obj/GOFF/end-long-name.yaml

+# CHECK1-EMPTY:
+
+## 1 record fully used.
+## Flags = 0 => no continuation, not continued.


Suggested change

## Flags = 0 => no continuation, not continued.

## Flags = 0 => not a continuation, not continued.

Maybe it's just me but when I read "no continuation" I'm not sure whether it means the record in question has no continuation or that the record itself is not a continuation.

Changed, thanks!

Everybody0523 · 2024-06-08T06:30:36Z

llvm/include/llvm/ObjectYAML/GOFFYAML.h

+// forward references to records, etc.). However, to be able to specify invalid
+// GOFF files, we treat all records the same way.
+struct RecordBase {
+  enum class Kind {


Could you just use the RecordType enum from llvm/BinaryFormat/GOFF.h here?

Those are currently the same, but I do not think that it is good to couple the record type with the class hierarchy discriminator. E.g. in C++ I could model a relocation entry as a record, and treat it as a sub-record of the relocation directory. I currently do not do this, but coupling the discriminator to the record type would be the design decision that I will never do that, which seems a bit limiting to me.

- Change comment in test case based on reviewer comment - Add a test case testing error message for conversion error

jh7370

Still not got to the tests in depth yet, but these might be better reviewed by somebody with GOFF expertise anyway.

llvm/lib/ObjectYAML/GOFFEmitter.cpp

llvm/test/tools/yaml2obj/GOFF/end-invalid-name.yaml

llvm/lib/ObjectYAML/GOFFYAML.cpp

- Reword error message - Put quotes around value in error message

llvm/lib/ObjectYAML/GOFFYAML.cpp

EsmeYi · 2024-06-26T05:33:31Z

llvm/lib/ObjectYAML/GOFFEmitter.cpp

+  LR << binaryBe(uint8_t(EndMod.Flags)) // The flags.
+     << binaryBe(uint8_t(EndMod.AMODE)) // The addressing mode.
+     << zeros(3)                        // Reserved.
+     << binaryBe(EndMod.RecordCount)    // The record count.


If the record count is not specified, I think it would be better to fill in an actual calculated value.

I've run into trouble with the record count field. If it is zero, and the binder abends when trying to do a link, then I can use the Unix "dd" command to slice up the object and try and narrow down what is causing the abend. If the record count is correct to start with then slicing up the object will make it incorrect and remove a tool from my toolbox which then made my life more difficult.

To do this, you can specify the record count field as zero in yaml. And what I mean is when the field is omitted, i.e. has no value (neither zero nor any other value).

Correct, fields that are missing are filled with zero bytes in GOFF. A count of zero means the record count is unspecified.

All non-specified fields are set to zero. (That's in GOFFYAML.cpp). I removed setting the block counter automatically because it would be a value not specified in the input file. If this is useful then I can add a special label to use in the yaml file.

Thanks for the explanation. I'm not strongly against defaulting all omitted fields to zero. Other targets (like ELF and XCOFF) follow a mechanism to fill these fields with calculated values. If GOFF doesn't require it, I think that's acceptable.

With the ELF version of yaml2obj, the intent of the calculated fields is so that minimal YAML can be used to create a valid ELF for testing purposes. In many places, a 0 value as the default makes sense though. This helps keep the signal-to-noise ratio in the tests down to a minimum. The aim should be the same here, so use 0 where a real value isn't needed for tools to be able to read the object, and use a calculated field where it is needed.

llvm/lib/ObjectYAML/GOFFYAML.cpp

EsmeYi · 2024-06-27T06:35:10Z

llvm/lib/ObjectYAML/GOFFEmitter.cpp

+  LogicalRecord LR(GW);
+  LR << zeros(45)                          // Reserved.
+     << binaryBe(ModHdr.ArchitectureLevel) // The architecture level.
+     << binaryBe(ModHdr.PropertiesLength)  // Length of module properties.


When this field is omitted in yaml, would it be better to fill it with a calculated value?

EsmeYi · 2024-06-27T06:36:01Z

llvm/lib/ObjectYAML/GOFFEmitter.cpp

+     << binaryBe(EndMod.ESDID)          // ESDID of the entry point.
+     << zeros(4)                        // Reserved.
+     << binaryBe(EndMod.Offset)         // Offset of entry point.
+     << binaryBe(EndMod.NameLength)     // Length of external name.


Ditto. It can be filled with a calculated value when it's omitted.

Same comment as above. And I don't think the name field can ever be zero. A private symbol gets the name " " (a single space) which the binder then turns into something specific to that part of the GOFF file. I've since forgotten the details, but they don't matter here.

kpneal · 2024-06-28T19:09:37Z

llvm/test/tools/yaml2obj/GOFF/end-long-name.yaml

+## 1 record fully used.
+## Flags = 0 => not a continuation, not continued.
+# CHECK2:      03 40 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+# CHECK2-NEXT: 00 00 00 00 00 00 00 00 00 00 c6 96 96 c2 81 99


The Name Length field is zero. From the documentation: "This length cannot be zero if a name is present."

Either we're writing invalid GOFF or the documentation needs an update.

Or perhaps a comment in the test that says we're purposely ignoring the name length field for this test.

kpneal · 2024-06-28T19:13:37Z

llvm/test/tools/yaml2obj/GOFF/end-only.yaml

@@ -0,0 +1,22 @@
+# RUN: yaml2obj %s | od -v -An -tx1 | FileCheck --ignore-case %s
+
+## Verify that the GOFF end record is correct, with all fields having a value.


Can you change this to state that we're verifying that all fields have the value we expect but that the GOFF is still invalid? We have a lot of developers who don't know GOFF and stating that the record "is correct" won't be helpful to them.

kpneal · 2024-06-28T19:17:27Z

llvm/test/tools/yaml2obj/GOFF/header-only.yaml

+- ModuleHeader:
+    ArchitectureLevel: 1
+    PropertiesLength: 3
+    Properties: 0333ff


Are module properties documented anywhere? The only example I've ever seen was GOFF produced by the Binder, and that GOFF had module properties that were english text. Given that, it doesn't seem right to treat this field as an opaque sequence of bytes in the yaml.

jh7370 · 2024-07-03T07:59:05Z

llvm/test/tools/yaml2obj/GOFF/end-long-name.yaml

@@ -0,0 +1,74 @@
+# RUN: awk 'BEGIN {str = sprintf("%47s", "")} {gsub("@FILL@", str); print}' < %s | \


I'm not sure how portable this awk line is, but in any case, I don't think it's needed, if all you're trying to do is to set a specific string for different cases. You can use yaml2obj's -D option to specify a specific value that replaces [[VARNAME]] in the fields. The variable can even have a default value. Finally, with a bit of additional work in yaml2obj, you can specify something like [[VARNAME=<none>]] and yaml2obj will treat the field as not being present at all (see ELF for some examples of all of the above).

jh7370 · 2024-07-03T07:59:55Z

llvm/test/tools/yaml2obj/GOFF/end-long-name.yaml

@@ -0,0 +1,74 @@
+# RUN: awk 'BEGIN {str = sprintf("%47s", "")} {gsub("@FILL@", str); print}' < %s | \
+# RUN:   yaml2obj | od -v -An -tx1 | \


I would prefer to have yaml2obj generate the output object on disk and then use od or whatever to check its contents. This makes it easier to debug the output file if the test starts failing.

jh7370 · 2024-07-03T08:00:20Z

llvm/test/tools/yaml2obj/GOFF/end-long-name.yaml

@@ -0,0 +1,74 @@
+# RUN: awk 'BEGIN {str = sprintf("%47s", "")} {gsub("@FILL@", str); print}' < %s | \
+# RUN:   yaml2obj | od -v -An -tx1 | \
+# RUN:   FileCheck --ignore-case --check-prefix CHECK1 %s


--ignore-case shouldn't be necessary if you use the right options with od?

jh7370 · 2024-07-03T08:04:29Z

llvm/test/tools/yaml2obj/GOFF/end-long-name.yaml

+# RUN:   FileCheck --ignore-case --check-prefix CHECK4 %s
+
+
+## Verify that the entry name is written correctly over multiple records.


Normally we put the comment before the test case run line that it's related to. Indeed, a common pattern in more recent yaml2obj tests is the following:

## High-level comment summarising the tests in the file as a whole. # RUN: General setup code, e.g. creating a test output directory. Not always needed. ## Comment describing individual test case. # RUN: Test case commands # CHECK: Check directive(s) for test case ## Second test case comment # RUN: Second test case commands # CHECK-2: Check directive(s) for second test case, if needed ... <Common YAML>

Variations on the above where multiple different YAML docs are needed tend to have that YAML at the end of the block of test cases that share it.

MaskRay · 2024-07-03T22:35:59Z

llvm/test/tools/yaml2obj/GOFF/invalid-record.yaml

@@ -0,0 +1,7 @@
+# RUN: not yaml2obj %s 2>&1 | FileCheck %s
+
+# CHECK: YAML:{{[0-9]*}}:{{[0-9]*}}: error: unknown record type name 'Foo'


Note, we usually omit the part before error:.

Just use CHECK: error: xxx (k typo below?)

[GOFF] Refactor writing GOFF records

da97d3d

A GOFF file is basically a sequence of records. Change the YAML mapping for GOFF files to reflect this. As result, creating files for testing, e.g. without a module header, becomes possible.

llvmbot added the objectyaml label May 30, 2024

redstar requested a review from jh7370 May 30, 2024 18:07

redstar requested review from MaskRay and chapuni May 30, 2024 18:07

This was referenced May 30, 2024

[yaml2obj][GOFF] Fix writing GOFF header #91383

Closed

[GOFF] Add writing text records to yaml2obj #93863

Open

redstar added 2 commits May 31, 2024 10:50

Replace combination of sed and bash expression with awk in LIT test

d850899

Change awk statement in LIT test again

ebd2444

jh7370 reviewed Jun 6, 2024

View reviewed changes

redstar added 2 commits June 7, 2024 13:14

Changes based on reviewer comments.

a778e25

Remove additional braces around argument list

6f26aa8

Everybody0523 reviewed Jun 8, 2024

View reviewed changes

Small changes

641f644

- Change comment in test case based on reviewer comment - Add a test case testing error message for conversion error

jh7370 reviewed Jun 20, 2024

View reviewed changes

llvm/lib/ObjectYAML/GOFFEmitter.cpp Outdated Show resolved Hide resolved

llvm/test/tools/yaml2obj/GOFF/end-invalid-name.yaml Show resolved Hide resolved

llvm/lib/ObjectYAML/GOFFYAML.cpp Outdated Show resolved Hide resolved

Updates based on reviewer comments

020589f

- Reword error message - Put quotes around value in error message

redstar requested a review from kpneal June 21, 2024 16:51

chenzheng1030 requested review from EsmeYi and diggerlin June 24, 2024 00:42

EsmeYi reviewed Jun 26, 2024

View reviewed changes

EsmeYi reviewed Jun 27, 2024

View reviewed changes

kpneal reviewed Jun 28, 2024

View reviewed changes

jh7370 reviewed Jul 3, 2024

View reviewed changes

MaskRay reviewed Jul 3, 2024

View reviewed changes

	## Flags = 0 => no continuation, not continued.
	## Flags = 0 => not a continuation, not continued.

		@@ -0,0 +1,22 @@
		# RUN: yaml2obj %s \| od -v -An -tx1 \| FileCheck --ignore-case %s

		## Verify that the GOFF end record is correct, with all fields having a value.

		@@ -0,0 +1,74 @@
		# RUN: awk 'BEGIN {str = sprintf("%47s", "")} {gsub("@FILL@", str); print}' < %s \| \

		@@ -0,0 +1,74 @@
		# RUN: awk 'BEGIN {str = sprintf("%47s", "")} {gsub("@FILL@", str); print}' < %s \| \
		# RUN: yaml2obj \| od -v -An -tx1 \| \

		# RUN: FileCheck --ignore-case --check-prefix CHECK4 %s


		## Verify that the entry name is written correctly over multiple records.

		@@ -0,0 +1,7 @@
		# RUN: not yaml2obj %s 2>&1 \| FileCheck %s

		# CHECK: YAML:{{[0-9]}}:{{[0-9]}}: error: unknown record type name 'Foo'

[GOFF] Refactor writing GOFF records #93855

Are you sure you want to change the base?

[GOFF] Refactor writing GOFF records #93855

Uh oh!

Conversation

redstar commented May 30, 2024

Uh oh!

llvmbot commented May 30, 2024

Uh oh!

jh7370 left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

jh7370 left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!