Skip to content

track PC and assert at beginning of blocks #387

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 35 commits into from
May 22, 2025
Merged

track PC and assert at beginning of blocks #387

merged 35 commits into from
May 22, 2025

Conversation

katrinafyi
Copy link
Member

@katrinafyi katrinafyi commented Mar 28, 2025

this makes a fairly significant change to the GTIRB loader, with the goal of
supporting assertions about the PC at the beginning of each block, to make
sure that the PC matches the expected address.

this is done by changing the loader to maintain PC assignments and
inserting a default increment if there is no branch. each procedure in the
lifted program also gains a requires clause, to require PC is equal to its
entry point, and an ensures clause, to ensure PC is equal to R30.

this functionality is gated behind the --pc flag. this has 3 options:

  • none (the default) will not produce any PC statements (as close as possible, this is meant to match the previous behaviour before this PR)
  • keep will emit PC increment and assignment statements, but no assertions. this should let you inspect the PC behaviour without adding a verification burden. the PC operations are liable to be optimised away by BASIL transforms.
  • assert will emit PC statements, assertions at beginnings of blocks, and requires/ensures for all procedures.

example usage:

./mill run --load-directory-gtirb src/test/correct/cjump/gcc --dump-il basic --pc assert && cat basic-output.il  
memory mem : map bv8[bv64];
var CF : bv1;
var NF : bv1;
var R0 : bv64;
var R1 : bv64;
var R30 : bv64;
var VF : bv1;
var ZF : bv1;
var _PC : bv64;

let entry_procedure = main_1812;

proc main_1812() -> ()
{
  name = "main";
  address = 0x714;
  entry_block = "main_1812__0__YwDxm6NpRSaYnJwZf~bgAg";
  blocks = [
    block main_1812__0__YwDxm6NpRSaYnJwZf~bgAg {address = 0x714} [
      assert bveq(_PC:bv64, 0x714:bv64);
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R0: bv64 := 0x11000:bv64;
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R0: bv64 := bvadd(R0:bv64, 0x14:bv64);
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R1: bv64 := 0x1:bv64;
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      store le mem R0:bv64 extract(32, 0, R1:bv64) 32;
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R0: bv64 := 0x11000:bv64;
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R0: bv64 := bvadd(R0:bv64, 0x14:bv64);
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      var load3: bv32 := load le mem R0:bv64 32;
      R0: bv64 := zero_extend(32, load3:bv32);
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      var Cse0__5_3_7: bv32 := bvadd(extract(32, 0, R0:bv64), 0x0:bv32);
      VF: bv1 := bvnot(bvcomp(Cse0__5_3_7:bv32, Cse0__5_3_7:bv32));
      CF: bv1 := bvnot(bvcomp(zero_extend(1, Cse0__5_3_7:bv32), bvadd(zero_extend(1, extract(32, 0, R0:bv64)), 0x100000000:bv33)));
      ZF: bv1 := bvcomp(Cse0__5_3_7:bv32, 0x0:bv32);
      NF: bv1 := extract(32, 31, Cse0__5_3_7:bv32);
      goto(main_1812__0__YwDxm6NpRSaYnJwZf~bgAg_goto_main_1812__2__WXfzVKBzTQ2pcN9SDnwGAQ, main_1812__0__YwDxm6NpRSaYnJwZf~bgAg_goto_main_1812__1__YoxtYzmmTLarbLScWy3SCQ)
    ];
    block main_1812__0__YwDxm6NpRSaYnJwZf~bgAg_goto_main_1812__1__YoxtYzmmTLarbLScWy3SCQ [
      assume boolnot(bveq(ZF:bv1, 0x1:bv1));
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      assert bveq(_PC:bv64, 0x738:bv64);
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R0: bv64 := 0x11000:bv64;
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R0: bv64 := bvadd(R0:bv64, 0x18:bv64);
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R1: bv64 := 0x3:bv64;
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      store le mem R0:bv64 extract(32, 0, R1:bv64) 32;
      _PC: bv64 := 0x75c:bv64;
      goto(main_1812__1__YoxtYzmmTLarbLScWy3SCQ)
    ];
    block main_1812__1__YoxtYzmmTLarbLScWy3SCQ {address = 0x738} [
      goto(main_1812__3__RpUJmfB5RPyIGv0DK5At_g)
    ];
    block main_1812__0__YwDxm6NpRSaYnJwZf~bgAg_goto_main_1812__2__WXfzVKBzTQ2pcN9SDnwGAQ [
      assume bveq(ZF:bv1, 0x1:bv1);
      _PC: bv64 := 0x67c:bv64;
      assert bveq(_PC:bv64, 0x74c:bv64); // <===============
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R0: bv64 := 0x11000:bv64;
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R0: bv64 := bvadd(R0:bv64, 0x18:bv64);
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R1: bv64 := 0x2:bv64;
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      store le mem R0:bv64 extract(32, 0, R1:bv64) 32;
      goto(main_1812__2__WXfzVKBzTQ2pcN9SDnwGAQ)
    ];
    block main_1812__2__WXfzVKBzTQ2pcN9SDnwGAQ {address = 0x74c} [
      goto(main_1812__3__RpUJmfB5RPyIGv0DK5At_g)
    ];
    block main_1812__3__RpUJmfB5RPyIGv0DK5At_g {address = 0x75c} [
      assert bveq(_PC:bv64, 0x75c:bv64);
      _PC: bv64 := bvadd(_PC:bv64, 0x4:bv64);
      R0: bv64 := 0x0:bv64;
      _PC: bv64 := R30:bv64;
      goto(main_1812_basil_return)
    ];
    block main_1812_basil_return [
      return ()
    ]
  ];
};

however, the checked-in gts files were generated with a bug leading to incorrect PC addresses (UQ-PAC/gtirb-semantics#27). this leads to discrepancies like the lines indicated by <===. this is a direct jump, but the addresses from the branch instruction and the block address differ.

in some cases (but not this example), this leads to false verification failures. here is a verification which will fail:

./mill run --load-directory-gtirb src/test/correct/ifbranches/clang --dump-il asd --pc assert --verify

by using the updated test case files from #288, you can observe the verification succeeding.

extra changes affecting existing code

  • change SpecificationLoader parser to also understand _PC as an implicit global variable (like the registers)
  • added a helper function into BProgram which executes Boogie to verify the program
  • added List- and Map-related functional helper utils.
  • changed IRToBoogie and IRToBoogieNoVC to handle requiresExpr/ensuresExpr
  • fixed undeclared mem issue in IRToBoogieNoVC

bugs / todo

  • simplifier and/or parameter form is broken with the additional requires/ensures clauses which reference _PC and R30. it doesn't seem to want to inject these into the parameter list and they're leftover as global variables. without any connection to the boogie procedure, this leads to verification failure.
  • ci is not happy for some reason. it works on my machine ;-;

katrinafyi added 6 commits May 9, 2025 17:49
This reverts commit 1765045.
this makes a fairly significant change to the GTIRB loader by inserting
PC increment statements after instructions and checking the PC at the
entry of blocks. this is done by changing the loader to maintain PC and
branchtaken assignments, and inserting a default increment if there is
no branch.

this surely has bugs
@katrinafyi
Copy link
Member Author

idk why it produces invalid boogie. it's because of an empty target list but idk why

  ./src/test/incorrect/basicassign/gcc_O2/basicassign_gtirb.bpl(112,5): error: Error: goto label 'goto' is undefined

@katrinafyi katrinafyi changed the title feat: maintain program counter and assert at beginning of blocks track PC and assert at beginning of blocks May 12, 2025
this fixes the problemetic cross pattern we saw, where there were extra
blocks inserted before the original successor blocks.

still remaining: some empty blocks?

     R0, Gamma_R0 := zero_extend32_32(load14), Gamma_load14;
     assert Gamma_R0;
     goto main_1536__0__FF2SjjvqSsOWEiG9v2p92Q_goto_main_1536__2__MYSV1omOQM2b08Jj~q4Iuw, main_1536__0__FF2SjjvqSsOWEiG9v2p92Q_goto_main_1536__1__5dIr9jyaTQaHqLa9MRdoFQ;
+  main_1536__3__smO~Vm6qTomoCpzw26RqNg:
+    assume {:captureState "main_1536__3__smO~Vm6qTomoCpzw26RqNg"} true;
+    goto ;
   main_1536__0__FF2SjjvqSsOWEiG9v2p92Q_goto_main_1536__1__5dIr9jyaTQaHqLa9MRdoFQ:
     assume {:captureState "main_1536__0__FF2SjjvqSsOWEiG9v2p92Q_goto_main_1536__1__5dIr9jyaTQaHqLa9MRdoFQ"} true;
     assume (!((R0[32:0] == 0bv32) == false));
this lets us catch cases outside of gtirb, and it allows
the RemoveUnreachableBlocks phase to run before printing
the warnings.
this fixes the DifferentialAnalysisTest seeing program counters
in its loaded IR, but it is maybe undesirable to do a transformation
in the load() method.

regardless, the current approach (of keep PC, then remove if unwanted)
seems to necessitate this, as later parts of the runutils pipeline
make assumptions that PC wont be present and fail in unexpected ways
otherwise.
@katrinafyi katrinafyi marked this pull request as ready for review May 21, 2025 07:39
@ailrst ailrst merged commit b45683f into main May 22, 2025
10 checks passed
@katrinafyi katrinafyi deleted the gts-pc2 branch May 24, 2025 04:34
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants