|
1 | 1 | /* |
2 | | - * Copyright (c) 2015-2018, Intel Corporation |
| 2 | + * Copyright (c) 2015-2019, Intel Corporation |
3 | 3 | * |
4 | 4 | * Redistribution and use in source and binary forms, with or without |
5 | 5 | * modification, are permitted provided that the following conditions are met: |
|
56 | 56 | #include "parser/unsupported.h" |
57 | 57 | #include "parser/utf8_validate.h" |
58 | 58 | #include "rose/rose_build.h" |
| 59 | +#include "rose/rose_internal.h" |
59 | 60 | #include "som/slot_manager_dump.h" |
60 | 61 | #include "util/bytecode_ptr.h" |
61 | 62 | #include "util/compile_error.h" |
62 | 63 | #include "util/target_info.h" |
63 | 64 | #include "util/verify_types.h" |
| 65 | +#include "util/ue2string.h" |
64 | 66 |
|
65 | 67 | #include <algorithm> |
66 | 68 | #include <cassert> |
@@ -107,6 +109,46 @@ void validateExt(const hs_expr_ext &ext) { |
107 | 109 |
|
108 | 110 | } |
109 | 111 |
|
| 112 | +void ParsedLitExpression::parseLiteral(const char *expression, size_t len, |
| 113 | + bool nocase) { |
| 114 | + const char *c = expression; |
| 115 | + for (size_t i = 0; i < len; i++) { |
| 116 | + lit.push_back(*c, nocase); |
| 117 | + c++; |
| 118 | + } |
| 119 | +} |
| 120 | + |
| 121 | +ParsedLitExpression::ParsedLitExpression(unsigned index_in, |
| 122 | + const char *expression, |
| 123 | + size_t expLength, unsigned flags, |
| 124 | + ReportID report) |
| 125 | + : expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false, |
| 126 | + SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) { |
| 127 | + // For pure literal expression, below 'HS_FLAG_'s are unuseful: |
| 128 | + // DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET |
| 129 | + |
| 130 | + if (flags & ~HS_FLAG_ALL) { |
| 131 | + DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags); |
| 132 | + throw CompileError("Unrecognised flag."); |
| 133 | + } |
| 134 | + |
| 135 | + // FIXME: we disallow highlander + SOM, see UE-1850. |
| 136 | + if ((flags & HS_FLAG_SINGLEMATCH) && (flags & HS_FLAG_SOM_LEFTMOST)) { |
| 137 | + throw CompileError("HS_FLAG_SINGLEMATCH is not supported in " |
| 138 | + "combination with HS_FLAG_SOM_LEFTMOST."); |
| 139 | + } |
| 140 | + |
| 141 | + // Set SOM type. |
| 142 | + if (flags & HS_FLAG_SOM_LEFTMOST) { |
| 143 | + expr.som = SOM_LEFT; |
| 144 | + } |
| 145 | + |
| 146 | + // Transfer expression text into ue2_literal. |
| 147 | + bool nocase = flags & HS_FLAG_CASELESS ? true : false; |
| 148 | + parseLiteral(expression, expLength, nocase); |
| 149 | + |
| 150 | +} |
| 151 | + |
110 | 152 | ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, |
111 | 153 | unsigned flags, ReportID report, |
112 | 154 | const hs_expr_ext *ext) |
@@ -345,6 +387,49 @@ void addExpression(NG &ng, unsigned index, const char *expression, |
345 | 387 | } |
346 | 388 | } |
347 | 389 |
|
| 390 | +void addLitExpression(NG &ng, unsigned index, const char *expression, |
| 391 | + unsigned flags, const hs_expr_ext *ext, ReportID id, |
| 392 | + size_t expLength) { |
| 393 | + assert(expression); |
| 394 | + const CompileContext &cc = ng.cc; |
| 395 | + DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s', len='%zu'\n", index, |
| 396 | + id, flags, expression, expLength); |
| 397 | + |
| 398 | + // Extended parameters are not supported for pure literal patterns. |
| 399 | + if (ext && ext->flags != 0LLU) { |
| 400 | + throw CompileError("Extended parameters are not supported for pure " |
| 401 | + "literal matching API."); |
| 402 | + } |
| 403 | + |
| 404 | + // Ensure that our pattern isn't too long (in characters). |
| 405 | + if (strlen(expression) > cc.grey.limitPatternLength) { |
| 406 | + throw CompileError("Pattern length exceeds limit."); |
| 407 | + } |
| 408 | + |
| 409 | + // filter out flags not supported by pure literal API. |
| 410 | + u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 | |
| 411 | + HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION | |
| 412 | + HS_FLAG_QUIET; |
| 413 | + |
| 414 | + if (flags & not_supported) { |
| 415 | + throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, " |
| 416 | + "HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are " |
| 417 | + "supported in literal API."); |
| 418 | + } |
| 419 | + |
| 420 | + // This expression must be a pure literal, we can build ue2_literal |
| 421 | + // directly based on expression text. |
| 422 | + ParsedLitExpression ple(index, expression, expLength, flags, id); |
| 423 | + |
| 424 | + // Feed the ue2_literal into Rose. |
| 425 | + const auto &expr = ple.expr; |
| 426 | + if (ng.addLiteral(ple.lit, expr.index, expr.report, expr.highlander, |
| 427 | + expr.som, expr.quiet)) { |
| 428 | + DEBUG_PRINTF("took pure literal\n"); |
| 429 | + return; |
| 430 | + } |
| 431 | +} |
| 432 | + |
348 | 433 | static |
349 | 434 | bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) { |
350 | 435 | const u32 minWidth = |
@@ -416,10 +501,13 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { |
416 | 501 | } |
417 | 502 |
|
418 | 503 |
|
419 | | -struct hs_database *build(NG &ng, unsigned int *length) { |
| 504 | +struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) { |
420 | 505 | assert(length); |
421 | 506 |
|
422 | 507 | auto rose = generateRoseEngine(ng); |
| 508 | + struct RoseEngine *roseHead = rose.get(); |
| 509 | + roseHead->pureLiteral = pureFlag; |
| 510 | + |
423 | 511 | if (!rose) { |
424 | 512 | throw CompileError("Unable to generate bytecode."); |
425 | 513 | } |
|
0 commit comments