Skip to content

Commit 0a1c5c4

Browse files
committed
Merge branch develop into master
2 parents aff7242 + 582fd30 commit 0a1c5c4

File tree

136 files changed

+30403
-379
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

136 files changed

+30403
-379
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ bin
3030

3131
# sigs dir is handled externally
3232
signatures
33+
# but not the regression tests
34+
!tools/hscollider/test_cases/signatures
3335

3436
# ignore pcre symlink if it exists
3537
pcre

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22

33
This is a list of notable changes to Hyperscan, in reverse chronological order.
44

5+
## [4.7.0] 2018-01-24
6+
- Introduced hscollider pattern testing tool, for validating Hyperscan match
7+
behaviour against PCRE.
8+
- Introduced hscheck pattern compilation tool.
9+
- Introduced hsdump development tool for producing information about Hyperscan
10+
pattern compilation.
11+
- New API feature: extended approximate matching support for Hamming distance.
12+
- Bugfix for issue #69: Force C++ linkage in Xcode.
13+
- Bugfix for issue #73: More documentation for `hs_close_stream()`.
14+
- Bugfix for issue #78: Fix for fat runtime initialisation when used as a
15+
shared library.
16+
517
## [4.6.0] 2017-09-22
618
- New API feature: stream state compression. This allows the user to compress
719
and restore state for streams to reduce memory usage.

CMakeLists.txt

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ cmake_minimum_required (VERSION 2.8.11)
22
project (hyperscan C CXX)
33

44
set (HS_MAJOR_VERSION 4)
5-
set (HS_MINOR_VERSION 6)
5+
set (HS_MINOR_VERSION 7)
66
set (HS_PATCH_VERSION 0)
77
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
88

@@ -30,7 +30,7 @@ else()
3030
message(STATUS "Build type ${CMAKE_BUILD_TYPE}")
3131
endif()
3232

33-
if(CMAKE_BUILD_TYPE MATCHES RELEASE|RELWITHDEBINFO)
33+
if(CMAKE_BUILD_TYPE MATCHES RELEASE|RELWITHDEBINFO|MINSIZEREL)
3434
set(RELEASE_BUILD TRUE)
3535
else()
3636
set(RELEASE_BUILD FALSE)
@@ -218,8 +218,13 @@ else()
218218
endif()
219219

220220
if(OPTIMISE)
221-
set(OPT_C_FLAG "-O3")
222-
set(OPT_CXX_FLAG "-O2")
221+
if (NOT CMAKE_BUILD_TYPE MATCHES MINSIZEREL)
222+
set(OPT_C_FLAG "-O3")
223+
set(OPT_CXX_FLAG "-O2")
224+
else ()
225+
set(OPT_C_FLAG "-Os")
226+
set(OPT_CXX_FLAG "-Os")
227+
endif ()
223228
else()
224229
set(OPT_C_FLAG "-O0")
225230
set(OPT_CXX_FLAG "-O0")
@@ -423,10 +428,10 @@ endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
423428

424429
if(NOT WIN32)
425430
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
426-
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable=remark")
431+
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 279 -diag-disable=remark")
427432
endif()
428433
if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
429-
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 1170 -diag-disable 3373 -diag-disable=remark")
434+
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 1170 -diag-disable 3373 -diag-disable 279 -diag-disable=remark")
430435
endif()
431436
endif()
432437

@@ -1271,25 +1276,42 @@ if (NOT BUILD_SHARED_LIBS)
12711276
endif()
12721277

12731278
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
1279+
set(hs_shared_SRCS
1280+
src/hs_version.c
1281+
src/hs_valid_platform.c
1282+
$<TARGET_OBJECTS:hs_compile_shared>)
1283+
1284+
if (XCODE)
1285+
# force this lib to use C++ linkage
1286+
add_custom_command(OUTPUT empty.cxx
1287+
COMMAND ${CMAKE_COMMAND} -E touch empty.cxx)
1288+
set (hs_shared_SRCS ${hs_shared_SRCS} empty.cxx)
1289+
endif (XCODE)
1290+
12741291
if (NOT FAT_RUNTIME)
1275-
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
1276-
$<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
1277-
else()
1278-
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
1279-
$<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_common_shared>
1292+
set(hs_shared_SRCS
1293+
${hs_shared_SRCS}
1294+
$<TARGET_OBJECTS:hs_exec_shared>)
1295+
else ()
1296+
set(hs_shared_SRCS
1297+
${hs_shared_SRCS}
1298+
$<TARGET_OBJECTS:hs_exec_common_shared>
12801299
${RUNTIME_SHLIBS})
1300+
endif ()
1301+
1302+
add_library(hs_shared SHARED ${hs_shared_SRCS})
12811303

1282-
endif()
12831304
add_dependencies(hs_shared ragel_Parser)
12841305
set_target_properties(hs_shared PROPERTIES
12851306
OUTPUT_NAME hs
12861307
VERSION ${LIB_VERSION}
12871308
SOVERSION ${LIB_SOVERSION}
12881309
MACOSX_RPATH ON)
1289-
install(TARGETS hs_shared
1290-
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
1291-
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
1292-
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
1310+
1311+
install(TARGETS hs_shared
1312+
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
1313+
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
1314+
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
12931315
endif()
12941316

12951317
# used by tools and other targets

cmake/pcre.cmake

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# first look in pcre-$version or pcre subdirs
2+
if (PCRE_SOURCE)
3+
# either provided on cmdline or we've seen it already
4+
set (PCRE_BUILD_SOURCE TRUE)
5+
elseif (EXISTS ${PROJECT_SOURCE_DIR}/pcre-${PCRE_REQUIRED_VERSION})
6+
set (PCRE_SOURCE ${PROJECT_SOURCE_DIR}/pcre-${PCRE_REQUIRED_VERSION})
7+
set (PCRE_BUILD_SOURCE TRUE)
8+
elseif (EXISTS ${PROJECT_SOURCE_DIR}/pcre)
9+
set (PCRE_SOURCE ${PROJECT_SOURCE_DIR}/pcre)
10+
set (PCRE_BUILD_SOURCE TRUE)
11+
endif()
12+
13+
if (PCRE_BUILD_SOURCE)
14+
if (NOT IS_ABSOLUTE ${PCRE_SOURCE})
15+
set(PCRE_SOURCE "${CMAKE_BINARY_DIR}/${PCRE_SOURCE}")
16+
endif ()
17+
set (saved_INCLUDES "${CMAKE_REQUIRED_INCLUDES}")
18+
set (CMAKE_REQUIRED_INCLUDES "${CMAKE_REQUIRED_INCLUDES} ${PCRE_SOURCE}")
19+
20+
if (PCRE_CHECKED)
21+
set(PCRE_INCLUDE_DIRS ${PCRE_SOURCE} ${PROJECT_BINARY_DIR}/pcre)
22+
set(PCRE_LDFLAGS -L"${LIBDIR}" -lpcre)
23+
24+
# already processed this file and set up pcre building
25+
return()
26+
endif ()
27+
28+
# first, check version number
29+
CHECK_C_SOURCE_COMPILES("#include <pcre.h.generic>
30+
#if PCRE_MAJOR != ${PCRE_REQUIRED_MAJOR_VERSION} || PCRE_MINOR != ${PCRE_REQUIRED_MINOR_VERSION}
31+
#error Incorrect pcre version
32+
#endif
33+
main() {}" CORRECT_PCRE_VERSION)
34+
set (CMAKE_REQUIRED_INCLUDES "${saved_INCLUDES}")
35+
36+
if (NOT CORRECT_PCRE_VERSION)
37+
unset(CORRECT_PCRE_VERSION CACHE)
38+
message(STATUS "Incorrect version of pcre - version ${PCRE_REQUIRED_VERSION} is required")
39+
return ()
40+
else()
41+
message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} - building from source.")
42+
endif()
43+
44+
# PCRE compile options
45+
option(PCRE_BUILD_PCRECPP OFF)
46+
option(PCRE_BUILD_PCREGREP OFF)
47+
option(PCRE_SHOW_REPORT OFF)
48+
set(PCRE_SUPPORT_UNICODE_PROPERTIES ON CACHE BOOL "Build pcre with unicode")
49+
add_subdirectory(${PCRE_SOURCE} ${PROJECT_BINARY_DIR}/pcre EXCLUDE_FROM_ALL)
50+
set(PCRE_INCLUDE_DIRS ${PCRE_SOURCE} ${PROJECT_BINARY_DIR}/pcre)
51+
set(PCRE_LDFLAGS -L"${LIBDIR}" -lpcre)
52+
else ()
53+
# no PCRE source tree to build from: fall back to a system libpcre located via pkg-config
54+
find_package(PkgConfig)
55+
pkg_check_modules(PCRE libpcre=${PCRE_REQUIRED_VERSION})
56+
if (PCRE_FOUND)
57+
message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION}")
58+
else ()
59+
message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} not found")
60+
return ()
61+
endif ()
62+
endif (PCRE_BUILD_SOURCE)
63+
64+
set (PCRE_CHECKED TRUE PARENT_SCOPE)

cmake/sqlite3.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ else()
4444
# build sqlite as a static lib to compile into our test programs
4545
add_library(sqlite3_static STATIC "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
4646
if (NOT WIN32)
47-
set_target_properties(sqlite3_static PROPERTIES COMPILE_FLAGS "-Wno-unused -Wno-cast-qual -DSQLITE_OMIT_LOAD_EXTENSION")
47+
set_target_properties(sqlite3_static PROPERTIES COMPILE_FLAGS "-Wno-error -Wno-extra -Wno-unused -Wno-cast-qual -DSQLITE_OMIT_LOAD_EXTENSION")
4848
endif()
4949
endif()
5050
endif()

doc/dev-reference/_static/hyperscan.css

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,9 @@
22
.regexp {
33
color: darkred !important;
44
}
5+
6+
/* Avoid Goudy Old Style (the alabaster theme default), which renders in
7+
* italics on some Mac/Safari systems. */
8+
body {
9+
font-family: 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif;
10+
}

doc/dev-reference/compilation.rst

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ which provides the following fields:
287287
* ``min_length``: The minimum match length (from start to end) required to
288288
successfully match this expression.
289289
* ``edit_distance``: Match this expression within a given Levenshtein distance.
290+
* ``hamming_distance``: Match this expression within a given Hamming distance.
290291
291292
These parameters either allow the set of matches produced by a pattern to be
292293
constrained at compile time (rather than relying on the application to process
@@ -299,10 +300,15 @@ and a ``max_offset`` of 15 will not produce matches when scanned against
299300
streams ``foo0123bar`` or ``foo0123456bar``.
300301
301302
Similarly, the pattern :regexp:`/foobar/` when given an ``edit_distance`` of 2
302-
will produce matches when scanned against ``foobar``, ``fooba``, ``fobr``,
303-
``fo_baz``, ``foooobar``, and anything else that lies within edit distance of 2
304-
(as defined by Levenshtein distance). For more details, see the
305-
:ref:`approximate_matching` section.
303+
will produce matches when scanned against ``foobar``, ``f00bar``, ``fooba``,
304+
``fobr``, ``fo_baz``, ``foooobar``, and anything else that lies within edit
305+
distance of 2 (as defined by Levenshtein distance).
306+
307+
When the same pattern :regexp:`/foobar/` is given a ``hamming_distance`` of 2,
308+
it will produce matches when scanned against ``foobar``, ``boofar``,
309+
``f00bar``, and anything else with at most two characters substituted from the
310+
original pattern. For more details, see the :ref:`approximate_matching`
311+
section.
306312
307313
=================
308314
Prefiltering Mode
@@ -377,7 +383,7 @@ The :c:type:`hs_platform_info_t` structure has two fields:
377383
#. ``cpu_features``: This allows the application to specify a mask of CPU
378384
features that may be used on the target platform. For example,
379385
:c:member:`HS_CPU_FEATURES_AVX2` can be specified for Intel\ |reg| Advanced
380-
Vector Extensions +2 (Intel\ |reg| AVX2) instruction set support. If a flag
386+
Vector Extensions 2 (Intel\ |reg| AVX2) instruction set support. If a flag
381387
for a particular CPU feature is specified, the database will not be usable on
382388
a CPU without that feature.
383389
@@ -398,13 +404,20 @@ follows:
398404
399405
#. **Edit distance** is defined as Levenshtein distance. That is, there are
400406
three possible edit types considered: insertion, removal and substitution.
401-
More formal description can be found on
402-
`Wikipedia <https://en.wikipedia.org/wiki/Levenshtein_distance>`_.
407+
A more formal description can be found on
408+
`Wikipedia <https://en.wikipedia.org/wiki/Levenshtein_distance>`__.
409+
410+
#. **Hamming distance** is the number of positions at which two strings of
411+
equal length differ. That is, it is the number of substitutions required to
412+
convert one string to the other. There are no insertions or removals when
413+
approximate matching using a Hamming distance. A more formal description can
414+
be found on
415+
`Wikipedia <https://en.wikipedia.org/wiki/Hamming_distance>`__.
403416
404-
#. **Approximate matching** will match all *corpora* within a given edit
405-
distance. That is, given a pattern, approximate matching will match anything
406-
that can be edited to arrive at a corpus that exactly matches the original
407-
pattern.
417+
#. **Approximate matching** will match all *corpora* within a given edit or
418+
Hamming distance. That is, given a pattern, approximate matching will match
419+
anything that can be edited to arrive at a corpus that exactly matches the
420+
original pattern.
408421
409422
#. **Matching semantics** are exactly the same as described in :ref:`semantics`.
410423
@@ -437,7 +450,9 @@ matching support. Here they are, in a nutshell:
437450
reduce to so-called "vacuous" patterns (patterns that match everything). For
438451
example, pattern :regexp:`/foo/` with edit distance 3, if implemented,
439452
would reduce to matching zero-length buffers. Such patterns will result in a
440-
"Pattern cannot be approximately matched" compile error.
453+
"Pattern cannot be approximately matched" compile error. Approximate
454+
matching within a Hamming distance does not remove symbols, so will not
455+
reduce to a vacuous pattern.
441456
* Finally, due to the inherent complexities of defining matching behavior,
442457
approximate matching implements a reduced subset of regular expression
443458
syntax. Approximate matching does not support UTF-8 (and other

doc/dev-reference/conf.py.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ master_doc = 'index'
4444

4545
# General information about the project.
4646
project = u'Hyperscan'
47-
copyright = u'2015-2017, Intel Corporation'
47+
copyright = u'2015-2018, Intel Corporation'
4848

4949
# The version info for the project you're documenting, acts as replacement for
5050
# |version| and |release|, also used in various other places throughout the

doc/dev-reference/copyright.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,4 @@ and/or other countries.
3030

3131
\*Other names and brands may be claimed as the property of others.
3232

33-
Copyright |copy| 2015-2017, Intel Corporation. All rights reserved.
33+
Copyright |copy| 2015-2018, Intel Corporation. All rights reserved.

doc/dev-reference/hyperscan.doxyfile.in

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,7 @@ EXCLUDE_PATTERNS =
820820
# Note that the wildcards are matched against the file with absolute path, so to
821821
# exclude all test directories use the pattern */test/*
822822

823-
EXCLUDE_SYMBOLS =
823+
EXCLUDE_SYMBOLS = "HS_CDECL"
824824

825825
# The EXAMPLE_PATH tag can be used to specify one or more files or directories
826826
# that contain example code fragments that are included (see the \include
@@ -1959,15 +1959,15 @@ ENABLE_PREPROCESSING = YES
19591959
# The default value is: NO.
19601960
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
19611961

1962-
MACRO_EXPANSION = NO
1962+
MACRO_EXPANSION = YES
19631963

19641964
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
19651965
# the macro expansion is limited to the macros specified with the PREDEFINED and
19661966
# EXPAND_AS_DEFINED tags.
19671967
# The default value is: NO.
19681968
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
19691969

1970-
EXPAND_ONLY_PREDEF = NO
1970+
EXPAND_ONLY_PREDEF = YES
19711971

19721972
# If the SEARCH_INCLUDES tag is set to YES, the include files in the
19731973
# INCLUDE_PATH will be searched if a #include is found.
@@ -1999,7 +1999,7 @@ INCLUDE_FILE_PATTERNS =
19991999
# recursively expanded use the := operator instead of the = operator.
20002000
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
20012001

2002-
PREDEFINED =
2002+
PREDEFINED = "HS_CDECL="
20032003

20042004
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
20052005
# tag can be used to specify a list of macro names that should be expanded. The

0 commit comments

Comments
 (0)