Skip to content

Commit 5bf6450

Browse files
authored
Merge pull request #8 from sansecio/wdg/vendor
Convert indexing from MD5 to CRC32
2 parents 230d829 + 4b712f9 commit 5bf6450

23 files changed

+218
-152
lines changed

.composer/config.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"config": {
3+
"allow-plugins": true
4+
},
5+
"repositories": {
6+
"0": {
7+
"type": "composer",
8+
"url": "https://repo.magento.com/"
9+
}
10+
}
11+
}

.envrc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
export COMPOSER_HOME=$PWD/.composer
2+
export COMPOSER_IGNORE_PLATFORM_REQS=1

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
!.composer/config.json
2+
.composer/*
13
.vscode
24
*.bin
35
/db
6+
/build/*

README.md

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
![](https://buq.eu/screenshots/6595XfnX5wwUPzbFQGkU0GgN.png)
44

5-
A forensic tool to quickly find unauthorized modifications in a Magento 1 or 2 code base. Corediff compares each line of code with a database of 1.7M legitimate code hashes and shows you the lines that have not been seen before. A bit like [@NYT_first_said](https://maxbittker.github.io/clear-pipes/).
5+
A forensic tool to quickly find unauthorized modifications in an open source code base, such as Magento. Corediff compares each line of code with a database of 1.7M legitimate code hashes and shows you the lines that have not been seen before. A bit like [@NYT_first_said](https://maxbittker.github.io/clear-pipes/).
66

77
> _"Corediff saved us countless hours"_
88
@@ -41,25 +41,29 @@ Use our binary package (available for Linux & Mac, arm64 & amd64)
4141
osarch=$(uname -sm | tr 'LD ' 'ld-')
4242
curl https://sansec.io/downloads/$osarch/corediff -O
4343
chmod 755 corediff
44-
./corediff <magento_path> | less -SR
44+
./corediff <store-path> | less -SR
4545
```
4646

47-
Or compile from source (requires Go 1.13+):
47+
Or compile from source (requires a recent Go version):
4848

4949
```sh
50-
git clone https://github.com/sansecio/magento-corediff.git
51-
cd magento-corediff
52-
go run . <magento_path>
50+
git clone https://github.com/sansecio/corediff.git
51+
cd corediff
52+
go run . <store-path>
5353
```
5454

55-
At the first run, `corediff` will automatically download the Sansec hash database (~26MB).
55+
On the first run, `corediff` will automatically download the Sansec hash database.
5656

5757
# Community contributed datasets
5858

59-
[@fros_it](https://twitter.com/fros_it) has kindly contributed hashes for his collection of Magento Connect extensions, including all available historical copies. Download the [extension hash database](https://api.sansec.io/downloads/corediff-db/m1ext.db) here (62MB) and use it like this:
59+
[@fros_it](https://twitter.com/fros_it) has kindly contributed hashes for his collection of Magento Connect extensions, including all available historical copies. Download the [extension hash database](https://sansec.io/downloads/corediff-db/m1ext.db) here (62MB) and use it like this:
6060

6161
![](https://buq.eu/screenshots/RXdQ1Mmg5KliivMtK6DlHTcP.png)
6262

63+
# Todo
64+
65+
- [ ] Compression of hash db? E.g. https://github.com/Smerity/govarint, https://github.com/bits-and-blooms/bloom
66+
6367
# Contributing
6468

6569
Adding or maintaining hashes?
@@ -80,4 +84,4 @@ Contributions welcome! Naturally, we only accept hashes from trusted sources. [C
8084

8185
Sansec's flagship software [eComscan](https://sansec.io/?corediff) is used by ecommerce agencies, law enforcement and PCI forensic investigators. We are proud to open source many of our internal tools and hope that they will benefit our partners and customers. Malware contributions welcome.
8286

83-
(C) 2022 [Sansec BV](https://sansec.io/?corediff) // info@sansec.io
87+
(C) 2023 [Sansec BV](https://sansec.io/?corediff) // info@sansec.io

corediff.go

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,46 +2,48 @@ package main
22

33
import (
44
"bufio"
5+
"encoding/binary"
56
"fmt"
7+
"io"
68
"log"
79
"os"
810
"path/filepath"
911
)
1012

11-
func loadDB(path string) hashDB {
13+
var placeholder = struct{}{}
1214

15+
func loadDB(path string) hashDB {
1316
m := make(hashDB)
14-
1517
f, err := os.Open(path)
1618
if os.IsNotExist(err) {
1719
return m
20+
} else if err != nil {
21+
log.Fatal(err)
1822
}
19-
check(err)
2023
defer f.Close()
2124
reader := bufio.NewReader(f)
2225
for {
23-
b := make([]byte, 16)
24-
n, err := reader.Read(b)
25-
if n == 0 {
26+
var b uint32
27+
err = binary.Read(reader, binary.LittleEndian, &b)
28+
if err == io.EOF {
2629
break
30+
} else if err != nil {
31+
log.Fatal(err)
2732
}
28-
check(err)
29-
var b2 [16]byte
30-
copy(b2[:], b) // need to convert to array first
31-
m[b2] = true
33+
m[b] = placeholder
3234
}
3335
return m
3436
}
3537

3638
func saveDB(path string, db hashDB) {
3739
f, err := os.Create(path)
40+
if err != nil {
41+
log.Fatal(err)
42+
}
3843
defer f.Close()
39-
check(err)
4044
for k := range db {
41-
n, err := f.Write(k[:])
42-
check(err)
43-
if n != 16 {
44-
log.Fatal("Wrote unexpected number of bytes?")
45+
if err := binary.Write(f, binary.LittleEndian, k); err != nil {
46+
log.Fatal(err)
4547
}
4648
}
4749
}
@@ -60,10 +62,10 @@ func parseFile(path, relPath string, db hashDB, updateDB bool) (hits []int, line
6062
copy(l, x)
6163
lines = append(lines, l)
6264
h := hash(normalizeLine(l))
63-
if !db[h] {
65+
if _, ok := db[h]; !ok {
6466
hits = append(hits, i)
6567
if updateDB {
66-
db[h] = true
68+
db[h] = placeholder
6769
}
6870
}
6971
}
@@ -103,7 +105,11 @@ func checkPath(root string, db hashDB, args *baseArgs) *walkStats {
103105

104106
// Only do path checking for non-root elts
105107
if path != root && !args.IgnorePaths {
106-
if !db[pathHash(relPath)] {
108+
109+
_, foundInDb := db[pathHash(relPath)]
110+
shouldExclude := pathIsExcluded(relPath)
111+
112+
if !foundInDb || shouldExclude {
107113
stats.filesCustomCode++
108114
logVerbose(grey(" ? ", relPath))
109115
return nil
@@ -163,7 +169,7 @@ func addPath(root string, db hashDB, args *baseArgs) {
163169
// If relPath has valid ext, add hash of "path:<relPath>" to db
164170
// Never add root path (possibly file)
165171
if !args.IgnorePaths && path != root && !pathIsExcluded(relPath) {
166-
db[pathHash(relPath)] = true
172+
db[pathHash(relPath)] = placeholder
167173
}
168174

169175
hits, _ := parseFile(path, relPath, db, true)
@@ -184,15 +190,15 @@ func main() {
184190
args := setup()
185191
db := loadDB(args.Database)
186192

187-
logInfo(boldwhite("\nMagento Corediff loaded ", len(db), " precomputed hashes. (C) 2020-2022 [email protected]"))
193+
logInfo(boldwhite("Corediff loaded ", len(db), " precomputed hashes. (C) 2020-2023 [email protected]"))
188194
logInfo("Using database:", args.Database, "\n")
189195

190196
if args.Merge {
191197
for _, p := range args.Path.Path {
192198
db2 := loadDB(p)
193199
logInfo("Merging", filepath.Base(p), "with", len(db2), "entries ..")
194200
for k := range db2 {
195-
db[k] = true
201+
db[k] = placeholder
196202
}
197203
}
198204
logInfo("Saving", args.Database, "with a total of", len(db), "entries.")
@@ -221,5 +227,4 @@ func main() {
221227
logInfo(" - Files without code :", stats.filesNoCode)
222228
}
223229
}
224-
logInfo()
225230
}

corediff_test.go

Lines changed: 18 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -5,47 +5,50 @@ import (
55
"fmt"
66
"log"
77
"os"
8-
"reflect"
98
"testing"
9+
10+
"github.com/stretchr/testify/assert"
1011
)
1112

12-
func digest(b [16]byte) string {
13+
func digest(b uint32) string {
1314
return fmt.Sprintf("%x", b)
1415
}
1516

1617
func Test_parseFile(t *testing.T) {
17-
hits, lines := parseFile("fixture/odd-encoding.js", "n/a", hashDB{}, false)
18-
fmt.Println("succeeded", len(hits), len(lines))
18+
hdb := hashDB{}
19+
updateDB := true
20+
hits, lines := parseFile("fixture/docroot/odd-encoding.js", "n/a", hdb, updateDB)
21+
assert.Equal(t, 220, len(hdb))
22+
assert.Equal(t, 220, len(hits))
23+
assert.Equal(t, 471, len(lines))
1924
}
2025

2126
func Test_hash(t *testing.T) {
2227
tests := []struct {
2328
args []byte
2429
want string
2530
}{
26-
{
27-
[]byte("banaan"),
28-
"31d674be46e1ba6b54388a671c09accb",
29-
},
31+
{[]byte("banaan"), "14ac6691"},
3032
}
3133
for _, tt := range tests {
3234
t.Run(string(tt.args), func(t *testing.T) {
33-
if got := digest(hash(tt.args)); !reflect.DeepEqual(got, tt.want) {
35+
if got := digest(hash(tt.args)); got != tt.want {
3436
t.Errorf("hash() = %x (%v), want %x", got, got, tt.want)
3537
}
3638
})
3739
}
3840
}
3941

4042
func Test_vendor_bug(t *testing.T) {
41-
db := loadDB("m233.db")
42-
h := [16]byte{145, 49, 107, 134, 191, 186, 29, 135, 27, 49, 110, 122, 36, 242, 133, 65}
43-
fmt.Println("hash is", h)
44-
fmt.Println("hash in db:", db[h])
45-
43+
db := loadDB("fixture/sample.db")
44+
assert.Len(t, db, 238)
45+
wantHash := uint32(3333369281)
46+
if _, ok := db[wantHash]; !ok {
47+
t.Error("hash not in db")
48+
}
4649
}
4750
func Test_Corruption(t *testing.T) {
48-
fh, _ := os.Open("fixture/sample")
51+
fh, _ := os.Open("fixture/docroot/sample")
4952
defer fh.Close()
5053

5154
lines := [][]byte{}
@@ -54,33 +57,11 @@ func Test_Corruption(t *testing.T) {
5457
for scanner.Scan() {
5558
x := scanner.Bytes()
5659
l := make([]byte, len(x))
57-
// Need to copy, underlying Scan array may change later
5860
copy(l, x)
59-
fmt.Printf("%s\n", l)
6061
lines = append(lines, l)
6162
}
6263
if err := scanner.Err(); err != nil {
6364
log.Fatal(err)
6465
}
6566

66-
fmt.Println("Scanning completed, lines:", len(lines))
67-
68-
for _, l := range lines {
69-
fmt.Printf("%s\n", l)
70-
}
71-
}
72-
73-
func Test_NoFileSource(t *testing.T) {
74-
lines := [][]byte{}
75-
76-
for i := 0; i < 70; i++ {
77-
line := fmt.Sprintf("LINE %3d =======================================================", i)
78-
lines = append(lines, []byte(line))
79-
}
80-
81-
fmt.Println("Scanning completed, lines:", len(lines))
82-
83-
for _, l := range lines {
84-
fmt.Printf("%s\n", l)
85-
}
8667
}
File renamed without changes.

0 commit comments

Comments
 (0)