Skip to content

Commit 54be6c4

Browse files
author
akelday
committed
COMPRESS-514: SevenZFile header buffers over 2G
1 parent a5ccbd6 commit 54be6c4

File tree

6 files changed

+633
-77
lines changed

6 files changed

+633
-77
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
* limitations under the License.
15+
*
16+
*/
17+
package org.apache.commons.compress.archivers.sevenz;
18+
19+
import java.io.IOException;
20+
import java.util.zip.CRC32;
21+
22+
/**
 * Represents a buffer for a {@link SevenZFile} header.
 *
 * <p>The contract is a forward-only cursor: every read or skip advances past
 * the bytes it touched. The byte order used for multi-byte values is decided
 * by the implementation.</p>
 *
 * @since 1.21
 */
interface HeaderBuffer {

    /**
     * Tells whether {@link #getCRC()} can be used on this buffer.
     *
     * @return {@code true} if a checksum can be produced
     */
    boolean hasCRC();

    /**
     * Produces a checksum for this buffer.
     *
     * @return the checksum
     * @throws IOException if the checksum cannot be produced
     */
    CRC32 getCRC() throws IOException;

    /**
     * Reads the next byte as an unsigned value.
     *
     * @return the byte read, as an {@code int}
     * @throws IOException if reading fails
     */
    int getUnsignedByte() throws IOException;

    /**
     * Reads the next {@code int}.
     *
     * @return the value read
     * @throws IOException if reading fails
     */
    int getInt() throws IOException;

    /**
     * Reads the next {@code long}.
     *
     * @return the value read
     * @throws IOException if reading fails
     */
    long getLong() throws IOException;

    /**
     * Reads bytes into the given array.
     *
     * @param dst the array to receive the bytes
     * @throws IOException if reading fails
     */
    void get(byte[] dst) throws IOException;

    /**
     * Skips over bytes.
     *
     * @param bytesToSkip the number of bytes the caller would like to skip
     * @return the number of bytes skipped
     * @throws IOException if skipping fails
     */
    long skipBytesFully(long bytesToSkip) throws IOException;
}
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*
17+
*/
18+
package org.apache.commons.compress.archivers.sevenz;
19+
20+
import org.apache.commons.compress.utils.IOUtils;
21+
22+
import java.io.IOException;
23+
import java.io.InputStream;
24+
import java.nio.BufferUnderflowException;
25+
import java.nio.ByteBuffer;
26+
import java.nio.ByteOrder;
27+
import java.nio.channels.Channels;
28+
import java.nio.channels.ReadableByteChannel;
29+
import java.util.zip.CRC32;
30+
31+
/**
32+
* Enables little-endian primitive type reads from a {@link ReadableByteChannel}
33+
* or {@link InputStream}, internally using a paged-in {@link ByteBuffer}.
34+
* <br>
35+
* Access is serial only but does allow a
36+
* virtual buffer capacity of {@code Long.MAX_VALUE}.
37+
* If the requested capacity is within the maximum page size (default 16MiB)
38+
* the buffer will be fully read and held in a {@link HeaderInMemoryBuffer}.
39+
*
40+
* @NotThreadSafe
41+
* @since 1.21
42+
*/
43+
class HeaderChannelBuffer implements HeaderBuffer {
44+
private static final int DEFAULT_PAGE_MAX = 16_777_216;
45+
// This must match the largest get<Element> (currently getLong)
46+
private static final int MAX_GET_ELEMENT_SIZE = 8;
47+
private final ReadableByteChannel channel;
48+
private final ByteBuffer buffer;
49+
private long remaining;
50+
51+
private HeaderChannelBuffer(final ReadableByteChannel channel, final long capacity, final int maxPageBytes) {
52+
this.channel = channel;
53+
int limit = (int) Math.min(maxPageBytes, capacity);
54+
this.buffer = ByteBuffer.allocate(limit).order(ByteOrder.LITTLE_ENDIAN);
55+
this.remaining = capacity;
56+
}
57+
58+
public static HeaderBuffer create(final ReadableByteChannel channel, final long capacity, final int maxPageBytes)
59+
throws IOException {
60+
if (maxPageBytes < MAX_GET_ELEMENT_SIZE) {
61+
throw new IllegalArgumentException("Page size must be at least " + MAX_GET_ELEMENT_SIZE);
62+
}
63+
if (capacity <= maxPageBytes) {
64+
ByteBuffer buf = ByteBuffer.allocate((int) capacity).order(ByteOrder.LITTLE_ENDIAN);
65+
IOUtils.readFully(channel, buf);
66+
buf.flip();
67+
return new HeaderInMemoryBuffer(buf);
68+
}
69+
HeaderChannelBuffer channelBuffer = new HeaderChannelBuffer(channel, capacity, maxPageBytes);
70+
channelBuffer.fill();
71+
return channelBuffer;
72+
}
73+
74+
public static HeaderBuffer create(final ReadableByteChannel channel, final long capacity) throws IOException {
75+
return HeaderChannelBuffer.create(channel, capacity, DEFAULT_PAGE_MAX);
76+
}
77+
78+
public static HeaderBuffer create(final InputStream inputStream, final long capacity, final int maxPageBytes)
79+
throws IOException {
80+
return create(Channels.newChannel(inputStream), capacity, maxPageBytes);
81+
}
82+
83+
public static HeaderBuffer create(final InputStream inputStream, final long capacity) throws IOException {
84+
return create(Channels.newChannel(inputStream), capacity, DEFAULT_PAGE_MAX);
85+
}
86+
87+
@Override
88+
public boolean hasCRC() {
89+
return false;
90+
}
91+
92+
@Override
93+
public CRC32 getCRC() throws IOException {
94+
throw new IOException("CRC is not implemented for this header type");
95+
}
96+
97+
@Override
98+
public void get(byte[] dst) throws IOException {
99+
int remainingBytes = dst.length;
100+
do {
101+
int length = Math.min(buffer.remaining(), remainingBytes);
102+
buffer.get(dst, dst.length - remainingBytes, length);
103+
remainingBytes -= length;
104+
} while (refilled(remainingBytes));
105+
}
106+
107+
private boolean refilled(final int remainingBytes) throws IOException {
108+
if (remainingBytes <= 0) {
109+
return false;
110+
}
111+
if (remainingBytes > this.remaining) {
112+
throw new BufferUnderflowException();
113+
}
114+
buffer.clear();
115+
this.fill();
116+
return true;
117+
}
118+
119+
@Override
120+
public int getInt() throws IOException {
121+
compactAndFill();
122+
return buffer.getInt();
123+
}
124+
125+
@Override
126+
public long getLong() throws IOException {
127+
compactAndFill();
128+
return buffer.getLong();
129+
}
130+
131+
@Override
132+
public int getUnsignedByte() throws IOException {
133+
compactAndFill();
134+
return buffer.get() & 0xff;
135+
}
136+
137+
@Override
138+
public long skipBytesFully(long bytesToSkip) throws IOException {
139+
if (bytesToSkip <= 0) {
140+
return 0;
141+
}
142+
int current = buffer.position();
143+
long length = buffer.remaining();
144+
if (bytesToSkip <= length) {
145+
buffer.position(current + (int) bytesToSkip);
146+
} else {
147+
long maxSkip = remaining + length;
148+
bytesToSkip = Math.min(bytesToSkip, maxSkip);
149+
while (length < bytesToSkip) {
150+
buffer.clear();
151+
fill();
152+
length += buffer.limit();
153+
}
154+
buffer.position(buffer.limit() - (int) (length - bytesToSkip));
155+
}
156+
return bytesToSkip;
157+
}
158+
159+
private void compactAndFill() throws IOException {
160+
if (buffer.remaining() <= MAX_GET_ELEMENT_SIZE) {
161+
buffer.compact();
162+
this.fill();
163+
}
164+
}
165+
166+
private void fill() throws IOException {
167+
if (buffer.remaining() > remaining) {
168+
buffer.limit(buffer.position() + (int) remaining);
169+
}
170+
remaining -= buffer.remaining();
171+
IOUtils.readFully(channel, buffer);
172+
buffer.flip();
173+
}
174+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*
17+
*/
18+
package org.apache.commons.compress.archivers.sevenz;
19+
20+
import java.io.IOException;
21+
import java.nio.ByteBuffer;
22+
import java.util.zip.CRC32;
23+
24+
/**
25+
* A thin and limited wrapper around a {@link ByteBuffer} with serial access only.
26+
*
27+
* @NotThreadSafe
28+
* @since 1.21
29+
*/
30+
class HeaderInMemoryBuffer implements HeaderBuffer {
31+
private final ByteBuffer buffer;
32+
33+
public HeaderInMemoryBuffer(ByteBuffer buf) {
34+
this.buffer = buf;
35+
}
36+
37+
@Override
38+
public boolean hasCRC() {
39+
return true;
40+
}
41+
42+
@Override
43+
public CRC32 getCRC() {
44+
final CRC32 crc = new CRC32();
45+
crc.update(buffer.array());
46+
return crc;
47+
}
48+
49+
@Override
50+
public void get(byte[] dst) {
51+
buffer.get(dst);
52+
}
53+
54+
@Override
55+
public int getInt() {
56+
return buffer.getInt();
57+
}
58+
59+
@Override
60+
public long getLong() {
61+
return buffer.getLong();
62+
}
63+
64+
@Override
65+
public int getUnsignedByte() {
66+
return buffer.get() & 0xff;
67+
}
68+
69+
@Override
70+
public long skipBytesFully(long bytesToSkip) throws IOException {
71+
if (bytesToSkip <= 0) {
72+
return 0;
73+
}
74+
int current = buffer.position();
75+
int maxSkip = buffer.remaining();
76+
if (maxSkip < bytesToSkip) {
77+
bytesToSkip = maxSkip;
78+
}
79+
buffer.position(current + (int) bytesToSkip);
80+
return bytesToSkip;
81+
}
82+
}

0 commit comments

Comments
 (0)