Skip to content

Commit 7393fee

Browse files
committed
Throughput enhancements:
1. The EOL byte sequences are now cached as module-level constants instead of being recalculated on every call. 2. ASCII output now writes in bursts, and only inspects the positions known to contain CR/LF.
1 parent 7bac1cb commit 7393fee

File tree

1 file changed

+109
-24
lines changed

1 file changed

+109
-24
lines changed

VMSBackupProcess.py

+109-24
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@
1313
import math
1414
import struct
1515

16+
# Calculate these once globally rather than recomputing each time when needed.
17+
__LINESEP = bytes([ord(k) for k in os.linesep])
18+
__LINESEP_WINDOWS = bytes([ord(k) for k in "\r\n"])
19+
__CR = ord('\r')
20+
__LF = ord('\n')
21+
1622
def FileNameWildCardCompare(kString : str, kWildCard : str) :
1723

1824
# [] are frequently used in OpenVMS filenames, but to fnmatch, indicate ranges.
@@ -31,12 +37,12 @@ def VMSWriteEOL(kFileMetaData : VMSBackupTypes.VMSFileParameters, bForceEOL : bo
3137

3238
if (kFileMetaData.nFilePointer >= kFileMetaData.nFileSize) and kFileMetaData.bLFDetected :
3339

34-
kFileMetaData.kFileHandle.write(bytes([ord(k) for k in os.linesep]))
40+
kFileMetaData.kFileHandle.write(__LINESEP)
3541
kFileMetaData.bLFDetected = False
3642

3743
elif bForceEOL :
3844

39-
kFileMetaData.kFileHandle.write(bytes([ord(k) for k in os.linesep]))
45+
kFileMetaData.kFileHandle.write(__LINESEP)
4046

4147
#end
4248

@@ -49,63 +55,141 @@ def VMSWriteFile(kBlock : bytes, kFileMetaData : VMSBackupTypes.VMSFileParameter
4955
bLastElementWasLFCR = False
5056
bContainsLFCR = False
5157

58+
# TODO: This is covering up a bug elsewhere where 0 byte writes are somehow being passed onwards...
59+
if 0 == nDataLength :
60+
return
61+
#end
62+
5263
if VMSBackupTypes.ExtractMode.ASCII == kFileMetaData.kMode :
5364

54-
for i,nByte in enumerate(kBlock[:nDataLength]) :
65+
# Note: To improve throughput, ASCII Mode doesn't iterate one character at a time, and
66+
# instead finds all indices which contain the line separators. This allows burst
67+
# writing of everything in between. This does improve average write time at the
68+
# expense of some additional complexity.
69+
kIndicesOfInterest = [i for i,k in enumerate(kBlock[:nDataLength]) if k in __LINESEP_WINDOWS]
70+
if 0 == len(kIndicesOfInterest) :
71+
72+
# This would fall into functionality associated with:
73+
# (__LF != kBlock[nCRLFIndex]) and kFileMetaData.bLFDetected :
74+
if kFileMetaData.bLFDetected :
75+
76+
# This is not seen as a valid EOL, so normalise it
77+
kFileMetaData.kFileHandle.write(__LINESEP)
78+
79+
#end
80+
81+
kFileMetaData.bLFDetected = False
82+
kFileMetaData.bLastElementWasLFCR = False
83+
84+
kFileMetaData.kFileHandle.write(kBlock[:nDataLength])
85+
return
86+
87+
#end
88+
89+
# File I/O typically performs best in large bursts, therefore we buffer the data
90+
# into RAM first.
91+
kBytes = bytearray()
92+
nLastIndex = 0
93+
94+
for nCRLFIndex in kIndicesOfInterest :
95+
96+
if nCRLFIndex > nLastIndex :
97+
98+
# This would fall into functionality associated with:
99+
# (__LF != kBlock[nCRLFIndex]) and kFileMetaData.bLFDetected :
100+
if kFileMetaData.bLFDetected :
101+
102+
# This is not seen as a valid EOL, so normalise it
103+
kBytes.extend(__LINESEP)
104+
105+
#end
106+
107+
kFileMetaData.bLFDetected = False
108+
kFileMetaData.bLastElementWasLFCR = False
55109

56-
if (nByte == ord('\r')) and not kFileMetaData.bLFDetected :
110+
kBytes.extend(kBlock[nLastIndex:nCRLFIndex])
111+
112+
#end
113+
114+
if __CR == kBlock[nCRLFIndex] and not kFileMetaData.bLFDetected :
57115

58116
# Do nothing whilst the EOL is assessed
59117
kFileMetaData.bLFDetected = True
60118

61119
# Indicate this data package contains an LF/CR entry
62120
bContainsLFCR = True
63-
bLastElementWasLFCR = (i+1) == nDataLength
121+
bLastElementWasLFCR = (nCRLFIndex+1) == nDataLength
64122

65-
elif (nByte == ord('\n')) and kFileMetaData.bLFDetected :
123+
elif (__LF == kBlock[nCRLFIndex]) and kFileMetaData.bLFDetected :
66124

67125
# This file already contains standard EOL conventions
68-
kFileMetaData.kFileHandle.write(bytes([ord(k) for k in os.linesep]))
126+
kBytes += __LINESEP
69127
kFileMetaData.bLFDetected = False
70128

71129
# Indicate this data package contains an LF/CR entry
72130
bContainsLFCR = True
73-
bLastElementWasLFCR = (i+1) == nDataLength
131+
bLastElementWasLFCR = (nCRLFIndex+1) == nDataLength
74132

75-
elif (nByte == ord('\n')) and not kFileMetaData.bLFDetected :
133+
elif (__LF == kBlock[nCRLFIndex]) and not kFileMetaData.bLFDetected :
76134

77135
# This is not seen as a valid EOL, so normalise it
78-
kFileMetaData.kFileHandle.write(bytes([ord(k) for k in os.linesep]))
136+
kBytes += __LINESEP
79137
kFileMetaData.bLFDetected = False
80138

81139
# Indicate this data package contains an LF/CR entry
82140
bContainsLFCR = True
83-
bLastElementWasLFCR = (i+1) == nDataLength
141+
bLastElementWasLFCR = (nCRLFIndex+1) == nDataLength
84142

85-
elif (nByte != ord('\n')) and kFileMetaData.bLFDetected :
143+
elif (__LF != kBlock[nCRLFIndex]) and kFileMetaData.bLFDetected :
86144

87145
# This is not seen as a valid EOL, so normalise it
88-
kFileMetaData.kFileHandle.write(bytes([ord(k) for k in os.linesep]))
146+
kBytes += __LINESEP
89147
kFileMetaData.bLFDetected = False
90148

91149
# Indicate this data package contains an LF/CR entry
92150
bContainsLFCR = True
93151

94152
# Note: This indicates the *previous* byte was an LF/CR therefore the current
95153
# element isn't, hence no check to see if the Last Element is an LF/CR
96-
154+
97155
# Output the current byte since it contained non-EOL data
98-
kFileMetaData.kFileHandle.write(bytes([nByte]))
156+
kBytes.append(kBlock[nCRLFIndex])
99157

100158
else :
101159

102-
# Output the Current Byte
103-
kFileMetaData.kFileHandle.write(bytes([nByte]))
160+
# Shouldn't Occur
161+
assert(False)
104162

105163
#end
164+
165+
nLastIndex = nCRLFIndex + 1
106166

107167
#end
108168

169+
# Handle the Last Few Elements
170+
if nLastIndex < nDataLength :
171+
172+
# This would fall into functionality associated with:
173+
# (__LF != kBlock[nCRLFIndex]) and kFileMetaData.bLFDetected :
174+
if kFileMetaData.bLFDetected :
175+
176+
# This is not seen as a valid EOL, so normalise it
177+
kBytes.extend(__LINESEP)
178+
179+
#end
180+
181+
kFileMetaData.bLFDetected = False
182+
kFileMetaData.bLastElementWasLFCR = False
183+
184+
kBytes.extend(kBlock[nLastIndex:nDataLength])
185+
186+
#end
187+
188+
# Output the Buffered Data for Writing
189+
if len(kBytes) > 0 :
190+
kFileMetaData.kFileHandle.write(kBytes)
191+
#end
192+
109193
else :
110194

111195
kFileMetaData.kFileHandle.write(kBlock[:nDataLength])
@@ -119,15 +203,16 @@ def VMSWriteFile(kBlock : bytes, kFileMetaData : VMSBackupTypes.VMSFileParameter
119203

120204
def CloseOpenFiles(kExtractStatus : dict) :
121205

122-
if None != kExtractStatus["Current"] :
123-
if None != kExtractStatus["Current"].kFileHandle :
124-
if kExtractStatus["Current"].nFilePointer != kExtractStatus["Current"].nFileSize :
125-
print(f"Warning: {kExtractStatus["Current"].kFileName} extracted {kExtractStatus["Current"].nFilePointer}/{kExtractStatus["Current"].nFileSize} bytes.")
206+
kFileMetaData = kExtractStatus["Current"]
207+
if None != kFileMetaData :
208+
if None != kFileMetaData.kFileHandle :
209+
if kFileMetaData.nFilePointer != kFileMetaData.nFileSize :
210+
print(f"Warning: {kFileMetaData.kFileName} extracted {kFileMetaData.nFilePointer}/{kFileMetaData.nFileSize} bytes.")
126211
#end
127-
# assert(kExtractStatus["Current"].nFilePointer == kExtractStatus["Current"].nFileSize)
128-
kExtractStatus["Current"].closeFile()
129-
kExtractStatus["Current"] = None
212+
# assert(kFileMetaData.nFilePointer == kFileMetaData.nFileSize)
213+
kFileMetaData.closeFile()
130214
#end
215+
kExtractStatus["Current"] = None
131216
#end
132217

133218
#end

0 commit comments

Comments
 (0)