-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathextractMHT.au3
More file actions
391 lines (351 loc) · 11.8 KB
/
extractMHT.au3
File metadata and controls
391 lines (351 loc) · 11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
#Region ;**** Directives created by AutoIt3Wrapper_GUI ****
#AutoIt3Wrapper_Icon=support\Icons\extractmht_exe.ico
#AutoIt3Wrapper_Outfile=extractMHT32.exe
#AutoIt3Wrapper_Outfile_x64=extractMHT64.exe
#AutoIt3Wrapper_Compile_Both=y
#AutoIt3Wrapper_UseX64=y
#AutoIt3Wrapper_Res_Comment=Compiled with AutoIt http://www.autoitscript.com/
#AutoIt3Wrapper_Res_Description=extractMHT
#AutoIt3Wrapper_Res_Fileversion=1.0.0.0
#AutoIt3Wrapper_Res_LegalCopyright=GNU General Public License
#AutoIt3Wrapper_Run_AU3Check=n
#EndRegion ;**** Directives created by AutoIt3Wrapper_GUI ****
#Region converted Directives from C:\Delphi\extractMHT\extractMHT.au3.ini
#AutoIt3Wrapper_Allow_Decompile=4
#EndRegion converted Directives from C:\Delphi\extractMHT\extractMHT.au3.ini
;
; *** Start added by AutoIt3Wrapper ***
#include <GUIConstantsEx.au3>
#include <WindowsConstants.au3>
; *** End added by AutoIt3Wrapper ***
#Region converted Directives from C:\Users\soporte\Desktop\htmltools\extractMHT.au3.ini
#AutoIt3Wrapper_Allow_Decompile=4
#EndRegion converted Directives from C:\Users\soporte\Desktop\htmltools\extractMHT.au3.ini
;
; ----------------------------------------------------------------------------
;
; extractMHT v1.1
; Author: Jared Breland <jbreland@legroom.net>
; Author: Luis Carrasco <luis@bambucode.com>
; Homepage: http://www.legroom.net/mysoft
; Language: AutoIt v3.3.14.2
; License: GNU General Public License (http://www.gnu.org/copyleft/gpl.html)
;
; Script Function:
; Extract files from MHT archives
;
; ----------------------------------------------------------------------------
; Setup environment
#include <GUIConstants.au3>
#include <File.au3>
#include "Base64.au3"
; Interpreter options: dispatch GUI events to callback functions, and expand
; $var$ / @macro@ references that appear inside string literals.
Opt("GUIOnEventMode", 1)
Opt("ExpandVarStrings", 1)

; Program identity used in window titles and message boxes
Global $name = "extractMHT", $version = "1.1"
Global $title = "$name$ v$version$"

; True when the selection dialog has to be shown
Global $prompt = False

; Input archive, target directory and the pieces derived from the archive path
Global $mht, $outdir, $filedir, $filename

; Archive-level parsing state
Global $boundary, $firstpart, $infile

; State of the part currently being processed
Global $part, $parts, $newpart, $type, $encoding, $location, $content
; Check parameters
If $cmdline[0] = 0 Then
    ; Nothing on the command line: ask for everything through the dialog
    $prompt = True
ElseIf $cmdline[1] == "/help" Or $cmdline[1] == "/h" Or $cmdline[1] == "/?" _
        Or $cmdline[1] == "-h" Or $cmdline[1] == "-?" Then
    ; Explicit request for the usage text (terminate() does not return)
    terminate("syntax")
Else
    ; The first argument must name an existing file, otherwise show usage
    If Not FileExists($cmdline[1]) Then terminate("syntax")
    $mht = $cmdline[1]
    ; The optional second argument is the destination; without it the
    ; dialog is shown so the user can pick one
    If $cmdline[0] > 1 Then
        $outdir = $cmdline[2]
    Else
        $prompt = True
    EndIf
EndIf
; If no file passed, display GUI to select file and set options
If $prompt Then
    ; Create GUI; the extended style lets the window accept dropped files
    GUICreate($title, 300, 115, -1, -1, -1, $WS_EX_ACCEPTFILES)
    ; Label spanning the whole client area, used as the drop target
    $dropzone = GUICtrlCreateLabel("", 0, 0, 300, 115)
    GUICtrlCreateLabel("MHTML archive to extract:", 5, 5, -1, 15)
    $filecont = GUICtrlCreateInput("", 5, 20, 260, 20)
    $filebut = GUICtrlCreateButton("...", 270, 20, 25, 20)
    GUICtrlCreateLabel("Target directory:", 5, 45, -1, 15)
    $dircont = GUICtrlCreateInput("", 5, 60, 260, 20)
    $dirbut = GUICtrlCreateButton("...", 270, 60, 25, 20)
    $ok = GUICtrlCreateButton("&OK", 55, 90, 80, 20)
    $cancel = GUICtrlCreateButton("&Cancel", 165, 90, 80, 20)

    ; Set properties
    GUICtrlSetBkColor($dropzone, $GUI_BKCOLOR_TRANSPARENT)
    GUICtrlSetState($dropzone, $GUI_DISABLE)
    GUICtrlSetState($dropzone, $GUI_DROPACCEPTED)
    GUICtrlSetState($filecont, $GUI_FOCUS)
    GUICtrlSetState($ok, $GUI_DEFBUTTON)

    ; A file was given on the command line: pre-fill both inputs and
    ; suggest a directory named after the archive (name minus last 4 chars)
    If $mht <> "" Then
        GUICtrlSetData($filecont, $mht)
        $filedir = StringLeft($mht, StringInStr($mht, '\', 0, -1) - 1)
        $filename = StringTrimRight(StringTrimLeft($mht, StringLen($filedir) + 1), 4)
        GUICtrlSetData($dircont, "$filedir$\$filename$")
        GUICtrlSetState($dircont, $GUI_FOCUS)
    EndIf

    ; Set events
    GUISetOnEvent($GUI_EVENT_DROPPED, "GUI_Drop")
    GUICtrlSetOnEvent($filebut, "GUI_File")
    GUICtrlSetOnEvent($dirbut, "GUI_Directory")
    GUICtrlSetOnEvent($ok, "GUI_Ok")
    GUICtrlSetOnEvent($cancel, "GUI_Exit")
    GUISetOnEvent($GUI_EVENT_CLOSE, "GUI_Exit")

    ; Display GUI and wait for action.  GUI_Ok() sets $finishgui once a valid
    ; file has been chosen.  The Sleep() keeps this idle loop from spinning a
    ; CPU core at 100% while the dialog is open; events are still delivered
    ; to the OnEvent callbacks in the meantime.
    GUISetState(@SW_SHOW)
    $finishgui = 0
    While Not $finishgui
        Sleep(50)
    WEnd
EndIf
; Set full output directory
; A bare or relative archive name (no drive letter, not UNC) is resolved to a
; full path first.  Without this, a name containing no backslash produced an
; empty $filedir and the "+1" below silently dropped the first character of
; the file name.  Absolute paths are left exactly as they were given.
If StringMid($mht, 2, 1) <> ":" And StringLeft($mht, 2) <> '\\' Then $mht = _PathFull($mht)

; Split into containing directory and base name (name minus its last 4 chars)
$filedir = StringLeft($mht, StringInStr($mht, '\', 0, -1) - 1)
$filename = StringTrimRight(StringTrimLeft($mht, StringLen($filedir) + 1), 4)

If $outdir = '/sub' Then
    ; Extract into a subdirectory named after the archive
    $outdir = "$filedir$\$filename$"
ElseIf StringMid($outdir, 2, 1) <> ":" And StringLeft($outdir, 2) <> '\\' Then
    ; Target has no drive letter and is not a UNC path (UNC paths are already
    ; absolute and must not get a drive prefix prepended)
    If StringLeft($outdir, 1) == '\' Then
        ; Rooted path: borrow the first two characters of the archive's directory
        $outdir = StringLeft($filedir, 2) & $outdir
    Else
        ; Relative path: resolve against the archive's directory
        $outdir = _PathFull($filedir & '\' & $outdir)
    EndIf
EndIf
; Determine boundary
; Load the archive as an array of lines ($infile[0] holds the line count).
; Stop with a message if it cannot be read; indexing $infile after a failed
; read would abort the script with a fatal subscript error.
If Not _FileReadToArray($mht, $infile) Then
    MsgBox(48, $title, "Error: Unable to read the input file.")
    Exit
EndIf

$parts = 0
For $i = 1 To $infile[0]
    If StringInStr($infile[$i], "boundary=") Then
        ; Accept both boundary="value" and boundary=value.  The value ends at
        ; the closing quote or at the next ';', so further parameters on the
        ; same line are not swallowed, and an unquoted value without a
        ; trailing ';' is taken whole.
        $temp = StringRegExp($infile[$i], '(?i)boundary=\s*"?([^";]+)', 1)
        If Not @error Then $boundary = StringStripWS($temp[0], 3)
    ; Continue processing to count number of parts
    ElseIf Not ($boundary == '') And StringInStr($infile[$i], $boundary) Then
        ; Remember the line right after the first delimiter: extraction starts there
        If $parts = 0 Then $firstpart = $i + 1
        $parts += 1
    EndIf
Next
; One fewer part than boundary lines counted
$parts -= 1

; Verify boundary exists
If $boundary == '' Then
    MsgBox(48, $title, "Error: This does not appear to be a valid MHT file.@CRLF@No boundary could be detected.")
    Exit
EndIf
; Begin processing MHT file
ProgressOn($title, 'Extracting $filename$.mht', "", -1, -1, 16)
$part = 0
$newpart = 1
For $i = $firstpart To $infile[0]
    ; Initialize variables at the start of every part
    If $newpart Then
        $type = ""
        $encoding = ""
        $location = ""
        $content = ""
        $newpart = False
        $part += 1
    EndIf

    ; Determine filetype
    If StringInStr($infile[$i], "Content-Type:", 0) Then
        $temp = StringRegExp($infile[$i], ":\s*([A-Za-z0-9/-]+)", 1)
        ; Only index the result when the pattern matched; on a failed match
        ; $temp is not an array and $temp[0] would abort the script
        If Not @error Then $type = $temp[0]

    ; Determine encoding method
    ElseIf StringInStr($infile[$i], "Content-Transfer-Encoding:", 0) Then
        $temp = StringRegExp($infile[$i], ":\s*([A-Za-z0-9-]+)", 1)
        If Not @error Then $encoding = $temp[0]

    ; Determine filename
    ElseIf StringInStr($infile[$i], "Content-Location:", 0) Then
        $temp = StringTrimLeft($infile[$i], StringInStr($infile[$i], "Content-Location:") + 17)
        $location = getFName($temp, $type)
        ProgressSet(Round($part / $parts, 2) * 100, "Processing file $part$ of $parts$:@CRLF@$location$")

    ; Decode and write out new file when new boundary reached
    ElseIf StringInStr($infile[$i], $boundary) Then
        ; Parts that never declared a Content-Location have no file name to
        ; write to, so there is nothing to save for them
        If $location <> "" Then writeFile($encoding, $location, $content)
        $newpart = True

    ; Read encoded file content into memory until new boundary reached
    ElseIf $type <> "" And $encoding <> "" And $location <> "" Then
        If $encoding = "base64" And $infile[$i] <> "" Then
            ; base64 payload: concatenate without line breaks, skip blank lines
            $content &= $infile[$i]
        ElseIf $encoding <> "base64" Then
            ; everything else: keep the line structure
            $content &= $infile[$i] & @CRLF
        EndIf
    EndIf
Next

; The input ended in the middle of a part (no closing boundary line): write
; what was collected instead of silently dropping the last file
If Not $newpart And $location <> "" Then writeFile($encoding, $location, $content)

ProgressOff()
Exit
; -------------------------- Begin Custom Functions ---------------------------
Func terminate($status)
    ; End the script.  For the status "syntax" (compared case-sensitively)
    ; the usage text is shown in a message box first; any other status
    ; exits without output.
    If $status == "syntax" Then
        Local $usage = "Extract files from MHT web archives." _
                & @CRLF & "Usage: " & @ScriptName & " [/help] [filename [destination]]" _
                & @CRLF & @CRLF & "Supported Arguments:" _
                & @CRLF & " /help" & @TAB & @TAB & "Display this help information" _
                & @CRLF & " filename" & @TAB & "Name of file to extract" _
                & @CRLF & " destination" & @TAB & "Directory to which to extract" _
                & @CRLF & @CRLF & "Passing /sub instead of a destination directory name instructs" & @CRLF & $title & " to extract to subdirectory named after the archive." _
                & @CRLF & @CRLF & "Example:" _
                & @CRLF & " " & @ScriptName & " c:\1\example.mht c:\test" _
                & @CRLF & @CRLF & "Running " & $title & " without any arguments will" & @CRLF & "prompt the user for the filename and destination directory."
        MsgBox(48, $title, $usage)
    EndIf
    Exit
EndFunc
; Derive a local file name for a part.
;   $url  - value of the part's Content-Location header
;   $type - value of the part's Content-Type header (e.g. "image/jpeg")
; Returns the name produced by unique(), i.e. one not yet present in $outdir.
Func getFName($url, $type)
    ; Fallback extension: the subtype after the '/', with "jpeg" shortened to "jpg"
    Local $ext = StringTrimLeft($type, StringInStr($type, '/'))
    If StringInStr($type, "jpeg") Then $ext = "jpg"

    ; URL ends in '/': there is no file name at all, so call it "index"
    If StringRight($url, 1) == "/" Then Return unique("index", $ext)

    ; Keep only what follows the last '/', then split it into name and
    ; extension with a regular expression
    Local $leaf = StringTrimLeft($url, StringInStr($url, '/', 0, -1))
    Local $pieces = StringRegExp($leaf, "([^\\]*)\.(\w+)$", 1)

    ; No usable "name.ext" found: fall back to a generic name
    If @error Or Not @extended Then Return unique("unknown", $ext)

    Return unique($pieces[0], $pieces[1])
EndFunc
; Return "<fname>.<ext>", or "<fname><n>.<ext>" with the smallest n >= 1 that
; does not exist yet when the plain name is already present in $outdir.
Func unique($fname, $ext)
    ; Plain name is still free: use it unchanged
    If Not FileExists("$outdir$\$fname$.$ext$") Then Return "$fname$.$ext$"

    ; Otherwise count upwards until an unused numbered name is found
    Local $i = 0
    Do
        $i += 1
    Until Not FileExists("$outdir$\$fname$$i$.$ext$")
    Return "$fname$$i$.$ext$"
EndFunc
; Decode one part and write it to $outdir.
;   $encoding - Content-Transfer-Encoding of the part; "base64" and
;               "quoted-printable" are decoded, anything else is written as-is
;   $location - file name inside $outdir
;   $content  - collected (still encoded) body of the part
; Returns nothing.  The part is skipped when the target file cannot be opened.
Func writeFile($encoding, $location, $content)
    If Not FileExists($outdir) Then DirCreate($outdir)

    ; Mode 2 = open for writing, erasing previous contents
    Local $outfile = FileOpen("$outdir$\$location$", 2)
    ; FileOpen() returns -1 on failure; do not write to an invalid handle
    If $outfile = -1 Then Return

    ; Decode file according to encoding type
    If $encoding = "base64" Then
        ; NOTE(review): _Base64Decode comes from Base64.au3 and is presumed
        ; to return the decoded bytes - confirm against that include
        $content = _Base64Decode($content)
    ElseIf $encoding = "quoted-printable" Then
        $content = QPDec($content)
    EndIf

    ; Write decoded file.  FileWrite() stores the data unchanged, whereas
    ; FileWriteLine() appends @CRLF whenever the data does not already end in
    ; a line break, which added two stray bytes to decoded files.
    FileWrite($outfile, $content)
    FileClose($outfile)
EndFunc
; Decode quoted-printable data.
;   $text - quoted-printable encoded text
; Returns the decoded text.  Soft line breaks are removed and every "=XX"
; escape (XX = two hex digits, either case) becomes Chr(Dec(XX)).  An '=' that
; is not followed by two hex digits is copied through unchanged.
Func QPDec($text)
    Local $pieces, $out, $i, $code

    ; Remove soft line breaks: '=' directly before a line end (CRLF or bare LF)
    $text = StringRegExpReplace($text, "=\r?\n", "")

    ; Drop an encoded UTF-8 byte-order mark
    $text = StringReplace($text, "=EF=BB=BF", "")

    ; Decode in a single left-to-right pass.  Splitting at '=' means each
    ; piece after the first starts where an escape would start, and decoded
    ; output is never scanned again - so "=3D41" correctly yields "=41"
    ; rather than being decoded a second time.
    $pieces = StringSplit($text, "=")
    $out = $pieces[1]
    For $i = 2 To $pieces[0]
        $code = StringLeft($pieces[$i], 2)
        If StringLen($code) = 2 And StringIsXDigit($code) Then
            $out &= Chr(Dec($code)) & StringTrimLeft($pieces[$i], 2)
        Else
            ; Not a valid escape: keep the '=' literally
            $out &= "=" & $pieces[$i]
        EndIf
    Next
    Return $out
EndFunc
; Return the distinct elements of an array, in order of first appearance.
;   $arr - 0-based array of strings; elements must not contain '|'
; Returns a StringSplit()-style array: element [0] is the number of distinct
; values, elements [1..n] are the values.  Comparison is case-insensitive.
; An empty or non-array input yields a count of 0 and no values.
Func arrayUnique($arr)
    Local $i
    Local $seen = "|"
    Local $empty[1] = [0]

    If Not IsArray($arr) Then Return $empty

    For $i = 0 To UBound($arr) - 1
        ; Match whole elements only, by including the surrounding
        ; delimiters - a plain substring test would treat "4" as a
        ; duplicate of "41"
        If Not StringInStr($seen, "|" & $arr[$i] & "|") Then $seen &= $arr[$i] & "|"
    Next

    ; Nothing collected (zero-length array)
    If $seen == "|" Then Return $empty

    ; Strip the leading and trailing delimiter before splitting
    Return StringSplit(StringMid($seen, 2, StringLen($seen) - 2), "|")
EndFunc
; ------------------------ Begin GUI Control Functions ------------------------
; Event handler for the archive "..." button: let the user pick an .mht file.
; Stores the choice in $mht and the file input; if the target input is still
; empty, suggests "<archive directory>\<archive name minus last 4 chars>".
Func GUI_File()
    $mht = FileOpenDialog("Open file", "", "Select file (*.mht)", 1)
    ; Nothing to update when the dialog did not return a file
    If @error Then Return

    GUICtrlSetData($filecont, $mht)
    If GUICtrlRead($dircont) = "" Then
        $filedir = StringLeft($mht, StringInStr($mht, '\', 0, -1) - 1)
        $filename = StringTrimRight(StringTrimLeft($mht, StringLen($filedir) + 1), 4)
        GUICtrlSetData($dircont, "$filedir$\$filename$")
    EndIf
    ; Move focus to OK so Enter starts the extraction
    GUICtrlSetState($ok, $GUI_FOCUS)
EndFunc
; Event handler for the target "..." button: let the user pick a directory.
; The folder dialog starts in the directory currently entered as target if it
; exists, else in the directory of the entered archive if that exists, else
; with no initial directory.
Func GUI_Directory()
    Local $startdir = ''
    Local $dirtext = GUICtrlRead($dircont)
    Local $filetext = GUICtrlRead($filecont)

    If FileExists($dirtext) Then
        $startdir = $dirtext
    ElseIf FileExists($filetext) Then
        $startdir = StringLeft($filetext, StringInStr($filetext, '\', 0, -1) - 1)
    EndIf

    $outdir = FileSelectFolder("Extract to", "", 3, $startdir)
    ; Leave the input untouched when no folder was returned
    If @error Then Return
    GUICtrlSetData($dircont, $outdir)
EndFunc
; Event handler for the OK button.
; When the entered archive exists: stores it in $mht, stores the target in
; $outdir ('/sub' when the target input is empty), closes the dialog and sets
; $finishgui so the main script continues.  Otherwise shows a warning and
; leaves the dialog open.
Func GUI_Ok()
    Local $candidate = GUICtrlRead($filecont)

    If FileExists($candidate) Then
        $mht = $candidate
        If GUICtrlRead($dircont) == "" Then
            $outdir = '/sub'
        Else
            $outdir = GUICtrlRead($dircont)
        EndIf
        GUIDelete()
        $finishgui = True
        Return
    EndIf

    ; Build the warning in a local variable.  The message text used to be
    ; appended to the global $mht, which left the archive path holding
    ; error text after a failed attempt.
    Local $msg = ""
    If Not ($candidate == '') Then $msg = $candidate & " does not exist." & @CRLF
    MsgBox(48, $title, $msg & "Please select valid file.")
EndFunc
; Event handler for files dropped onto the window.
; An existing dropped file becomes the archive; if the target input is still
; empty, "<archive directory>\<archive name minus last 4 chars>" is suggested.
Func GUI_Drop()
    ; Ignore drops that do not refer to an existing file
    If Not FileExists(@GUI_DragFile) Then Return

    $mht = @GUI_DragFile
    GUICtrlSetData($filecont, $mht)

    ; Keep a target directory the user has already entered
    If GUICtrlRead($dircont) = "" Then
        $filedir = StringLeft($mht, StringInStr($mht, '\', 0, -1) - 1)
        $filename = StringTrimRight(StringTrimLeft($mht, StringLen($filedir) + 1), 4)
        GUICtrlSetData($dircont, "$filedir$\$filename$")
    EndIf
EndFunc
; Event handler for the Cancel button and the window close event:
; end the script without extracting anything.
Func GUI_Exit()
    Exit 0
EndFunc