diff --git a/lib/filewatch/tail.rb b/lib/filewatch/tail.rb index 6c27bfe..f3a7c7b 100644 --- a/lib/filewatch/tail.rb +++ b/lib/filewatch/tail.rb @@ -1,5 +1,7 @@ require "filewatch/buftok" require "filewatch/watch" +require "zlib" + if RbConfig::CONFIG['host_os'] =~ /mswin|mingw|cygwin/ require "filewatch/winhelper" end @@ -30,6 +32,7 @@ def initialize(opts={}) @logger.level = Logger::INFO end @files = {} + @gzip = {} @lastwarn = Hash.new { |h, k| h[k] = 0 } @buffers = {} @watch = FileWatch::Watch.new @@ -95,6 +98,7 @@ def subscribe(&block) _read_file(path, &block) @files[path].close @files.delete(path) + @gzip.delete(path) @statcache.delete(path) else @logger.warn("unknown event type #{event} for #{path}") @@ -123,6 +127,7 @@ def _open_file(path, event) @logger.debug("(warn supressed) failed to open #{path}: #{$!}") end @files.delete(path) + @gzip.delete(path) return false end @@ -133,10 +138,10 @@ def _open_file(path, event) inode = [fileId, stat.dev_major, stat.dev_minor] else inode = [stat.ino.to_s, stat.dev_major, stat.dev_minor] - end + end @statcache[path] = inode - + if @sincedb.member?(inode) last_size = @sincedb[inode] @logger.debug("#{path}: sincedb last value #{@sincedb[inode]}, cur size #{stat.size}") @@ -148,40 +153,87 @@ def _open_file(path, event) @sincedb[inode] = 0 end elsif event == :create_initial && @files[path] - # TODO(sissel): Allow starting at beginning of the file. - if @opts[:start_new_files_at] == :beginning - @logger.debug("#{path}: initial create, no sincedb, seeking to beginning of file") - @files[path].sysseek(0, IO::SEEK_SET) - @sincedb[inode] = 0 - else - # seek to end - @logger.debug("#{path}: initial create, no sincedb, seeking to end #{stat.size}") - @files[path].sysseek(stat.size, IO::SEEK_SET) - @sincedb[inode] = stat.size + _check_gzip_file(path, inode) + if !@gzip[path] + # TODO(sissel): Allow starting at beginning of the file. + if @opts[:start_new_files_at] == :beginning + @logger.debug("#{path}: initial create, no sincedb, seeking to beginning of file") + @files[path].sysseek(0, IO::SEEK_SET) + @sincedb[inode] = 0 + else + # seek to end + @logger.debug("#{path}: initial create, no sincedb, seeking to end #{stat.size}") + @files[path].sysseek(stat.size, IO::SEEK_SET) + @sincedb[inode] = stat.size + end end else - @logger.debug("#{path}: staying at position 0, no sincedb") + _check_gzip_file(path, inode) + if @gzip[path] + @logger.debug("#{path}: gzip file") + else + @logger.debug("#{path}: staying at position 0, no sincedb") + end end return true end # def _open_file + private + def _check_gzip_file(path, inode) + if path.end_with?('.gz') + # RFC1952 two first byte for gzip file is ID1 ID2 == 31=1f 139=8b + @opts[:start_new_files_at] = :beginning # force start to beginning + @files[path].sysseek(0, IO::SEEK_SET) + dataID1 = @files[path].getbyte() + dataID2 = @files[path].getbyte() + @gzip[path] = dataID1 == 31 and dataID2 == 139 + @logger.debug("gzip flag #{@gzip[path]} #{dataID1} #{dataID2}") + @files[path].rewind + @sincedb[inode] = 0 + end + end + private def _read_file(path, &block) - @buffers[path] ||= FileWatch::BufferedTokenizer.new + @logger.debug("_read_file gzip flag is #{@gzip[path]} for path #{path}") + if @gzip[path] + @buffers[path] ||= Zlib::GzipReader.new(@files[path]) + else + @buffers[path] ||= FileWatch::BufferedTokenizer.new + end changed = false - loop do - begin - data = @files[path].sysread(32768) - changed = true - @buffers[path].extract(data).each do |line| - yield(path, line) + if @gzip[path] + loop do + begin + changed = true + @buffers[path].each_line do |line| + line = line.gsub(/\r?\n$/,'') + yield(path, line) + end + @sincedb[@statcache[path]] = @files[path].pos + if @buffers[path].eof? + break + end + rescue Zlib::Error, Zlib::GzipFile::Error => error + @logger.warn("Tail error on gz reading : #{error}") + break + end + end + else + loop do + begin + data = @files[path].sysread(32768) + changed = true + @buffers[path].extract(data).each do |line| + yield(path, line) + end + + @sincedb[@statcache[path]] = @files[path].pos + rescue Errno::EWOULDBLOCK, Errno::EINTR, EOFError + break end - - @sincedb[@statcache[path]] = @files[path].pos - rescue Errno::EWOULDBLOCK, Errno::EINTR, EOFError - break end end